C104の原稿をOverleafで書きながら、どうも読点の間隔が広すぎるなぁと思い、句読点で分割してセンテンスの長さをチェックするPythonスクリプトをちょろっと書いた。TeXの書式を全て知っているわけではないので、適当にパースしているが、まあ使えるので放流。
#!/usr/bin/env python3.11
import argparse
import re
import sys
def main() -> None:
# set up the argument parser
parser = argparse.ArgumentParser(description = "Check the length of document in TeX format")
parser.add_argument("file_path",
help = "Path of a TeX file to be checked")
parser.add_argument("--length", "-l", default = 30, help = "Number of characters to check")
args = parser.parse_args()
p = re.compile(r'\\[a-z]+\{[^\}]+\}')
content = []
with open(args.file_path, "r") as f:
opened = 0
for ln in f.readlines():
stripped = ln.strip()
if stripped[0:6] == "\\begin":
opened += 1
continue
if opened >= 1:
if stripped[0:4] == "\\end":
opened -= 1
continue
if stripped[0:1] == "\\":
continue
if len(stripped) == 0:
continue
replaced = p.sub("", stripped)
content.append(replaced)
if len(content) >= 1:
ln_no = 1
for ln in content:
splited = re.split("[。、]", ln)
pr_no = 1
for part in splited:
part_ln = len(part)
if part_ln >= int(args.length):
print(f'TOO LONG in line #{ln_no}, part #{pr_no}: {part_ln}, "{part}"')
pr_no += 1
ln_no += 1
# Script entry point: run the checker only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()