ながーい文章を書く悪癖があるのでチェックする

C104の原稿をOverleafで書きながら、どうも読点の間隔が広すぎるなぁと思い、句読点で分割してセンテンスの長さをチェックするPythonスクリプトをちょろっと書いた。TeXの書式を全て知ってるわけではないので、適当にパースしてるが、まあ使えるので放流。

#!/usr/bin/env python3.11

import argparse
import re
import sys

def _collect_text_lines(file_path: str) -> list[tuple[int, str]]:
    """Return ``(line_number, text)`` pairs for the prose lines of a TeX file.

    Line numbers are 1-based positions in the file itself, so they can be
    used to locate the text in an editor.  The following lines are skipped:

    * everything from a line starting with ``\\begin`` up to the matching
      line starting with ``\\end`` (nesting is tracked with a counter),
    * any other line starting with a backslash,
    * blank lines.

    In the remaining lines, inline commands of the form ``\\name{...}``
    (lower-case name, non-empty argument) are removed together with their
    argument.
    """
    inline_command = re.compile(r'\\[a-z]+\{[^\}]+\}')
    collected: list[tuple[int, str]] = []

    # The file is read as UTF-8 explicitly so that Japanese text does not
    # depend on the platform's locale default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        depth = 0  # number of currently open \begin ... \end environments
        for line_no, raw in enumerate(f, start=1):
            stripped = raw.strip()
            if stripped.startswith("\\begin"):
                depth += 1
                continue
            if depth >= 1:
                # Inside an environment: only watch for the closing \end.
                if stripped.startswith("\\end"):
                    depth -= 1
                continue
            if not stripped or stripped.startswith("\\"):
                continue
            collected.append((line_no, inline_command.sub("", stripped)))

    return collected


def main() -> None:
    """Report sentence fragments of a TeX file that are too long.

    Command-line arguments:

    * ``file_path`` -- path of the TeX file to check.
    * ``--length`` / ``-l`` -- threshold in characters (default 30).

    Each prose line is split at the Japanese punctuation marks ``。`` and
    ``、``.  Every fragment whose length is greater than or equal to the
    threshold is printed together with the line number in the file, the
    1-based fragment number within that line, and the fragment length.
    """
    # set up the argument parser
    parser = argparse.ArgumentParser(description="Check the length of document in TeX format")
    parser.add_argument("file_path",
        help="Path of a TeX file to be checked")
    # type=int lets argparse reject a non-numeric threshold up front.
    parser.add_argument("--length", "-l", type=int, default=30,
        help="Number of characters to check")

    args = parser.parse_args()

    for line_no, text in _collect_text_lines(args.file_path):
        for part_no, part in enumerate(re.split("[。、]", text), start=1):
            part_len = len(part)
            if part_len >= args.length:
                print(f'TOO LONG in line #{line_no}, part #{part_no}: {part_len}, "{part}"')

# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()