MobileRead Forums - View Single Post - Detect red underline spelling mistakes with a regex?

lomkiri · 03-01-2026, 12:05 PM

I made the same function, but for all text files at once (or for any number of files: the current file, the selected files, etc.).
Same search string: <body[^>]*>(.+)</body>
Put the cursor at the top of a file, preferably at the top of the first file to be scanned.

Code:

def replace(match, number, file_name, metadata, dictionaries, data, functions, *args, **kwargs):
    """
    Count the number of errors using dictionary(), per file, and print a
    report once every match has been processed.

    Use "replace all" with "dot all"
    search string is <body[^>]*>(.+)</body>

    Parameters actually used:
        match        -- the current match; a falsy value marks the extra
                        final call, on which the report is printed.
        file_name    -- key under which the tallies of the match are stored.
        dictionaries -- object whose ``recognized(word)`` method tells
                        whether a word is known.
        data         -- dict that must be the same object on every call of
                        one run; it starts empty and holds the compiled
                        pattern ("regex"), the per-file tallies ("files")
                        and the grand total ("total_err").
    The remaining parameters are accepted but not used.

    Returns:
        ``match[0]`` on a normal call, so the replacement text equals the
        matched text; ``None`` on the final (no match) call.
    """

    lowered = False  # True or False: lower-case each word before checking it

    # First passage: `data` is still empty, so build the shared state.
    if not data:
        # Only the pattern compilation needs this module, so import it here
        # rather than on every call.
        import regex

        # Request one more call, without a match, after the last match
        # (that call is the "last passage" below).
        replace.call_after_last_match = True
        # Tags are matched first and then discarded by (*SKIP)(*FAIL), so
        # only the words located outside of tags are returned by findall().
        data["regex"] = regex.compile(r"(?:<[^>]+>)(*SKIP)(*FAIL)|\b\w+\b")
        data["files"] = {}
        data["total_err"] = 0

    # Last passage: print the report; nothing is printed when no error at
    # all was found.
    if not match:
        if data["total_err"]:
            print(f'"Lower words before check" is {str(lowered)}')
            print(f"{len(data['files'])} files scanned, {data['total_err']} errors in it")
            print("==============================")
            for name, (err_count, word_count, bad_words) in data["files"].items():
                print("\n", f"file {name}: , {err_count} error(s), {word_count} words")
                if err_count:
                    print(bad_words)
        return

    # Normal passage: check every word of the matched text.
    words = data["regex"].findall(match[0])
    nberr = 0          # every unrecognized occurrence is counted
    errors = set()     # each distinct unrecognized word is kept once
    for word in words:
        if lowered:
            word = word.lower()
        if not dictionaries.recognized(word):
            nberr += 1
            errors.add(word)

    # Accumulate instead of keeping only the first match of a file, so that
    # the per-file figures always add up to data["total_err"].
    prev_err, prev_words, prev_set = data["files"].get(file_name, (0, 0, set()))
    data["files"][file_name] = (
        prev_err + nberr,
        prev_words + len(words),
        prev_set | errors,
    )
    data["total_err"] += nberr

    return match[0]

Edit: Fixed a bug; the function can now be applied to any number of files.