#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

# Copyright 2025 Maciej Haudek


import sys, os, re, regex, os.path
# True when sys.platform starts with 'win'; charoffset() reads this flag to
# choose between its two offset formulas.
iswindows = sys.platform.startswith('win')

import xml.etree.ElementTree as ET

from xml.sax.saxutils import escape


def get_current_version(bk):
    """Return the version string stored in this plugin's plugin.xml.

    The file is looked up at ``<bk._w.plugin_dir>/<bk._w.plugin_name>/plugin.xml``
    and searched for the first ``<version>...</version>`` element.

    Args:
        bk: plugin container object; only ``bk._w.plugin_dir`` and
            ``bk._w.plugin_name`` are read here.

    Returns:
        The element text with surrounding whitespace stripped, or ``None``
        when the file contains no ``<version>`` element.

    Raises:
        OSError: if plugin.xml cannot be opened.
    """
    version_pattern = re.compile(r'<version>([^<]*)</version>')

    ppath = os.path.join(bk._w.plugin_dir, bk._w.plugin_name, "plugin.xml")
    # Decode explicitly instead of relying on the locale's default encoding
    # (cp125x on Windows).  Only the version text is needed, so undecodable
    # bytes elsewhere in the file are replaced rather than aborting the read.
    with open(ppath, 'r', encoding='utf-8', errors='replace') as f:
        data = f.read()

    m = version_pattern.search(data)
    if m is None:
        return None
    return m.group(1).strip()


# code provided by KevinH
def generate_line_offsets(s):
    """Build the offset table consumed by charoffset().

    The returned list starts with 0 and is followed by the index of every
    '\\n' found in ``s``, in order.  Entries after the first are therefore the
    positions of the newline characters themselves.
    """
    offsets = [0]
    newline_at = -1
    while True:
        # Resume the scan one character past the previous hit.
        newline_at = s.find('\n', newline_at + 1)
        if newline_at < 0:
            break
        offsets.append(newline_at)
    return offsets


# code provided by KevinH
def charoffset(line, col, offlst):
    """Convert a 1-based (line, col) pair into a character offset.

    ``offlst`` is the table produced by generate_line_offsets(); the entry at
    index ``line - 1`` is the base for the requested line.

    When ``iswindows`` is set, the fixed value 999 is returned if ``offlst``
    is exactly ``[0]``, and otherwise the result is additionally shifted by
    ``1 - line``.
    NOTE(review): the shift presumably compensates for one carriage return
    per preceding line -- confirm against the host's offset convention.
    """
    if iswindows and offlst == [0]:
        return 999

    # offlst[line-1] + 1 + (col - 1) collapses to offlst[line-1] + col.
    position = offlst[line - 1] + col
    if iswindows:
        position += 1 - line
    if line == 1:
        # The first table entry is 0 rather than a newline index, so the
        # "+1" folded in above has to be taken back for line 1.
        position -= 1
    return position


def _select_xhtml_files(bk):
    """Return the (id, href) pairs to scan: selected XHTML files, else all text files."""
    selected_files = [
        (entry[1], bk.id_to_href(entry[1]))
        for entry in bk.selected_iter()
        if bk.id_to_mime(entry[1]) == 'application/xhtml+xml'
    ]
    if selected_files:
        # process only selected files
        return selected_files
    # no files selected, process ALL files
    return list(bk.text_iter())


def run(bk):
    """Plugin entry point: report every ``<span ... lang="...">`` found in the book.

    For each file to scan (the selected XHTML files, or every text file when
    none is selected) the content is searched line by line.  Each matching
    span is reported through ``bk.add_extended_result`` as an "info" entry
    carrying the file's book path, the 1-based line number, the character
    offset computed by charoffset() for the position just after the opening
    tag, and a message of the form ``<lang>: <span content>``.

    Args:
        bk: plugin container object supplied by the launcher.

    Returns:
        1 when ``bk.launcher_version()`` is older than 20210430, otherwise 0.
    """
    if bk.launcher_version() < 20210430:
        print("Error: FindForeignWords requires a newer version of Sigil >= 1.60")
        return 1

    # Plugin version; get_current_version() returns None when plugin.xml has
    # no <version> element, which must not break the banner concatenation.
    current_version = get_current_version(bk)
    if current_version is None:
        current_version = 'unknown'
    print('\nLaunching the FindForeignWords plugin (version: ' + current_version + ')\n\n')

    # Compiled once for the whole run.  Group 1 is the opening tag, group 2
    # the lang value, group 3 the span content.
    lang_regex = re.compile(
        r"(<span(?:\s+[^>]*\bclass=\"[^\"]*\")?(?:\s+[^>]*\bxml:lang=\"[^\"]*\")?\s+[^>]*\blang=\"([^\"]*)\"[^>]*>)(.*?)</span>"
    )

    # process file list
    for html_id, href in _select_xhtml_files(bk):
        html = bk.readfile(html_id)
        offlst = generate_line_offsets(html)

        print('Processing ' + os.path.basename(href) + '...')
        bookpath = escape(bk.id_to_bookpath(html_id))

        # Split on '\n' only, so line numbers stay aligned with offlst, which
        # counts nothing but '\n'.  str.splitlines() also breaks on '\r',
        # '\x0b', '\x0c', U+0085, U+2028, ... and could number lines past the
        # end of the offset table.
        for linenumber, line in enumerate(html.split('\n'), start=1):
            for match in lang_regex.finditer(line):
                message = escape(match.group(2)) + ": " + escape(match.group(3).replace('"', '`'))
                # 1-based column of the first character after the opening tag.
                colnumber = match.start() + 1 + len(match.group(1))
                coffset = charoffset(linenumber, colnumber, offlst)
                bk.add_extended_result("info", bookpath, linenumber, coffset, message)

    return 0

def main():
    """Fallback used when the file is executed directly.

    Prints a notice that this path was not supposed to be reached and
    returns -1.
    """
    notice = 'I reached main when I should not have\n'
    print(notice)
    return -1

# Direct execution: hand main()'s return value to sys.exit as the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
