#!/usr/bin/env python
# -*- coding: utf-8 -*-

# plugin: ForeignWords
# author: Maciej Haudek
# file: essential.py (essential functions)
# Copyright (c) 2024-2025 Maciej Haudek


from __future__ import unicode_literals, division, absolute_import, print_function

import os, inspect, re

import sys

import locale

import xml.etree.ElementTree as ET

try:
    from sigil_bs4 import BeautifulSoup, Tag
except Exception:
    from bs4 import BeautifulSoup, Tag


def GetDictPath(dict_file="default"):
    """Build the path of a dictionary file in the ``user_dictionaries`` folder.

    The folder is addressed relative to this source file: starting from the
    directory that contains the file, two directory levels are removed and
    ``user_dictionaries/<dict_file>`` is appended. Nothing is checked on
    disk; the function only assembles the path.

    Parameters:
        dict_file (str): File name of the dictionary (default ``"default"``).

    Returns:
        str: The assembled path.
    """
    # Absolute location of the file this function is defined in
    this_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
    base_dir = os.path.dirname(this_file)
    # Climb two levels up from the directory holding this file
    for _ in range(2):
        base_dir = os.path.dirname(base_dir)
    return os.path.join(base_dir, 'user_dictionaries', dict_file)


def read_words(file_path):
    """Load a word list from a UTF-8 text file, one entry per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so the empty string is never part of the result. A leading
    UTF-8 byte-order mark is ignored.

    Parameters:
        file_path (str): Path of the word-list file.

    Returns:
        set of str: The entries found in the file. An empty set is returned
        (after printing a message) when the file does not exist or cannot
        be read, so the result always supports the same operations.
    """
    if not os.path.exists(file_path):
        print(f"Warning:\nFile '{file_path}' not found.\nReturning an empty list.")
        return set()
    try:
        # 'utf-8-sig' reads plain UTF-8 as well, but drops a leading BOM that
        # would otherwise become part of the first word.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            stripped_lines = (line.strip() for line in file)
            # Blank lines are dropped: an empty entry would make every
            # punctuation-only token look like a listed word.
            return {word for word in stripped_lines if word}
    except (OSError, UnicodeError) as e:
        print(f"Error reading file '{file_path}': {e}")
        return set()


def find_expressions(text, word_list, combine_words):
    """Collect the words or phrases of a document that appear in *word_list*.

    The markup is parsed with BeautifulSoup's ``html.parser``. Every text
    node below ``<body>`` (or below the document root when there is no
    ``<body>``) is split on whitespace. Each token is compared with
    *word_list* after surrounding punctuation has been stripped; the
    returned strings are the original, unstripped tokens.

    Parameters:
        text (str): Markup to scan.
        word_list: Container of known words, tested with ``in``.
        combine_words (bool): When true, a run of consecutive matching
            tokens within one text node is returned as a single
            space-joined phrase; otherwise every matching token is
            returned on its own.

    Returns:
        list of str: Matches in document order (duplicates are kept).
    """
    soup = BeautifulSoup(text, 'html.parser')
    # html.parser does not invent a <body>; fall back to the whole document
    # for fragments instead of failing on ``None.find_all``.
    root = soup.body if soup.body is not None else soup

    # Punctuation stripped before the lookup. The backslash is part of the
    # set (the earlier literal contained it through "\(" style escapes).
    chars_to_remove = "“”„‘’‹›«»‚’\".;!?…,\\()[]"

    def is_listed(word):
        # A token made only of punctuation cleans down to "" and must never
        # count as a match ("" is "in" every str and may sit in a word set).
        return bool(word) and word in word_list

    expressions = []

    for element in root.find_all(string=True):
        words_temp = element.split()
        cleaned_words = [word.strip(chars_to_remove) for word in words_temp]
        total = len(cleaned_words)
        i = 0
        while i < total:
            if not is_listed(cleaned_words[i]):
                i += 1
                continue
            if not combine_words:
                # Single words (not phrases)
                expressions.append(words_temp[i])
                i += 1
                continue
            # Extend the phrase forward over consecutive listed words. No
            # backward scan is needed: index i is only reached when the
            # previous token did not match.
            j = i + 1
            while j < total and is_listed(cleaned_words[j]):
                j += 1
            # Keep the original tokens (punctuation included)
            expressions.append(' '.join(words_temp[i:j]))
            i = j

    return expressions



def mark_expressions(text, expressions, lang_search=None, use_class=False, class_name="", ignore_class=False, use_lang=False, use_xml_lang=False, lang="en", combine_words=False):
    """Wrap occurrences of *expressions* in ``<span>`` tags and update span attributes.

    The markup is parsed with BeautifulSoup's ``html.parser``. Every text
    node whose direct parent is not a ``<span>`` is searched for each
    expression as a whole word or phrase (regex word boundaries on both
    sides); each match is replaced by a new ``<span>`` containing the
    expression. Afterwards the ``class`` / ``lang`` / ``xml:lang``
    attributes of spans in the document are rewritten according to the
    flags, and a line is printed for every span that was updated.

    Parameters:
        text (str): Markup to process.
        expressions (list of str): Words/phrases to mark. They are passed
            through ``clean_expressions``; empty results and duplicates
            are ignored.
        lang_search (str or None): When truthy, only spans whose ``lang``
            or ``xml:lang`` equals this value are updated. When falsy,
            every ``<span>`` in the document is updated.
        use_class (bool): Set ``class`` to *class_name* (unless
            *ignore_class*); when false, an existing ``class`` is removed
            (unless *ignore_class*).
        class_name (str): Value written to ``class``.
        ignore_class (bool): When true, ``class`` is neither set nor removed.
        use_lang (bool): Set ``lang`` to *lang*; when false, remove it.
        use_xml_lang (bool): Set ``xml:lang`` to *lang*; when false, remove it.
        lang (str): Language code written to ``lang`` / ``xml:lang``.
        combine_words (bool): When true, expressions are processed from the
            longest to the shortest.

    Returns:
        str: The serialized document.
    """
    # Strip surrounding punctuation, then drop empty strings and duplicates
    # (first-seen order is kept). An empty expression would compile to
    # r'\b\b', which matches at every word boundary.
    expressions = list(dict.fromkeys(
        expr for expr in clean_expressions(expressions) if expr
    ))
    soup = BeautifulSoup(text, "html.parser")

    # With combine_words enabled, handle longer expressions first so that a
    # shorter one cannot split a longer phrase containing it.
    if combine_words:
        expressions.sort(key=len, reverse=True)

    def create_span(expr):
        # Build a <span> for one matched expression with the requested attributes
        span = soup.new_tag("span")
        span.string = expr
        if use_class and not ignore_class:
            span['class'] = class_name
        if use_lang:
            span['lang'] = lang
        if use_xml_lang:
            span['xml:lang'] = lang
        return span

    def update_span(span):
        # Remove attributes that are switched off, then (re)write the enabled ones
        if not use_class and 'class' in span.attrs and not ignore_class:
            del span.attrs['class']
        if not use_lang and 'lang' in span.attrs:
            del span.attrs['lang']
        if not use_xml_lang and 'xml:lang' in span.attrs:
            del span.attrs['xml:lang']
        if use_class and not ignore_class:
            span['class'] = class_name
        if use_lang:
            span['lang'] = lang
        if use_xml_lang:
            span['xml:lang'] = lang
        # QT_TRANSLATE_NOOP
        print(f"Updated span: {span.string}")

    # One compiled whole-word pattern and one placeholder token per
    # expression, prepared once instead of once per text node.
    replacements = []
    token_to_expr = {}
    for index, expr in enumerate(expressions):
        token = f"__PLACEHOLDER_{index}__"
        token_to_expr[token] = expr
        replacements.append((re.compile(rf'\b{re.escape(expr)}\b'), token))
    # Splitting with a capturing group keeps the placeholder tokens in the result
    ph_pattern = re.compile(r'(__PLACEHOLDER_\d+__)')

    # Snapshot the text nodes that are not direct children of a <span>; the
    # tree is modified below, so it must not be iterated lazily.
    # NOTE(review): find_all(string=True) presumably also yields comments,
    # doctype and similar nodes - confirm whether they should be skipped.
    text_nodes = [
        node for node in soup.find_all(string=True)
        if node.parent.name != "span"
    ]

    for node in text_nodes:
        new_text = node
        replaced = 0
        # Swap every match for its placeholder, in expression order
        for pattern, token in replacements:
            new_text, count = pattern.subn(token, new_text)
            replaced += count
        if not replaced:
            continue

        # Rebuild the node as alternating plain text and <span> tags
        new_fragment = BeautifulSoup("", "html.parser")
        for part in ph_pattern.split(new_text):
            if not part:
                continue
            expr = token_to_expr.get(part)
            if expr is None:
                # Ordinary text (or placeholder-looking text that was already
                # in the document): keep it unchanged
                new_fragment.append(part)
            else:
                new_fragment.append(create_span(expr))
        node.replace_with(new_fragment)

    # Update spans.
    # NOTE(review): without lang_search this touches every <span> in the
    # document, not only the ones created above - confirm this is intended.
    for span in soup.find_all("span"):
        if lang_search:
            # Search mode: only spans already tagged with the searched language
            if span.get("lang") == lang_search or span.get("xml:lang") == lang_search:
                update_span(span)
        else:
            # User-dictionary mode
            update_span(span)

    return str(soup)


def merge_spans(text):
    """Fuse neighbouring ``<span>`` elements that carry identical attributes.

    Two spans are merged when the second opening tag repeats the attribute
    text of the first one exactly, neither span contains nested markup, and
    only whitespace lies between them. That whitespace is kept inside the
    merged span. The substitution is repeated until a pass changes nothing,
    so longer chains of spans collapse into one.

    Parameters:
        text (str): Markup to process.

    Returns:
        str: Markup with adjacent matching spans merged.
    """
    adjacent_pair = re.compile(
        r'(<span([^>]+)>)([^<]*)</span>(\s*)(<span\2>)([^<]*)</span>'
    )
    # first opening tag + first content + gap + second content + one closing tag
    merged_template = r'\g<1>\g<3>\g<4>\g<6></span>'

    merged_count = 1
    while merged_count:
        # Every successful substitution shortens the text, so a pass with
        # zero substitutions means the fixed point has been reached.
        text, merged_count = adjacent_pair.subn(merged_template, text)
    return text


def clean_expressions(expressions, chars_to_remove="“”„‘’‹›«»‚’\".;!?…,\\(\\)\\[\\]"):
    """
    Strip unwanted characters from the start and end of each expression.

    All leading and trailing characters that occur in chars_to_remove are
    removed; characters inside an expression are left untouched. The default
    set covers quotation marks, sentence punctuation, brackets and the
    backslash (written with explicit ``\\\\`` escapes so the literal contains
    no invalid escape sequences; its value is unchanged).

    Parameters:
        expressions (list of str): List of phrases to clean.
        chars_to_remove (str): String containing characters to remove from the start and end of each phrase.

    Returns:
        list of str: List of fully cleaned expressions (same order and length
        as the input; an expression may become an empty string).
    """
    # str.strip(chars) removes every leading/trailing character found in chars
    return [expression.strip(chars_to_remove) for expression in expressions]


def main():
    """Report that ``main`` was reached unexpectedly.

    Prints a notice and returns -1, which the script guard passes on as the
    exit status.
    """
    exit_status = -1
    print("I reached main when I should not have\n")
    return exit_status


# Runs only when the file is executed as a script: exit with main()'s result.
if __name__ == "__main__":
    raise SystemExit(main())
