#!/usr/bin/env python
 # -*- coding: utf-8 -*-
import sys, os, re, regex
from xml.sax.saxutils import escape
from tkinter import Tk, BOTH, StringVar, IntVar, BooleanVar
from tkinter.ttk import Frame, Button, Style, Label, Entry, Radiobutton, Checkbutton
iswindows = sys.platform.startswith('win')

try:
    from sigil_bs4 import BeautifulSoup
except:
    from bs4 import BeautifulSoup

# code provided by KevinH
def generate_line_offsets(s):
    ''' returns a list holding 0 followed by the index of every line feed in s '''
    newline_positions = []
    cursor = s.find('\n')
    # str.find() reports -1 once no further line feed exists
    while cursor != -1:
        newline_positions.append(cursor)
        cursor = s.find('\n', cursor + 1)
    # the leading 0 stands for the start of the first line
    return [0] + newline_positions

# code provided by KevinH
def charoffset(line, col, offlst):
    ''' returns the character offset for a 1-based line and column

    offlst is the list built by generate_line_offsets(): entry 0 is 0 and
    every later entry is the index of a line feed.
    '''
    position = offlst[line - 1] + (col - 1)
    if iswindows:
        # NOTE(review): the Windows branch also subtracts the line number,
        # presumably to compensate for CR/LF line endings - confirm
        position += 2 - line
    else:
        # step past the line feed that ends the previous line
        position += 1
    # the first entry of offlst is the 0 placeholder, not a line feed index,
    # so the adjustment above overshoots by one on line 1
    return position - 1 if line == 1 else position

# tkinter dialog box class
class Dialog(Frame):
    ''' ttk frame that asks for the regex engine, the regex and the tag option '''
    # the global statement makes the assignment below bind a module-level
    # dictionary; savevalues() fills it and run() reads it afterwards
    global parameters
    parameters = {}

    def __init__(self, parent):
        # initialise the frame, remember the parent window and build the widgets
        super().__init__(parent)
        self.parent = parent
        self.initUI()

    def savevalues(self):
        # copy the current widget values into the parameters dictionary,
        # then close the window
        parameters.update(
            engine=self.engineValue.get(),
            regex=self.regexValue.get(),
            DontSearchTags=self.DontSearchTags.get(),
        )
        self.master.destroy()

    def initUI(self):
        # window title and frame geometry
        self.parent.title("Regex Tester")
        self.pack(fill=BOTH, expand=1)

        # widgets are created top to bottom
        self._add_engine_row()
        self._add_tags_checkbox()
        self._add_regex_row()
        self._add_buttons()

    def _add_engine_row(self):
        # "Engine:" caption followed by the re / regex radio buttons
        Label(self, text="Engine: ").place(x=20, y=10)
        self.engineValue = StringVar(None)
        re_choice = Radiobutton(self, text="re", value="re", variable=self.engineValue)
        # invoking the button selects "re" as the initial engine
        re_choice.invoke()
        regex_choice = Radiobutton(self, text="regex", value="regex", variable=self.engineValue)
        re_choice.place(x=70, y=10)
        regex_choice.place(x=110, y=10)

    def _add_tags_checkbox(self):
        # "Don't search tags." check box, initially unchecked
        self.DontSearchTags = BooleanVar(None)
        self.DontSearchTags.set(False)
        tags_box = Checkbutton(self, text="Don't search tags.", variable=self.DontSearchTags)
        tags_box.place(x=20, y=35)

    def _add_regex_row(self):
        # "Regex:" caption followed by the entry field for the expression
        Label(self, text="Regex: ").place(x=20, y=60)
        self.regexValue = StringVar(None)
        regex_field = Entry(self, textvariable=self.regexValue)
        regex_field.place(x=60, y=60, width=170)

    def _add_buttons(self):
        # Cancel only leaves the main loop; Find stores the values first
        cancel = Button(self, text="Cancel", command=self.quit)
        cancel.place(x=155, y=90)
        find = Button(self, text="Find", command=self.savevalues)
        find.place(x=20, y=90)

def _split_on_newlines(text):
    ''' splits text into lines on line feeds only, matching generate_line_offsets() '''
    pieces = text.split('\n')
    # split() leaves one trailing empty piece for empty text or text that
    # ends with a line feed; it is not a line of its own
    if pieces[-1] == '':
        pieces.pop()
    # remove the carriage return that CR/LF line endings leave behind
    return [piece[:-1] if piece.endswith('\r') else piece for piece in pieces]


def run(bk):
    ''' main routine

    Shows the dialog, then searches the selected xhtml files (or all text
    files if none are selected) line by line and adds one 'info' result per
    match to the validation pane.

    bk is the book container object handed to the plugin by Sigil.
    Returns 0 when the search ran or was cancelled, -1 when the regular
    expression could not be compiled.
    '''

    # set Tk parameters for dialog box
    root = Tk()
    root.geometry("250x130+300+300")
    Dialog(root)
    root.mainloop()

    # nothing to do if the dialog was cancelled or the search string is empty
    if not parameters or not parameters['regex']:
        return 0

    # select the engine and compile the expression once; a malformed
    # expression is reported instead of ending in a traceback
    engine = re if parameters['engine'] == 're' else regex
    try:
        pattern = engine.compile(parameters['regex'])
    except (re.error, regex.error) as err:
        print('Invalid regular expression: ' + str(err))
        return -1

    skip_tags = parameters['DontSearchTags']

    # the launcher version does not change during a run, so query it once
    launcher_version = bk.launcher_version()
    use_bookpaths = launcher_version >= 20190927
    use_extended_results = launcher_version >= 20160909

    # get all selected xhtml files
    selected_files = [
        (selected[1], bk.id_to_href(selected[1]))
        for selected in bk.selected_iter()
        if bk.id_to_mime(selected[1]) == 'application/xhtml+xml'
    ]

    # no files selected: process ALL text files
    file_list = selected_files or bk.text_iter()

    # process file list
    for (html_id, href) in file_list:
        html = bk.readfile(html_id)
        offlst = generate_line_offsets(html)

        # sigil 1.x requires file paths
        if use_bookpaths:
            filename = bk.id_to_bookpath(html_id)
        else:
            filename = os.path.basename(href)
        filename = escape(filename)

        # search each line; lines are split on line feeds only because
        # str.splitlines() also breaks on other characters, which would put
        # the line numbers out of step with offlst
        for linenumber, orig_line in enumerate(_split_on_newlines(html), start=1):
            if skip_tags:
                # strip html tags from line
                line = BeautifulSoup(orig_line).text
            else:
                line = orig_line

            # check for matches
            for match in pattern.finditer(line):
                found = match.group(0)

                if skip_tags:
                    # locate the matched text in the unstripped line; find()
                    # returns -1 when it is not there verbatim (for example
                    # when the match spans a tag), so fall back to column 1
                    colnumber = max(orig_line.find(found, match.start()), 0) + 1
                else:
                    colnumber = match.start() + 1
                coffset = charoffset(linenumber, colnumber, offlst)

                message = 'Col: ' + str(colnumber) + ' ' + escape(found).replace('"', "&quot;")

                # add message to validation pane
                if use_extended_results:
                    bk.add_extended_result('info', filename, linenumber, coffset, message)
                else:
                    bk.add_result('info', filename, linenumber, message)

    return 0


def main():
    ''' fallback entry point: prints a notice and returns the failure code -1 '''
    notice = 'I reached main when I should not have\n'
    print(notice)
    return -1

# only executed when the file is started directly as a script
if __name__ == "__main__":
    sys.exit(main())
