#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys, os, shutil, tempfile, webbrowser, re, json
from os.path import expanduser
from subprocess import Popen, PIPE
from xml.sax.saxutils import escape
from epub_utils import epub_zip_up_book_contents
from compatibility_utils import iswindows
from sigil_bs4 import BeautifulSoup
from cficonvert import cfi_convert
# True when running on Linux (sys.platform starts with 'linux')
islinux = sys.platform.startswith('linux')

# from KevinH's Access-Aide plugin
# Maps an epub:type value (key) to the ARIA role name (value) that
# _role_from_etype() looks up for it; epub:type values that are missing
# from this table yield no role suggestion.
_epubtype_aria_map = {
    "abstract"        : "doc-abstract",
    "acknowledgments" : "doc-acknowledgments",
    "afterword"       : "doc-afterword",
    "appendix"        : "doc-appendix",
    "biblioentry"     : "doc-biblioentry",
    "bibliography"    : "doc-bibliography",
    "biblioref"       : "doc-biblioref",
    "chapter"         : "doc-chapter",
    "colophon"        : "doc-colophon",
    "conclusion"      : "doc-conclusion",
    "cover-image"     : "doc-cover",
    "credit"          : "doc-credit",
    "credits"         : "doc-credits",
    "dedication"      : "doc-dedication",
    "endnote"         : "doc-endnote",
    "endnotes"        : "doc-endnotes",
    "epigraph"        : "doc-epigraph",
    "epilogue"        : "doc-epilogue",
    "errata"          : "doc-errata",
    "figure"          : "figure",
    "footnote"        : "doc-footnote",
    "foreword"        : "doc-foreword",
    "glossary"        : "doc-glossary",
    "glossdef"        : "definition",
    "glossref"        : "doc-glossref",
    "glossterm"       : "term",
    "index"           : "doc-index",
    "introduction"    : "doc-introduction",
    "landmarks"       : "directory",
    "list"            : "list",
    "list-item"       : "listitem",
    "noteref"         : "doc-noteref",
    "notice"          : "doc-notice",
    "page-list"       : "doc-pagelist",
    "pagebreak"       : "doc-pagebreak",
    "part"            : "doc-part",
    "preface"         : "doc-preface",
    "prologue"        : "doc-prologue",
    "pullquote"       : "doc-pullquote",
    "qna"             : "doc-qna",
    "referrer"        : "doc-backlink",
    "subtitle"        : "doc-subtitle",
    "table"           : "table",
    "table-row"       : "row",
    "table-cell"      : "cell",
    "tip"             : "doc-tip",
    "toc"             : "doc-toc",
}

# Maps an ARIA role to the tuple of tag names that may carry it in addition
# to the tags listed in _all_role_tags; an empty tuple means there are no
# additional tags.
# Every value must be a real tuple, hence the trailing comma on one-element
# entries: a bare ("aside") is just the string "aside", and an `in` test
# against a string matches substrings, so the tag name "a" would wrongly be
# accepted for it.
_aria_role_allowed_tags = {
    "doc-abstract"       : ("section",),
    "doc-acknowledgments": ("section",),
    "doc-afterword"      : ("section",),
    "doc-appendix"       : ("section",),
    "doc-biblioentry"    : ("li",),
    "doc-bibliography"   : ("section",),
    "doc-biblioref"      : ("a",),
    "doc-chapter"        : ("section",),
    "doc-colophon"       : ("section",),
    "doc-conclusion"     : ("section",),
    "doc-cover"          : ("img",),
    "doc-credit"         : ("section",),
    "doc-credits"        : ("section",),
    "doc-dedication"     : ("section",),
    "doc-endnote"        : ("li",),
    "doc-endnotes"       : ("section",),
    "doc-epigraph"       : (),
    "doc-epilogue"       : ("section",),
    "doc-errata"         : ("section",),
    "figure"             : (),
    "doc-footnote"       : ("aside", "footer", "header"),
    "doc-foreword"       : ("section",),
    "doc-glossary"       : ("section",),
    "definition"         : (),
    "doc-glossref"       : ("a",),
    "term"               : (),
    "doc-index"          : ("nav", "section"),
    "doc-introduction"   : ("section",),
    "directory"          : ("ol", "ul"),
    "list"               : (),
    "listitem"           : (),
    "doc-noteref"        : ("a",),
    "doc-notice"         : ("section",),
    "doc-pagelist"       : ("nav", "section"),
    "doc-pagebreak"      : ("hr",),
    "doc-part"           : ("section",),
    "doc-preface"        : ("section",),
    "doc-prologue"       : ("section",),
    "doc-pullquote"      : ("aside", "section"),
    "doc-qna"            : ("section",),
    "doc-backlink"       : ("a",),
    "doc-subtitle"       : ("h1", "h2", "h3", "h4", "h5", "h6"),
    "table"              : (),
    "cell"               : (),
    "row"                : (),
    "doc-tip"            : ("aside",),
    "doc-toc"            : ("nav", "section"),
}

# these tags allow all aria roles
# subject to some conditions
# conditions field: (href_allowed, need_alt)
#   href_allowed: when False, the blanket permission does not apply to an
#                 element that carries an href attribute
#   need_alt:     when True, the blanket permission only applies to an
#                 element that carries an alt attribute
# (both conditions are evaluated in _role_from_etype)
_all_role_tags = {
    "a"          : (False, False),
    "abbr"       : (True, False),
    "address"    : (True, False),
    "b"          : (True, False),
    "bdi"        : (True, False),
    "bdo"        : (True, False),
    "blockquote" : (True, False),
    "br"         : (True, False),
    "canvas"     : (True, False),
    "cite"       : (True, False),
    "code"       : (True, False),
    "del"        : (True, False),
    "dfn"        : (True, False),
    "div"        : (True, False),
    "em"         : (True, False),
    "i"          : (True, False),
    "img"        : (False, True),
    "ins"        : (True, False),
    "kbd"        : (True, False),
    "mark"       : (True, False),
    "output"     : (True, False),
    "p"          : (True, False),
    "pre"        : (True, False),
    "q"          : (True, False),
    "rp"         : (True, False),
    "rt"         : (True, False),
    "ruby"       : (True, False),
    "s"          : (True, False),
    "samp"       : (True, False),
    "small"      : (True, False),
    "span"       : (True, False),
    "strong"     : (True, False),
    "sub"        : (True, False),
    "sup"        : (True, False),
    "table"      : (True, False),
    "tbody"      : (True, False),
    "td"         : (True, False),
    "tfoot"      : (True, False),
    "thead"      : (True, False),
    "th"         : (True, False),
    "tr"         : (True, False),
    "time"       : (True, False),
    "u"          : (True, False),
    "var"        : (True, False),
    "wbr"        : (True, False)
}

# epub 3.0.2 and aria rules make this quite a mess
def _role_from_etype(etype, tname, has_href, has_alt):
    ''' return the ARIA role matching an epub:type, if the tag may carry it

    etype    -- value of the element's epub:type attribute
    tname    -- name of the element's tag
    has_href -- True if the element has an href attribute
    has_alt  -- True if the element has an alt attribute

    Returns the role name from _epubtype_aria_map, or None when the
    epub:type has no mapped role or the role is not allowed on this tag.
    '''
    # first get role for epub type from map
    role = _epubtype_aria_map.get(etype)
    if role is None:
        return None

    # check if role would be in a tag that allows all roles
    # subject to conditions
    conditions = _all_role_tags.get(tname)
    if conditions is not None:
        href_allowed, need_alt = conditions
        href_ok = href_allowed or not has_href
        alt_ok = has_alt or not need_alt
        if href_ok and alt_ok:
            return role

    # still need to check for specific additions/exceptions
    tagset = _aria_role_allowed_tags.get(role, ())
    # A one-element entry written as ("section") is a plain string; testing
    # `tname in` a string matches substrings (e.g. "a" in "aside"), so wrap
    # it in a tuple to compare whole tag names only.
    if isinstance(tagset, str):
        tagset = (tagset,)
    if tname in tagset:
        return role
    return None

# simple ace wrapper
def aceWrapper(*args):
    ''' wrapper for running ace

    Starts the command given by *args (program name followed by its
    arguments), waits for it to finish and returns the (stdout, stderr)
    tuple of captured bytes produced by Popen.communicate().
    '''
    # Only go through the shell on Windows.
    # NOTE(review): presumably the shell is needed there because the `ace`
    # launcher is a script rather than an executable - confirm.
    # On POSIX systems shell=True combined with an argument *list* passes
    # every item after the first to the shell itself instead of to the
    # command, so ace would be started without any of its options.  That
    # used to happen on every non-Linux POSIX platform (e.g. macOS).
    use_shell = os.name == 'nt'
    process = Popen(list(args), stdout=PIPE, stderr=PIPE, shell=use_shell)
    return process.communicate()

# ACE impact level -> Sigil validation message type; anything that is not
# listed here (e.g. 'minor') is reported as 'info'
_ACE_IMPACT_RESTYPE = {
    'critical': 'error',
    'serious': 'error',
    'moderate': 'warning',
}


def _format_allowed_tags(allowed_tags):
    ''' return an _aria_role_allowed_tags entry as a 'tag, tag' string '''
    # one-element entries may be stored as a plain string instead of a tuple
    if isinstance(allowed_tags, str):
        return allowed_tags
    return ', '.join(allowed_tags)


def _role_for_snippet(snippet):
    ''' return (epub_type, suggested ARIA role) for an ACE html snippet '''
    soup = BeautifulSoup(snippet, 'xml')
    # guard against snippets that do not parse into an element
    tag = soup.contents[0] if soup.contents else None
    attrs = getattr(tag, 'attrs', None) or {}
    if 'epub:type' not in attrs:
        return None, None
    epub_type = attrs['epub:type']
    role = _role_from_etype(epub_type, tag.name, 'href' in attrs, 'alt' in attrs)
    return epub_type, role


def _report_assertions(bk, parsed_json, spine_dict, debug):
    ''' add all ACE assertions to the validation pane; return 0, or -1 on a CFI error '''
    for assertion in parsed_json['assertions']:

        # get file name, id and number
        file_name = assertion['earl:testSubject']['url']
        base_file_name = os.path.basename(file_name)
        print('\nWorking on {}'.format(base_file_name))
        manifest_id = bk.href_to_id(file_name)
        file_number = None
        contents = ''

        # content.opf is unmanifested and has no manifest id
        if manifest_id is not None:
            # manifested files that are not part of the spine have no
            # epubcfi prefix; they are reported without a resolved position
            spine_position = spine_dict.get(manifest_id)
            if spine_position is not None:
                file_number = str(spine_position)
            contents = bk.readfile(manifest_id)

        # the sigil 1.x validation plugin api requires file paths
        # (same for every assertion of this file, so computed only once)
        if bk.launcher_version() >= 20190927:
            if manifest_id is not None:
                base_file_name = bk.id_to_bookpath(manifest_id)
            else:
                base_file_name = bk.get_opfbookpath()

        # process all ACE assertions
        for earl_assertion in assertion['assertions']:
            result = earl_assertion['earl:result']
            cfi = None
            real_epubcfi = None
            # reset per assertion so that values of a previous assertion
            # can never leak into the role hint of this one
            epub_type = None
            role = None
            text = ''
            foffset = None

            # get actual error message
            description = result['dct:description'].replace('\n', ' ')
            description = description.replace('Fix any of the following:', '')
            description = description.replace('Fix all of the following:', '')
            error_message = escape(description).replace('"', '&quot;').strip()

            # define Sigil message type from the error level
            error_level = earl_assertion['earl:test']['earl:impact']
            restype = _ACE_IMPACT_RESTYPE.get(error_level, 'info')

            # get epubcfi
            if 'earl:pointer' in result:
                cfi = result['earl:pointer']['cfi'][0]
                # ignore pointless cfi value
                if cfi == "/":
                    cfi = None

            if cfi and file_number is not None:
                real_epubcfi = 'epubcfi(/6/{}!{})'.format(file_number, cfi)
                try:
                    #href, linenumber, ncol, foffset, time_offset, space_offset, text_offset, text = res
                    _, linenumber, _, foffset, _, _, _, text = cfi_convert(bk, real_epubcfi)
                except Exception as ex:
                    print('\n*** PYTHON ERROR ***\nAn exception of type {0} occurred.\nArguments:\n{1!r}'.format(type(ex).__name__, ex.args))
                    if not debug:
                        return -1
                    linenumber = 2
            else:
                # ACE doesn't report line numbers for non-HTML files
                linenumber = 3

            # get html (snippet) and recommended ARIA role
            if 'html' in result:
                epub_type, role = _role_for_snippet(result['html'])

            # add suggested role:
            if error_message == 'Element has no ARIA role matching its epub:type':
                if role:
                    error_message += '. Matching ARIA role: ' + role + '.'
                elif epub_type in _epubtype_aria_map:
                    role = _epubtype_aria_map[epub_type]
                    allowed_tags = _aria_role_allowed_tags[role]
                    if allowed_tags:
                        error_message += '. Matching ARIA role &quot;{}&quot; only allowed on {}.'.format(role, _format_allowed_tags(allowed_tags))
                    elif debug:
                        error_message += '. Matching ARIA role: *** NONE ***.'

            # add element text to error message
            if real_epubcfi is not None and text != '':
                separator = ' Element: ' if error_message.endswith('.') else '. Element: '
                error_message += separator + escape(text).replace('"', '&quot;')

            # calculate offset for other tags (not calculated for Linux/macOS)
            if text != '' and contents != '' and iswindows:
                position = contents.find(text)
                # find() returns -1 when the text is absent; do not turn
                # that into a bogus negative offset
                foffset = position - linenumber + 1 if position >= 0 else None

            # populate validation pane
            if foffset and iswindows:
                bk.add_extended_result(restype, base_file_name, linenumber, foffset, error_message)
            else:
                bk.add_result(restype, base_file_name, linenumber, error_message)

    return 0


def run(bk):
    ''' main routine

    Zips the current book into a temporary epub, runs ACE on it with the
    user's home folder as output folder, adds the assertions of the JSON
    report to the validation pane and, if the html_output preference is
    set, opens the HTML report in the web browser.

    bk -- the book container object passed in by the plugin launcher

    Returns 0 on success and -1 if ACE did not produce a report or an
    epubcfi could not be resolved (unless the debug preference is set).
    '''
    # get/set preferences
    prefs = bk.getPrefs()
    # debug is read further down on every path, so it needs a value even
    # when the preferences are still empty
    debug = False
    if prefs == {}:
        html_output = True
        prefs['html_output'] = html_output
        bk.savePrefs(prefs)
    else:
        html_output = prefs.get('html_output', True)
        debug = prefs.get('debug', False)

    # get epubcfi file prefixes; first entry = 2, second entry, 4 etc.
    spine_dict = {}
    for position, (manifest_id, _) in enumerate(bk.getspine(), start=1):
        spine_dict[manifest_id] = 2 * position

    # ACE writes its reports to the home folder
    home = expanduser('~')
    report_file_name = os.path.join(home, 'report.html')
    json_file_name = os.path.join(home, 'report.json')

    epub_temp_dir = tempfile.mkdtemp()
    output_temp_dir = tempfile.mkdtemp()
    try:
        # remove reports of a previous run so that their existence proves
        # that this run succeeded
        for stale_report in (report_file_name, json_file_name):
            if os.path.exists(stale_report):
                os.remove(stale_report)

        # copy epub files to temp folder and add a mimetype file
        bk.copy_book_contents_to(epub_temp_dir)
        with open(os.path.join(epub_temp_dir, "mimetype"), "w") as mimetype:
            mimetype.write("application/epub+zip")

        # zip up files
        epub_path = os.path.join(output_temp_dir, 'temp.epub')
        epub_zip_up_book_contents(epub_temp_dir, epub_path)

        # run ace
        print('Running ACE ... please wait.\n')
        stdout, stderr = aceWrapper('ace', '-V', '-f', '-o', home, epub_path)

        # if ACE succeeded, there should be report files in the home folder
        if not (os.path.isfile(report_file_name) and os.path.isfile(json_file_name)):
            # ACE error handling: show what ACE printed
            encoding = 'cp850' if iswindows else 'utf-8'
            # remove escape formatting sequences
            stdout = re.sub(r'\x1b\[\d+m', '', stdout.decode(encoding, 'replace'))
            stderr = re.sub(r'\x1b\[\d+m', '', stderr.decode(encoding, 'replace'))
            print(stdout, '\n', stderr)
            return -1

        with open(json_file_name, 'r', encoding='utf-8') as file:
            parsed_json = json.load(file)

        if parsed_json['earl:result']['earl:outcome'] == 'fail':
            if _report_assertions(bk, parsed_json, spine_dict, debug) != 0:
                return -1

        if html_output:
            url = 'file://' + os.path.abspath(report_file_name)
            webbrowser.open(url)

        return 0
    finally:
        # delete temp folders on every exit path, including early returns
        shutil.rmtree(epub_temp_dir, False)
        shutil.rmtree(output_temp_dir, False)

def main():
    ''' entry point used when the file is executed as a script

    Prints a notice that this point should not have been reached and
    returns -1, which the script guard below uses as the exit status.
    '''
    print('I reached main when I should not have', end='\n\n')
    return -1

if __name__ == "__main__":
    raise SystemExit(main())
