#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

# Copyright 2015-2017 Kevin B. Hendricks, Stratford Ontario

# This plugin's source code is available under the GNU LGPL Version 2.1 or GNU LGPL Version 3 License.
# See https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html or
# https://www.gnu.org/licenses/lgpl.html for the complete text of the license.

from __future__ import unicode_literals, division, absolute_import, print_function

import sys
import os

# The python 3 only imports sit behind a version guard; this is kept just
# to prevent failure without feedback on python 2.7
PY3 = (sys.version_info[0] == 3)

if PY3:
    from urllib.parse import unquote, urlparse
    # tkinter imports (currently disabled)
    # import tkinter
    # import tkinter.ttk as tkinter_ttk
    # import tkinter.constants as tkinter_constants
    # import tkinter.filedialog as tkinter_filedialog

# Maps an epub:type semantic to the aria role that complements it.
# Most values are DPUB-ARIA "doc-*" roles; a few semantics map onto core
# aria roles instead (figure, term, definition, directory, list, table, ...).
# "backlink" is the current name of the deprecated "referrer" semantic, so
# both map to the same role.
_epubtype_aria_map = {
    "abstract"        : "doc-abstract",
    "acknowledgments" : "doc-acknowledgments",
    "afterword"       : "doc-afterword",
    "appendix"        : "doc-appendix",
    "backlink"        : "doc-backlink",
    "biblioentry"     : "doc-biblioentry",
    "bibliography"    : "doc-bibliography",
    "biblioref"       : "doc-biblioref",
    "chapter"         : "doc-chapter",
    "colophon"        : "doc-colophon",
    "conclusion"      : "doc-conclusion",
    "cover"           : "doc-cover",
    "credit"          : "doc-credit",
    "credits"         : "doc-credits",
    "dedication"      : "doc-dedication",
    "endnote"         : "doc-endnote",
    "endnotes"        : "doc-endnotes",
    "epigraph"        : "doc-epigraph",
    "epilogue"        : "doc-epilogue",
    "errata"          : "doc-errata",
    "figure"          : "figure",
    "footnote"        : "doc-footnote",
    "foreword"        : "doc-foreword",
    "glossary"        : "doc-glossary",
    "glossterm"       : "term",
    "glossdef"        : "definition",
    "glossref"        : "doc-glossref",
    "index"           : "doc-index",
    "introduction"    : "doc-introduction",
    "landmarks"       : "directory",
    "list"            : "list",
    "list-item"       : "listitem",
    "noteref"         : "doc-noteref",
    "notice"          : "doc-notice",
    "pagebreak"       : "doc-pagebreak",
    "page-list"       : "doc-pagelist",
    "part"            : "doc-part",
    "preface"         : "doc-preface",
    "prologue"        : "doc-prologue",
    "pullquote"       : "doc-pullquote",
    "qna"             : "doc-qna",
    "referrer"        : "doc-backlink",
    "subtitle"        : "doc-subtitle",
    "table"           : "table",
    "table-row"       : "row",
    "table-cell"      : "cell",
    "tip"             : "doc-tip",
    "toc"             : "doc-toc",
}

# home directory of the current user
_USER_HOME = os.path.expanduser("~")

def _has_audio_video(bk):
    """Return True if any manifest media resource has an audio or video media type."""
    for _mid, _href, mime in bk.media_iter():
        if mime.startswith(("audio", "video")):
            return True
    return False


def _update_metadata(bk):
    """Rewrite the opf metadata and return the primary language of the book.

    The metadata xml is copied through as parsed, except that the schema.org
    accessibility meta tags are inserted just before the closing metadata tag
    when no meta property starting with "schema:access" is already present.
    The result is stored with bk.setmetadataxml().

    Returns the primary subtag of the first non-empty dc:language value
    (for example "en" for "en-US"), or None when no such value was found.
    """
    plang = None
    res = []
    has_access_meta = False
    qp = bk.qp
    qp.setContent(bk.getmetadataxml())
    for text, tagprefix, tagname, tagtype, tagattr in qp.parse_iter():
        if text is not None:
            res.append(text)
            if plang is None and tagprefix.endswith("dc:language"):
                # keep only the primary subtag; splitting once copes with tags
                # that carry more than one hyphen such as "zh-Hans-CN"
                primary = text.strip().split("-", 1)[0]
                if primary:
                    plang = primary
            continue

        if tagname == "meta" and tagtype == "begin":
            prop = tagattr.get("property", "") if tagattr else ""
            if prop.startswith("schema:access"):
                has_access_meta = True
        if tagname == "metadata" and tagtype == "end" and not has_access_meta:
            # insert accessibility metadata just before the close if needed
            res.append('<meta property="schema:accessibilitySummary">This publication conforms to WCAG 2.0 AA.</meta>\n')
            res.append('<meta property="schema:accessMode">textual</meta>\n')
            res.append('<meta property="schema:accessMode">visual</meta>\n')
            res.append('<meta property="schema:accessModeSufficient">textual</meta>\n')
            res.append('<meta property="schema:accessibilityFeature">structuralNavigation</meta>\n')
        res.append(qp.tag_info_to_xml(tagname, tagtype, tagattr))
    bk.setmetadataxml("".join(res))
    return plang


def _find_nav(bk):
    """Return (manifest id, file basename) of the first manifest item with the nav property.

    Returns (None, None) when no manifest item carries the nav property.
    """
    for mid, href, _mtype, mprops, _fallback, _moverlay in bk.manifest_epub3_iter():
        if mprops is not None and "nav" in mprops:
            navpath = unquote(urlparse(href).path)
            return mid, os.path.basename(navpath)
    return None, None


def _parse_nav(bk, navid, navfilename):
    """Parse the nav document and return (titlemap, etypemap).

    titlemap maps a file basename to a title: the first toc label of each
    run of toc entries pointing at that file, plus the text of the first h1
    for the nav file itself.
    etypemap maps a file basename to a (loctype, fragment, epub:type) tuple
    collected from the landmarks links; loctype is "body" when the link has
    no fragment and "id" otherwise.
    """
    titlemap = {}
    etypemap = {}
    qp = bk.qp
    qp.setContent(bk.readfile(navid))
    in_toc = False
    in_lms = False
    getlabel = False
    navtitle = None
    tochref = None
    prevfilename = ""
    for text, tagprefix, tagname, tagtype, tagattr in qp.parse_iter():
        if text is not None:
            if navtitle is None and tagprefix.endswith("h1"):
                navtitle = text
                titlemap[navfilename] = navtitle
            if in_toc and getlabel:
                if tochref is not None:
                    filename = os.path.basename(unquote(urlparse(tochref).path))
                    if filename and filename != prevfilename:
                        titlemap[filename] = text
                        # remember the file so that following entries which
                        # point into it do not replace its first label
                        prevfilename = filename
                tochref = None
                getlabel = False
            continue

        if tagtype != "begin":
            continue
        if tagname == "nav":
            if tagattr is not None and "epub:type" in tagattr:
                in_toc = tagattr["epub:type"] == "toc"
                in_lms = tagattr["epub:type"] == "landmarks"
        elif tagname == "a" and tagattr is not None and "href" in tagattr:
            if in_toc:
                # the next text node is taken as the label of this link
                tochref = tagattr["href"]
                getlabel = True
            if in_lms and "epub:type" in tagattr:
                etype = tagattr["epub:type"]
                urlobj = urlparse(tagattr["href"])
                filename = os.path.basename(unquote(urlobj.path))
                fragment = urlobj.fragment
                if fragment:
                    etypemap[filename] = ("id", fragment, etype)
                else:
                    etypemap[filename] = ("body", "", etype)
    return titlemap, etypemap


# the plugin entry point
def run(bk):
    """Update an epub 3 book to be more accessibility friendly.

    Adds accessibility metadata to the opf, then rewrites every xhtml file
    via convert_xhtml() and lists the images found together with their alt
    text.

    Returns 0 on success and -1 on failure (epub 2 input, audio or video
    resources present, no dc:language, or no nav document in the manifest).
    """
    # protect against epub2 epubs being sent to Access-Aide
    epubversion = "2.0"
    if bk.launcher_version() >= 20160102:
        epubversion = bk.epub_version()

    if epubversion.startswith("2"):
        print("Error: Access-Aide requires a valid epub 3 ebook as input")
        return -1

    # before anything check for video and audio files and abort if they exist
    if _has_audio_video(bk):
        print("Error: Access-Aide can not handle epubs with audio and video resources (yet)")
        return -1

    # find the primary language (first dc:language metadata value)
    # and update the metadata to include the accessibility metadata
    plang = _update_metadata(bk)
    if plang is None:
        print("Error: at least one dc:language must be specified in the opf")
        return -1

    # determine id of the nav
    navid, navfilename = _find_nav(bk)
    if navid is None:
        print("Error: nav property missing from the opf manifest properties")
        return -1

    # parse the nav for the titles to use in html head title tags and for
    # the epub:type semantics the landmarks set on files and fragments
    titlemap, etypemap = _parse_nav(bk, navid, navfilename)

    # now process every xhtml file including the nav
    # adding primary language to html tag, setting the title,
    # adding known nav landmark semantics epub:types and building up a list
    # of image links so that alt attributes can be more easily added
    imglst = []
    for mid, href in bk.text_iter():
        print("... updating: ", href, " with manifest id: ", mid)
        xhtmldata, ilst = convert_xhtml(bk, mid, href, plang, titlemap, etypemap)
        bk.writefile(mid, xhtmldata)
        imglst.extend(ilst)

    # TODO: there is no dialog for editing the alt text yet, so the image
    # tags are only listed here together with their current alt text
    print("\nImages Information")
    for (mid, filename, imgcnt, imgsrc, alttext) in imglst:
        print("   ... ", filename, " #", imgcnt, " src:", imgsrc, " alt text:", alttext)

    print("Updating Complete")
    # Setting the proper Return value is important.
    # 0 - means success
    # anything else means failure
    return 0
 

def convert_xhtml(bk, mid, href, plang, titlemap, etypemap):
    """Convert one xhtml file to be more Accessibility friendly.

    While copying the parsed file through, this
      - adds lang and xml:lang to the html tag attributes
      - adds title info to an empty head title tag
      - collects info on image use and contents of related alt attributes,
        adding an empty alt attribute where none exists
      - adds known epub:type semantics from the nav landmarks to the body tag
        or to the tag whose id matches the landmark "fragment"
      - adds aria role attributes to complement existing epub:type attributes

    bk        - the plugin book container (provides qp, readfile and
                href_to_basename)
    mid       - manifest id of the xhtml file to convert
    href      - href of that file, used to derive its basename
    plang     - primary language to set on the html tag
    titlemap  - dictionary mapping a file basename to a title
    etypemap  - dictionary mapping a file basename to a
                (loctype, fragment, epub:type) tuple

    Returns the updated xhtml and a list of tuples for the images:
    (manifest_id, filename, image_count, image_src, alt_text)
    """
    res = []
    # parse the xhtml, converting on the fly to update it
    qp = bk.qp
    qp.setContent(bk.readfile(mid))
    filename = bk.href_to_basename(href)
    maintitle = None
    loctype, fragment, etype = etypemap.get(filename, ("", "", ""))
    imgcnt = 0
    imglst = []
    for text, tprefix, tname, ttype, tattr in qp.parse_iter():
        if text is not None:
            # get any existing title in head; a whitespace only title is
            # treated as missing so that a real one can still be injected
            if "head" in tprefix and tprefix.endswith("title") and text.strip():
                maintitle = text
            res.append(text)
            continue

        if ttype in ("begin", "single"):
            # add missing epub:type for nav landmarks that point to fragments
            if loctype == "id" and tattr.get("id") == fragment:
                if "epub:type" not in tattr:
                    tattr["epub:type"] = etype

            # add missing epub:type for nav landmarks that have no fragments
            if loctype == "body" and tname == "body" and ttype == "begin":
                if "epub:type" not in tattr:
                    tattr["epub:type"] = etype

            # add primary language attributes to html tag
            if tname == "html" and ttype == "begin":
                tattr["lang"] = plang
                tattr["xml:lang"] = plang

            # add missing alt text attributes on img tags
            # build up list of img links and current alt text
            if tname == "img":
                imgcnt += 1
                alttext = tattr.get("alt", "")
                tattr["alt"] = alttext
                imgsrc = tattr.get("src", "")
                imglst.append((mid, filename, imgcnt, imgsrc, alttext))

            # complement epub:type with an aria role; epub:type holds a
            # whitespace separated list, so use the first value with a
            # known role instead of looking up the whole attribute string
            if "epub:type" in tattr and "role" not in tattr:
                for semantic in tattr["epub:type"].split():
                    ariarole = _epubtype_aria_map.get(semantic)
                    if ariarole is not None:
                        tattr["role"] = ariarole
                        break

        # inject any missing titles if possible
        if tname == "title" and ttype == "end" and "head" in tprefix:
            if maintitle is None:
                res.append(titlemap.get(filename, ""))

        res.append(qp.tag_info_to_xml(tname, ttype, tattr))

    return "".join(res), imglst


def main():
    """Report that the file was started directly and signal failure.

    Always prints a notice and returns -1.
    """
    notice = "I reached main when I should not have\n"
    print(notice)
    return -1


if __name__ == "__main__":
    # same effect as sys.exit(): leave with the status reported by main()
    raise SystemExit(main())

