#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

import sys, os, re, shutil, platform
from urllib.request import urlretrieve
from sigil_bs4 import BeautifulSoup
from compatibility_utils import iswindows


def get_unique_id(bk, id):
    ''' returns a manifest id that is not in use yet

    The requested id is returned unchanged when bk.id_to_href() cannot
    resolve it. Otherwise a zero-padded, four-digit counter (0001, 0002, ...)
    is appended to the requested id until an unresolved candidate is found.
    '''
    attempt = 0
    candidate = id
    while True:
        # an id that does not resolve to a href is free to use
        if bk.id_to_href(candidate, None) is None:
            return candidate
        attempt += 1
        candidate = id + format(attempt, '04d')

def get_unique_filename(bk, name):
    ''' returns a file name that no manifest entry uses as its base name

    Only the base names of the manifest hrefs are compared, so a name counts
    as taken no matter which folder the existing file lives in. When the
    name is taken, a zero-padded, four-digit counter is inserted between the
    root of the name and its extension until a free name is found.
    '''
    stem, suffix = os.path.splitext(name)
    taken = {os.path.basename(href) for _, href, _ in bk.manifest_iter()}

    candidate = name
    attempt = 0
    while candidate in taken:
        attempt += 1
        candidate = ''.join((stem, format(attempt, '04d'), suffix))
    return candidate


# XHTML 1.1 skeleton used by run() for epub2 books; the single {} placeholder
# inside <body> is filled with the converted HTML via str.format()
epub2_body = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title></title>
</head>

<body>
  {}
</body>
</html>'''

# skeleton used by run() for all other epub versions; it declares the epub
# namespace and has the same single {} placeholder inside <body>
epub3_body = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
<head>
  <title></title>
</head>

<body>
  {}
</body>
</html>'''

# get metadata
def get_language(bk):
    ''' returns dc:language value

    The metadata xml of the book is fed to the parser exposed as bk.qp and
    the first non-blank text found below .metadata.dc:language is returned
    with surrounding whitespace removed, so that callers can safely use
    startswith() on the result. Falls back to 'en' when the metadata has
    no usable dc:language text.
    '''
    metadata = bk.getmetadataxml()
    language = 'en'
    ps = bk.qp
    ps.setContent(metadata)
    for text, tagprefix, tagname, tagtype, tagattr in ps.parse_iter():
        if text and tagprefix == '.metadata.dc:language':
            # ignore padding/indentation around the language code and skip
            # text nodes that consist of whitespace only
            candidate = text.strip()
            if candidate:
                language = candidate
                break
    return language

# find cmark-gfm
def find_cmark_exe(bk):
    ''' returns the md exe file path

    Windows: path of cmark-gfm.exe inside the plugin folder.
    macOS:   path of cmark-gfm inside the plugin folder; the file is made
             executable when it exists.
    others:  whatever shutil.which('cmark-gfm') finds on the search path,
             i.e. None when the binary isn't installed.

    The returned path is not guaranteed to exist; the caller has to check.
    '''
    if sys.platform.startswith('win'):
        return os.path.join(bk._w.plugin_dir, bk._w.plugin_name, 'cmark-gfm.exe')

    if sys.platform.startswith('darwin'):
        cmark_exe_path = os.path.join(bk._w.plugin_dir, bk._w.plugin_name, 'cmark-gfm')
        # a missing or read-only binary must not crash the plugin here;
        # the caller reports a missing binary with a readable message
        if os.path.isfile(cmark_exe_path):
            try:
                os.chmod(cmark_exe_path, 0o744)
            except OSError as err:
                print('Could not make cmark-gfm executable:', err)
        return cmark_exe_path

    return shutil.which('cmark-gfm')

# wrapper for cmark-gfm
def md_wrapper(*args):
    ''' a wrapper for cmark-gfm

    args -- the complete command line, one item per argument, starting with
            the path of the executable

    Runs the command, waits for it to finish and returns the tuple
    (stdout, stderr, returncode) with both streams decoded as utf-8.
    Bytes that aren't valid utf-8 are replaced with U+FFFD instead of
    raising UnicodeDecodeError, so that an oddly encoded diagnostic can
    still be shown to the user.
    '''
    import subprocess
    startupinfo = None

    # stop the windows console popping up every time the prog is run
    if iswindows:
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        startupinfo.wShowWindow = subprocess.SW_HIDE

    process = subprocess.Popen(list(args), stdout=subprocess.PIPE, stderr=subprocess.PIPE, startupinfo=startupinfo)
    stdout, stderr = process.communicate()

    returncode = process.returncode
    return stdout.decode('utf-8', errors='replace'), stderr.decode('utf-8', errors='replace'), returncode

# main routine
def run(bk):
    ''' the main routine

    Asks the user for a Markdown file, converts it to HTML with cmark-gfm,
    embeds the result in the epub2 or epub3 XHTML skeleton, adds the new file
    (plus any referenced images that exist next to the Markdown file or could
    be downloaded) to the book and inserts it into the spine.

    bk -- the book container object passed in by the Sigil plugin launcher

    Returns 0 on success (an empty conversion result is reported and also
    returns 0) and -1 when a prerequisite is missing or the conversion failed.
    '''

    # PyQt requires Sigil 0.9.8
    if not bk.launcher_version() >= 20170115:
        print('This plugin requires Sigil 0.9.8 or higher.\nClick OK to close the Plugin Runner window.')
        return -1
    else:
        from plugin_utils import QtWidgets
        from plugin_utils import PluginApplication

    # get epub version number
    epubversion = bk.epub_version()
    is_epub2 = epubversion.startswith('2')

    # all extension names that are passed on to cmark-gfm
    known_extensions = ['footnotes', 'table', 'strikethrough', 'autolink', 'tagfilter', 'tasklist']

    #----------------------------
    # get preferences
    #----------------------------
    prefs = bk.getPrefs()

    # save default preferences
    if prefs == {}:
        if is_epub2:
            prefs['extensions'] = ['footnotes', 'table', 'strikethrough', 'autolink', 'tagfilter']
        else:
            prefs['extensions'] = list(known_extensions)
        prefs['strong'] = True
        prefs['sigil_split_marker'] = False
        cmark_exe_path = find_cmark_exe(bk)
        # NOTE(review): the defaults are only written to disk when the binary
        # was found -- confirm that this is intended
        if cmark_exe_path:
            prefs['cmark_exe_path'] = cmark_exe_path
            bk.savePrefs(prefs)

    # get pref values
    # (the extension list is copied, because 'tasklist' may be removed from
    # it below and the list stored in the preferences must stay untouched)
    extensions = list(prefs.get('extensions', known_extensions))
    cmark_exe_path = prefs.get('cmark_exe_path', None)
    strong = prefs.get('strong', True) # True: keep <em> and <strong>; False: replace them with <i> and <b>
    sigil_split_marker = prefs.get('sigil_split_marker', False) # add sigil_split_marker class attribute
    debug = prefs.get('debug', False) # show debug messages

    # make sure that the cmark-gfm binary was found
    if not cmark_exe_path or not os.path.isfile(cmark_exe_path):
        print('cmark-gfm binary not found!\nClick OK to close the Plugin Runner window.')
        return -1

    #------------------------------
    # get markdown file
    #------------------------------
    md_file_path = None
    mdp = True if iswindows else False
    # NOTE(review): app isn't used directly; presumably the application object
    # has to exist before the dialog is created -- confirm
    app = PluginApplication(sys.argv, bk, match_dark_palette=mdp)
    dlg = QtWidgets.QFileDialog()
    if sys.platform.startswith('darwin'):
        dlg.setOption(QtWidgets.QFileDialog.DontUseNativeDialog)
    dlg.setNameFilter("Markdown file (*.md *.text *.txt)")
    #dlg.setFileMode(QtWidgets.QFileDialog.FileMode.AnyFile)
    if dlg.exec():
        filenames = dlg.selectedFiles()
        md_file_path = filenames[0]

    # use the bundled test.md file in debug mode
    if debug and not md_file_path:
        md_file_path = os.path.join(bk._w.plugin_dir, bk._w.plugin_name, 'test.md')

    if not md_file_path:
        print('No markdown file selected!\nClick OK to close the Plugin Runner window.')
        return -1

    print('Converting {}...'.format(os.path.basename(md_file_path)))

    #-----------------------------------------------------------
    # assemble cmark-gfm command line
    #-----------------------------------------------------------

    # --unsafe                                            Render raw HTML and dangerous URLs
    # --smart                                             Use smart punctuation
    # --validate-utf8                                    Replace UTF-8 invalid sequences with U+FFFD
    # --extension, -e EXTENSION_NAME    Specify an extension name to use
    # --table-prefer-style-attributes             Use style attributes to align table cells instead of align attributes.

    # epub2 doesn't support the <input> tag used by the tasklist extension
    if is_epub2 and 'tasklist' in extensions:
        extensions.remove('tasklist')
        print('Tasklists are not allowed in epub2 books.')

    args = [cmark_exe_path, '--smart', '--unsafe', '--validate-utf8']

    # epub2 doesn't require table styles
    if not is_epub2:
        args.append('--table-prefer-style-attributes')

    # parse & validate extension list (an empty list simply adds nothing)
    # for more information on gfm extensions see https://www.markdownguide.org/extended-syntax/
    for extension in extensions:
        if extension in known_extensions:
            args.extend(['-e', extension])
        else:
            print('Unknown extension ignored:', extension)

    # add the Markdown file path; it must be added exactly once, because
    # cmark-gfm would otherwise convert the file twice and duplicate the text
    args.append(md_file_path)

    # run cmark-gfm
    if debug: print(args)
    stdout, stderr, returncode = md_wrapper(*args)

    # display error messages, if cmark-gfm failed
    if returncode != 0:
        print('Markdown conversion failed!\n{}\nClick OK to close the Plugin Runner window'.format(stderr))
        return -1

    #---------------------------------
    # assemble HTML file
    #---------------------------------
    if stdout.strip() == '':
        print('Empty file ignored.\nClick OK to close the Plugin Runner window')
        return 0

    base_name = os.path.basename(md_file_path)
    html_file_name = os.path.splitext(base_name)[0] + '.xhtml'

    # embed in epub2 html
    if is_epub2:
        data = epub2_body.format(stdout)

        # <section> tags aren't allowed in epub2 books
        if 'footnotes' in extensions:
            # replace <section> with <div>
            data = re.sub('(</*)section', r'\1div', data)

    # embed in epub3 html
    else:
        data = epub3_body.format(stdout)

    #------------------------------------------------------------------------------------------
    # replace smart quotation marks in German, French, Spanish, Russian and Polish books
    #------------------------------------------------------------------------------------------
    language = get_language(bk)
    if debug: print('\nLanguage:', language)

    # German quotation marks look like this: ‚_‘ „_“ or ›_‹ »_«
    # (the guillemet variant is used here)
    if language.startswith('de'):
        data = re.sub(r'(\W)‘([^‘]+)’(\W)', r'\1›\2‹\3', data)
        data = re.sub(r'(\W)“([^“]+)”(\W)', r'\1»\2«\3', data)

    # French quotation marks look like this: ‹ _ › « _ »
    # (the same replacement is applied to Spanish and Russian books)
    elif language.startswith('fr') or language.startswith('es') or language.startswith('ru'):
        data = re.sub(r'(\W)‘([^‘]+)’(\W)', r'\1‹\2›\3', data)
        data = re.sub(r'(\W)“([^“]+)”(\W)', r'\1«\2»\3', data)

    # Polish quotation marks look like this: ‚_’ „_”
    elif language.startswith('pl'):
        data = re.sub(r'(\W)‘([^‘]+)’(\W)', r'\1‚\2’\3', data)
        data = re.sub(r'(\W)“([^“]+)”(\W)', r'\1„\2”\3', data)

    # all other languages: no changes

    #----------------------------------------------------------------------
    # replace <em> and <strong> with <i> and <b>
    #----------------------------------------------------------------------
    if not strong:
        data = re.sub('(</*)strong>', r'\1b>', data)
        data = re.sub('(</*)em>', r'\1i>', data)

    # add the current file to the epub with a placeholder
    # (the file has to exist before the image hrefs can be calculated
    # relative to its book path)
    md_id = get_unique_id(bk, html_file_name)
    md_name = get_unique_filename(bk, html_file_name)
    bk.addfile(md_id, md_name, "PLACEHOLDER", "application/xhtml+xml")
    bk.spine_insert_before(-1, md_id, "no", None)
    md_bookpath = "OEBPS/Text/" + md_name
    if bk.launcher_version() >= 20190927:
        md_bookpath = bk.id_to_bookpath(md_id)

    #---------------------------
    # handle images
    #---------------------------
    soup = BeautifulSoup(data, 'html.parser')
    images = soup.find_all('img')
    if images:

        # get the directory that contains the markdown file
        md_file_dir = os.path.dirname(md_file_path)

        # update all image references in the html file
        for image in images:
            image_src = image.get('src')

            # raw HTML may contain <img> tags without a src attribute
            if not image_src:
                print('\nImage without src attribute ignored.')
                continue

            # images are always looked up (and downloaded to) the folder
            # of the markdown file, whatever folder the src points to
            image_file_name = os.path.basename(image_src)
            image_path = os.path.join(md_file_dir, image_file_name)

            # download external images
            if image_src.startswith('http'):
                print('External image reference found:', image_file_name)
                try:
                    urlretrieve(image_src, image_path)
                    print(image_file_name, 'downloaded.')
                except Exception as err:
                    # best effort: a failed download only skips this image
                    print(image_file_name, 'couldn\'t be downloaded.')
                    if debug: print(err)

            # make sure the image actually exists, add it to the epub
            # and update the src attribute
            if os.path.isfile(image_path):

                # add the image file to the epub
                image_id = get_unique_id(bk, image_file_name)
                image_name = get_unique_filename(bk, image_file_name)
                with open(image_path, 'rb') as fp:
                    image_data = fp.read()
                bk.addfile(image_id, image_name, image_data)
                print('\nImage file', image_name, 'added.')

                # calculate the img src href
                image_bookpath = "OEBPS/Images/" + image_name
                href = "../Images/" + image_name
                if bk.launcher_version() >= 20190927:
                    image_bookpath = bk.id_to_bookpath(image_id)
                    href = bk.get_relativepath(md_bookpath, image_bookpath)
                image['src'] = href

            else:
                print('\nImage file not found:', image_file_name)

        # update html
        data = str(soup.prettyprint_xhtml())

    #-----------------------------------------------
    # add epub3 footnote attributes
    #-----------------------------------------------
    if epubversion.startswith('3') and 'footnotes' in extensions:
        noterefs = soup.find_all('sup', { 'class' : 'footnote-ref' })
        for noteref in noterefs:
            noteref['epub:type'] = 'noteref'

            # the footnote id is the href of the link minus its leading '#';
            # skip references that contain no usable link
            anchor = noteref.a
            if anchor is None or not anchor.get('href'):
                continue
            footnote_id = anchor['href'][1:]
            footnote = soup.find(attrs={ 'id' : footnote_id })
            if footnote is not None:
                footnote['epub:type'] = 'footnote'
        if noterefs != []:
            if debug: print('\nepub:type footnote attribute(s) added.')
            data = str(soup.prettyprint_xhtml())

    #-----------------------------------------------------------
    # add sigil_split_marker class attributes
    #-----------------------------------------------------------
    if sigil_split_marker:
        hrs = soup.find_all('hr')
        for hr in hrs:
            hr['class'] = 'sigil_split_marker'
        if hrs != []:
            if debug: print('\nsigil_split_marker class attribute(s) added.')
            data = str(soup.prettyprint_xhtml())

    #------------------------------------------
    # update the html file contents
    #------------------------------------------
    bk.writefile(md_id, data)
    print('\nHTML file', md_name, 'updated.')

    print('\nClick OK to close the Plugin Runner window.')

    return 0

def main():
    ''' reports that the script was started directly and returns -1 '''
    message = 'I reached main when I should not have\n'
    print(message)
    return -1

# running the file as a script only reports the problem via the exit status
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
