#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, division, absolute_import, print_function

try:
    from sigil_bs4 import BeautifulSoup
except:
    from bs4 import BeautifulSoup

import sys, re, os

# page list templates; every '{}' is a str.format() placeholder filled in by run()

# NCX <pageList> wrapper; the placeholder receives the concatenated page targets
ncx_page_list_block = '\n'.join((
    '<pageList>',
    '    <navLabel>',
    '        <text>Paper Edition Page Mapping</text>',
    '    </navLabel>{}',
    '</pageList>',
))

# single NCX page target; placeholders in order: id, value, label text, content src
ncx_page_target = '\n'.join((
    '',
    '    <pageTarget id="{}" type="normal" value="{}">',
    '        <navLabel>',
    '            <text>{}</text>',
    '        </navLabel>',
    '        <content src="{}"/>',
    '    </pageTarget>',
))

# NAV page-list wrapper; placeholders in order: nav id, concatenated list items
nav_page_list_block = '\n'.join((
    '',
    '        <nav epub:type="page-list" id="{}">',
    '            <h1>Print Page List</h1>',
    '            <ol>',
    '{}            </ol>',
    '        </nav>',
    '',
))

# single NAV list item; placeholders in order: link href, link text
nav_page_target = '\n'.join((
    '                <li>',
    '                    <a href="{}">{}</a>',
    '                </li>',
    '',
))

def _write_soup(bk, file_id, soup):
    ''' write a BeautifulSoup document back to the book, pretty-printed when possible '''
    try:
        # NOTE(review): prettyprint_xhtml() is presumably only provided by Sigil's
        # bundled sigil_bs4; with any other parser object fall back to str(soup)
        markup = str(soup.prettyprint_xhtml(indent_level=0, eventual_encoding="utf-8", formatter="minimal", indent_chars="  "))
    except Exception:
        markup = str(soup)
    bk.writefile(file_id, markup)


def _update_ncx(bk, ncx_id, ncx_pagelist, page_targets, max_page):
    ''' replace the NCX <pageList> section and refresh the dtb page count metadata

    ncx_pagelist is the concatenated pageTarget markup, page_targets the number
    of emitted targets and max_page the value written to dtb:maxPageNumber.
    '''
    ncx_name = os.path.basename(bk.id_to_href(ncx_id))

    # get existing ncx contents
    ncx = bk.readfile(ncx_id)

    # delete existing pagelist, whether or not its opening tag carries attributes
    ncx = re.sub(r'\s*<pageList\b[^>]*>.*?</pageList>\s*', '', ncx, flags=re.DOTALL)

    # insert new pagelist right before the closing root tag
    ncx = ncx.replace('</ncx>', ncx_page_list_block.format(ncx_pagelist) + '\n</ncx>')

    # update dtb:totalPageCount / dtb:maxPageNumber; only existing meta elements are
    # rewritten. A function replacement keeps backslashes in labels from being
    # interpreted as regex group references.
    total_meta = '<meta name="dtb:totalPageCount" content="{}"/>'.format(page_targets)
    ncx = re.sub(r'<meta [^>]+"dtb:totalPageCount"[^>]+>', lambda _match: total_meta, ncx)

    max_meta = '<meta name="dtb:maxPageNumber" content="{}"/>'.format(max_page)
    ncx = re.sub(r'<meta [^>]+"dtb:maxPageNumber"[^>]+>', lambda _match: max_meta, ncx)

    # update ncx file
    bk.writefile(ncx_id, ncx)
    print('\n{} updated.'.format(ncx_name))


def _update_nav(bk, nav_id, nav_bookpath, nav_pagelist, page_targets, debug):
    ''' add or replace the epub:type="page-list" nav element in the NAV document

    nav_pagelist is the concatenated <li> markup; nav_bookpath is only used to
    derive the file name shown in the log output.
    '''
    nav_file_name = os.path.basename(nav_bookpath)
    pagelist_id = "pagelist"

    # get nav contents
    nav_soup = BeautifulSoup(bk.readfile(nav_id), 'html.parser')
    orig_nav_soup = str(nav_soup)
    old_page_list = nav_soup.find('nav', {'epub:type' : 'page-list'})

    if old_page_list is not None:

        # preserve existing page list id
        if old_page_list.has_attr('id'):
            pagelist_id = old_page_list['id']
            if debug and (pagelist_id != 'pagelist'):
                print('custom page list id:', pagelist_id)

        # report number of existing entries
        all_list_items = old_page_list.find_all('li')
        if all_list_items:
            print('\nOriginal NAV page list contained {} page targets.'.format(len(all_list_items)))
        else:
            print('\nOriginal NAV page list contained no page targets.')

    new_pagelist = BeautifulSoup(nav_page_list_block.format(pagelist_id, nav_pagelist), 'html.parser')

    if old_page_list is not None:
        # replace page list
        old_page_list.replace_with(new_pagelist)
        print('Replaced with {} page targets.'.format(page_targets))
    else:
        # add new page list at child index 2 of <body>
        nav_soup.body.insert(2, new_pagelist)
        print('\nNew NAV page list with {} page targets added.'.format(page_targets))

    # update nav
    if str(nav_soup) != orig_nav_soup:
        _write_soup(bk, nav_id, nav_soup)
        print('{} updated.'.format(nav_file_name))
    else:
        print('NAV file NOT updated.')


def run(bk):
    ''' main plugin routine

    Scans every text file returned by bk.text_iter() for page break markers
    (tag/attribute/value taken from the plugin preferences), adds missing id
    and epub:type attributes, and writes the collected targets to the NCX
    <pageList> and, for EPUB3 books, to the NAV page-list.

    Returns 0 on completion and -1 when the run is aborted (NAV/NCX missing
    or a marker has neither an id nor a usable label).
    '''

    #------------------------------------------
    # get epub version number
    #------------------------------------------
    if bk.launcher_version() >= 20160102:
        epubversion = bk.epub_version()
    else:
        epubversion = BeautifulSoup(bk.get_opf(), 'lxml').find('package')['version']
    is_epub3 = epubversion.startswith('3')

    # launchers from 20190927 on are addressed via book paths
    uses_bookpaths = bk.launcher_version() >= 20190927

    #----------------------------------
    # get preferences
    #----------------------------------
    prefs = bk.getPrefs()

    # write default preferences
    if prefs == {}:
        prefs['tag'] = 'span'
        prefs['attribute'] = 'epub:type'
        prefs['value'] = 'pagebreak'
        bk.savePrefs(prefs)

    debug = prefs.get('debug', False)
    tag = prefs.get('tag', 'span')
    attribute = prefs.get('attribute', 'epub:type')
    value = prefs.get('value', 'pagebreak')

    #---------------------------------------------------------
    # get toc.ncx id, nav id, nav_bookpath & ncx_bookpath
    #---------------------------------------------------------
    nav_id = nav_bookpath = ncx_bookpath = None
    ncx_id = bk.gettocid()

    # check for non-standard books
    if uses_bookpaths:
        if ncx_id:
            ncx_bookpath = bk.id_to_bookpath(ncx_id)

        if is_epub3:
            nav_id = bk.getnavid()
            if not nav_id:
                print('NAV doc not found!')
                return -1
            nav_bookpath = bk.id_to_bookpath(nav_id)

    # "standard" Sigil books
    else:
        if is_epub3:
            opf_soup = BeautifulSoup(bk.get_opf(), 'lxml')
            nav_item = opf_soup.find('item', {'properties' : 'nav'})
            if not nav_item:
                print('NAV doc not found!')
                return -1
            # remember the NAV manifest id so the NAV page list is actually written
            nav_id = nav_item['id']
            nav_bookpath = bk.id_to_href(nav_id)
        else:
            if not ncx_id:
                print('NCX not found!')
                return -1

    #==================================
    # look for page break markers
    #==================================
    ncx_entries = []
    nav_entries = []
    page_targets = 0    # number of targets written to the page lists
    ncx_href = None
    nav_href = None
    page_dic = {}       # label -> anchor(s), used to report duplicate labels
    last_page = None    # previous numeric page number; None when it was not numeric
    max_page = None     # largest numeric page number seen so far
    last_title = None   # label of the most recent target

    for (html_id, href) in bk.text_iter():
        html = bk.readfile(html_id)
        base_name = os.path.basename(href)

        # get relative paths
        if uses_bookpaths:
            html_bookpath = bk.id_to_bookpath(html_id)

            if ncx_bookpath:
                ncx_href = bk.get_relativepath(ncx_bookpath, html_bookpath)

            if is_epub3:
                nav_href = bk.get_relativepath(nav_bookpath, html_bookpath)

        else:
            ncx_href = href
            if is_epub3:
                nav_href = base_name

        # load html code into BeautifulSoup
        soup = BeautifulSoup(html, 'html.parser')
        orig_soup = str(soup)

        # find pagebreaks
        page_numbers = soup.find_all(tag, {attribute : value})
        if not page_numbers:
            print('\nNo [{} {}="{}"] page number targets found in {}.\n'.format(tag, attribute, value, base_name))
        else:
            print('\n{} page number targets found in {}.'.format(len(page_numbers), base_name))

        # add pagelist entries to pagelist
        for page_number in page_numbers:

            # label priority: title attribute, then aria-label, then element text
            title = page_number.get('title') or page_number.get('aria-label') or page_number.string

            # add missing id, if necessary
            if not page_number.has_attr('id'):
                if title:
                    page_number['id'] = 'page' + title
                    if debug:
                        print('id attribute added', page_number)
                else:
                    print('id attribute missing!', page_number.parent)
                    return -1

            target_id = page_number['id']

            # add missing epub3 pagebreak attribute
            if is_epub3 and not page_number.has_attr('epub:type'):
                page_number['epub:type'] = 'pagebreak'

            # a target without any label cannot be listed meaningfully
            if not title:
                print('\nWARNING: page target without title, aria-label or text skipped:', target_id)
                continue

            # check for duplicate titles/ids; duplicates are reported but do not abort the run
            anchor = os.path.basename(href + '#' + target_id)
            if title not in page_dic:
                page_dic[title] = anchor
            else:
                page_dic[title] += ' / ' + anchor
                print('\nERROR: duplicate page number found:', title, page_dic[title])

            # check for missing page targets; non-numeric labels (e.g. roman
            # numerals) reset the reference instead of poisoning later comparisons
            try:
                page_no = int(title)
            except (TypeError, ValueError):
                page_no = None

            if page_no is not None:
                if last_page is not None and page_no - last_page > 1:
                    print('\nWARNING: page gap detected:', last_page, '<=>', title)
                if max_page is None or page_no > max_page:
                    max_page = page_no
            last_page = page_no

            # epub2
            if ncx_href:
                ncx_entries.append(ncx_page_target.format(target_id, title, title, ncx_href + '#' + target_id))

            # epub3
            if nav_href:
                nav_entries.append(nav_page_target.format(nav_href + '#' + target_id, title))

            page_targets += 1
            last_title = title

        # update html if the code was changed
        if str(soup) != orig_soup:
            _write_soup(bk, html_id, soup)
            print('{} updated.'.format(base_name))

    if page_targets:

        # display page target count
        print('\n{} page number targets found.'.format(page_targets))

        #===================================
        # add/replace NCX pagelist section
        #===================================
        ncx_pagelist = ''.join(ncx_entries)
        if ncx_id and ncx_pagelist != '':
            # dtb:maxPageNumber gets the largest numeric page; when no label was
            # numeric fall back to the label of the last target
            _update_ncx(bk, ncx_id, ncx_pagelist, page_targets,
                        max_page if max_page is not None else last_title)
        else:
            print('\nNCX file not found or empty page list.', ncx_href, ncx_id)

        #=========================================
        # add/replace NAV pagelist section
        #=========================================
        if nav_id:
            _update_nav(bk, nav_id, nav_bookpath, ''.join(nav_entries), page_targets, debug)

    else:
        print('\nNo page number targets found.')

    print('\nPlease click OK to close the Plugin Runner window.')

    return 0

def main():
    ''' print a notice that main() was reached unexpectedly and return the failure code -1 '''
    notice = 'I reached main when I should not have\n'
    print(notice)
    return -1

if __name__ == "__main__":
    # executed as a script: hand main()'s return value to the interpreter as exit status
    exit_status = main()
    sys.exit(exit_status)
