#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

# target script

import sys
import os
import re
import sigil_bs4
import sigil_gumbo_bs4_adapter as gumbo_bs4
from cfiparse import parser as cfi_parser
from cfiparse import cfi_sort_key

# Set to True to print diagnostic details (parsed cfi, step numbers, target
# file, located node) while a cfi is being converted.
_DEBUG = False

def _remove_xml_header(data):
    return re.sub(r'<\s*\?xml\s*[^\?>]*\?*>\s*','',data, flags=re.I)

class CFIConvertException(Exception):
    """Raised by cfi_convert when an epubcfi cannot be converted."""

def cfi_convert(bk, ecfi):
    """Resolve a non-range epubcfi to a position inside its target document.

    The cfi is expected to have the shape ``/6/N!/...``: a step into the opf
    spine (6), a step selecting the spine item (N), a redirection (!), and
    then a path of steps inside the target document.  The target document is
    parsed with gumbo/bs4 and the remaining steps are walked starting from
    its html element.

    Args:
        bk: the book container object of the Sigil plugin interface; this
            function uses its epub_version(), spine_iter() /
            spine_epub3_iter(), id_to_href(), id_to_mime(),
            href_to_basename() and readfile() methods.
        ecfi: the epubcfi string to convert.

    Returns:
        A tuple ``(target_href, line, col, offset, time_offset,
        space_offset, text_offset)`` where line, col and offset come from
        the located node and the last three values are the offsets carried
        by the cfi itself.

    Raises:
        CFIConvertException: if the cfi is a range, uses an unsupported
            feature, does not go through the spine with a redirection,
            selects a spine item that does not exist, contains a second
            redirection, or addresses a text node that is not present.
    """

    if _DEBUG:
        print(ecfi)

    # parse epubcfi
    p = cfi_parser()
    parent_cfi, start_cfi, end_cfi, _unparsed = p.parse_epubcfi(ecfi)

    if start_cfi or end_cfi:
        raise CFIConvertException("Error: Using unsupported cfi range")

    if not parent_cfi:
        raise CFIConvertException("Error: Using unsupported cfi feature")

    if _DEBUG:
        print("parent_cfi: ", parent_cfi)
        print("parent_cfi['steps']:", parent_cfi['steps'])
        print("parent_cfi['redirect']:", parent_cfi['redirect'])

    # extract the node path
    stepnums, assertids, cfioffsets = cfi_sort_key(ecfi, only_path=False)

    # note nodepath and idtests are parallel lists and should be kept that way
    nodepath = list(stepnums)
    idtests = list(assertids)

    if _DEBUG:
        print("stepnums: ", stepnums)
        print("assertids: ", assertids)
        print("temporal offset:", cfioffsets[0])
        print("spacial offset:", cfioffsets[1])
        print("textual offset:", cfioffsets[2])

    text_offset = cfioffsets[2]
    space_offset = cfioffsets[1]
    time_offset = cfioffsets[0]

    def _next_step():
        # Pop the next (step number, id assertion) pair from the parallel
        # lists; yields (None, "") once the path is exhausted so callers can
        # report a proper error instead of hitting an IndexError.
        if not nodepath:
            return None, ""
        return nodepath.pop(0), idtests.pop(0)

    # an epubcfi starts at the package tag of the opf
    # the package tag has the following tag children using the
    # epubcfi node counting scheme:
    # 2: metadata, 4: manifest, 6: spine, 8: guide if present, etc)

    # An epubcfi should always be redirected to its target using nodenum 6: the spine

    # we could parse the opf into an xml node tree and follow along the path
    # but since the opf is already in parsed form by Sigil plugin interface,
    # we can simply look up which spine entry we need and go from there

    nodenum, _ = _next_step()
    if nodenum != 6:
        raise CFIConvertException("Error: the epubcfi only supports spine redirection in the opf")

    # now get the nodenumber for the target element of the spine
    nodenum, _ = _next_step()
    if nodenum is None:
        raise CFIConvertException("Error: the epubcfi does not select a valid spine item")

    # build a list of manifest ids as ordered by the spine
    epubversion = bk.epub_version()
    if epubversion.startswith("3"):
        mids = [idref for idref, linear, properties, href in bk.spine_epub3_iter()]
    else:
        mids = [idref for idref, linear, href in bk.spine_iter()]

    # now get the manifest id in the spine at the requested position
    # (element steps are even numbers, so step 2 is the first spine item);
    # reject out-of-range steps explicitly so that a negative index cannot
    # silently wrap around to the end of the spine
    spine_index = nodenum // 2 - 1
    if not 0 <= spine_index < len(mids):
        raise CFIConvertException("Error: the epubcfi does not select a valid spine item")
    target_idref = mids[spine_index]
    target_href = bk.id_to_href(target_idref)
    target_mime = bk.id_to_mime(target_idref)
    target_filename = bk.href_to_basename(target_href)

    if _DEBUG:
        print("target_idref:", target_idref)
        print("target_href:", target_href)
        print("target_mime:", target_mime)
        print("target_filename:", target_filename)

    # verify that the next step in the epubcfi is in fact a redirect
    # (a path that simply ends here is reported the same way)
    nodenum, _ = _next_step()
    if nodenum != -1:
        raise CFIConvertException("Error: epubcfi is missing the spine redirection")

    # parse the target_file in bs4/gumbo and follow the node path
    data = bk.readfile(target_idref)
    line_correction = 0
    if data.startswith('<?xml '):
        # the header removal also strips the whitespace that follows the
        # header, so count the newlines that were really removed rather
        # than assuming exactly one line was dropped
        stripped = _remove_xml_header(data)
        line_correction = data.count('\n') - stripped.count('\n')
        data = stripped
    soup = gumbo_bs4.parse(data)

    # we start at the html tag
    pos = soup.html

    # walk the remaining node path to terminal element
    for nodenum in nodepath:
        tid = idtests.pop(0)

        if _DEBUG:
            print("node number for step is: ", nodenum)

        if nodenum < 0:
            raise CFIConvertException("Error: Using a second redirection for iframe or object tag is unsupported")

        if nodenum & 1 == 0:

            # tag node (even), hoping that javascript is not playing with the dom
            # convert to a pure tag count
            elnum = nodenum // 2

            if _DEBUG:
                print("looking for element number: ", elnum)

            # walk through children of current node looking for the correct tag node
            # (if there are fewer tag children than requested, pos is left unchanged)
            for node in pos.contents:
                if isinstance(node, sigil_bs4.element.Tag):
                    elnum -= 1
                if elnum == 0:
                    pos = node
                    # test any asserts and if fail try to find node by id
                    if tid != "":
                        if tid != pos.attrs.get("id", ""):
                            print("Failed assert: ", tid, pos.attrs.get("id", ""))
                            dest = soup.find_all(id=tid)
                            if len(dest) > 0:
                                pos = dest[0]
                    break
        else:
            # text node (odd)
            # note: finding the right text node is not easy because of differences
            # in how text nodes are created by the dom. Specifically, are empty
            # text nodes added before the first tag child or between later tag
            # children if needed?  If not, the normal cfi node number must be fixed as
            # it always assumes all odd numbered children of a node are text nodes

            # one solution is to find the element node that precedes the target node
            # and then return its next sibling
            elnum = (nodenum - 1) // 2

            # first handle the special case of the first child being the target
            if elnum == 0:
                if not pos.contents:
                    raise CFIConvertException("Error: the epubcfi refers to a text node that does not exist")
                pos = pos.contents[0]
            else:
                # skip through to element children that precede the desired text node

                if _DEBUG:
                    print("skipping over ", elnum, " elements and take its next sibling")

                for node in pos.contents:
                    if isinstance(node, sigil_bs4.element.Tag):
                        elnum -= 1
                    if elnum == 0:
                        pos = node.next_sibling
                        break

                # the preceding element was the last child: there is no text node
                if pos is None:
                    raise CFIConvertException("Error: the epubcfi refers to a text node that does not exist")

    # return the required information from the gumbo node tree
    # now look at what was found
    # NOTE(review): only the line number is corrected for the stripped xml
    # header; col and offset are passed through as reported by the node and
    # are presumably relative to the header-less data - confirm with callers.
    nline = pos.line + line_correction
    ncol = pos.col
    foffset = pos.offset
    print("Found:", pos.original)
    if isinstance(pos, sigil_bs4.element.Tag):
        if _DEBUG:
            print("tag:",pos.name)
            print("original:", pos.original)
            print("line:", nline, "col:", ncol, "file offset:", foffset)
            print("textual offset:", text_offset)
            if hasattr(pos, 'end_line'):
                print("end_line: ", pos.end_line, "end_col:", pos.end_col, "end file offset:", pos.end_offset)
    elif isinstance(pos, sigil_bs4.element.NavigableString):
        if _DEBUG:
            print("#text:", str(pos))
            print("original:", pos.original)
            print("line:", nline, "col:", ncol, "file offset:", foffset)
            print("textual offset:", text_offset)
    else:
        msg = "Error: Found unknown bs4 element of type %s" % type(pos)
        raise CFIConvertException(msg)

    # returns target file href, line and column of target element, utf-8 file offset of target element
    # plus any additional offsets as specified by the epubcfi
    return (target_href, nline, ncol, foffset, time_offset, space_offset, text_offset)

