#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, division, absolute_import, print_function

import os
import sys, os.path, inspect
from cutils import addDOCTYPEHeader
import shutil
import platform

# Add this file's own directory to sys.path so the plugin's copy of tidylib can be imported
PLUGIN_DIR = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.insert(1, PLUGIN_DIR)  # inserted at index 1, so the sys.path[0] entry is still searched first
# A failed import is only reported, not re-raised: tidy_document stays undefined in that case.
try:
    from tidylib import tidy_document
except ImportError as err:
    print('\n >>> ImportError: doc_tidy.py, line 17 - Unable to import tidylib!! ' + str(err))
    # NOTE: "line 17" in the message is hard-coded text; it is only accurate
    # while the tidylib import above stays on that physical line of the file.

# Names exported by a star-import of this module.
__all__=["docTidy", "docTidyNoWrap", "docTidyXML", "showHTMLBodyOnly"]


def docTidy(wdir, file):
    """Tidy *file* in place using ``tidy_document`` with XHTML output options.

    The document is read as UTF-8, passed through ``tidy_document`` with the
    option set below, written to a temporary file inside *wdir* and finally
    moved over the original path.

    Args:
        wdir: Directory in which the temporary ``tidy_nowrap.html`` is created.
        file: Path of the document to process; it is overwritten.

    Returns:
        0 once the original file has been replaced.

    Raises:
        OSError: If the input cannot be read or the output cannot be
            written or moved into place.
    """
    output = os.path.join(wdir, 'tidy_nowrap.html')

    # Read the input before touching the output path, so a failed read does
    # not leave an empty temporary file or an open handle behind.
    with open(file, 'rt', encoding='utf-8') as infp:
        xhtml = infp.read()

    # Key order and values are kept as in the original option set; only the
    # repeated 'quote-nbsp' entry (same value) was dropped.
    base_options = {
        'input-xml': 0,
        'output-html': 0,
        'output-xhtml': 1,
        'output-xml': 0,
        'break-before-br': 0,
        'css-prefix': "sgc",
        'uppercase-tags': 0,
        'uppercase-attributes': 0,
        'drop-proprietary-attributes': 1,
        'preserve-entities': 0,
        'quote-nbsp': 0,
        'quote-marks': 1,
        'new-empty-tags': '<mobi:pagebreak>',
        'doctype': 'omit',
        'alt-text': "",
        'clean': 1,
        'wrap': 0,
        'indent': "auto",
        'indent-spaces': 2,
        'markup': 1,
        'numeric-entities': 1,
        'vertical-space': 0,
        'indent-attributes': 0,
        'show-body-only': 0,
        'literal-attributes': 0,
        'ncr': 1,
        'merge-spans': 1,
        'join-classes': 1,
        'join-styles': 1,
        'word-2000': 1,
        'drop-empty-paras': 1,
        'drop-font-tags': 0,
        'bare': 0,
        'char-encoding': "utf8",
        'force-output': 1,
        'show-errors': 1,
    }

    # The second return value (the error report) is intentionally unused.
    html, _errors = tidy_document(xhtml, options=base_options)

    # write() emits the string in one call; writelines() on a str would
    # iterate it character by character.
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(html)

    # os.replace overwrites the destination in a single step, so the original
    # is never deleted without the new content already being in place.
    os.replace(output, file)
    return 0
    
def docTidyNoWrap(wdir, file):
    """Tidy *file* in place as XHTML, then call ``addDOCTYPEHeader`` on it.

    The document is read as UTF-8, passed through ``tidy_document`` with the
    option set below ('clean', 'numeric-entities', 'ncr' and 'word-2000' all
    disabled, 'doctype' set to 'omit'), written to a temporary file inside
    *wdir* and moved over the original path. ``addDOCTYPEHeader(wdir, file)``
    is then invoked on the result.

    Args:
        wdir: Directory in which the temporary ``tidy_nowrap.html`` is created;
            also forwarded to ``addDOCTYPEHeader``.
        file: Path of the document to process; it is overwritten.

    Returns:
        0 once the file has been replaced and ``addDOCTYPEHeader`` returned.

    Raises:
        OSError: If the input cannot be read or the output cannot be
            written or moved into place.
    """
    output = os.path.join(wdir, 'tidy_nowrap.html')

    # Read the input before touching the output path, so a failed read does
    # not leave an empty temporary file or an open handle behind.
    with open(file, 'rt', encoding='utf-8') as infp:
        xhtml = infp.read()

    # Key order and values are kept as in the original option set; only the
    # repeated 'quote-nbsp' entry (same value) was dropped.
    base_options = {
        'input-xml': 0,
        'output-html': 0,
        'output-xhtml': 1,
        'output-xml': 0,
        'break-before-br': 0,
        'uppercase-tags': 0,
        'uppercase-attributes': 0,
        'drop-proprietary-attributes': 1,
        'preserve-entities': 0,
        'repeated-attributes': 'keep-last',
        'quote-nbsp': 0,
        'quote-marks': 1,
        'alt-text': "",
        'doctype': 'omit',
        'clean': 0,
        'wrap': 0,
        'indent': "auto",
        'indent-spaces': 2,
        'markup': 1,
        'numeric-entities': 0,
        'vertical-space': 0,
        'indent-attributes': 0,
        'show-body-only': 0,
        'literal-attributes': 0,
        'ncr': 0,
        'merge-spans': 1,
        'join-classes': 1,
        'join-styles': 1,
        'word-2000': 0,
        'drop-empty-paras': 1,
        'drop-font-tags': 0,
        'bare': 0,
        'char-encoding': "utf8",
        'force-output': 1,
        'show-errors': 1,
    }

    # The second return value (the error report) is intentionally unused.
    html, _errors = tidy_document(xhtml, options=base_options)

    # write() emits the string in one call; writelines() on a str would
    # iterate it character by character.
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(html)

    # os.replace overwrites the destination in a single step, so the original
    # is never deleted without the new content already being in place.
    os.replace(output, file)

    # Tidy was told to omit the DOCTYPE; presumably this helper adds one back
    # (inferred from its name -- confirm against cutils).
    addDOCTYPEHeader(wdir, file)
    return 0
    
def docTidyXML(wdir, file):
    """Tidy *file* in place using ``tidy_document`` in XML-in / XML-out mode.

    The document is read as UTF-8, passed through ``tidy_document`` with
    'input-xml' and 'output-xml' enabled, written to a temporary file inside
    *wdir* and moved over the original path.

    Args:
        wdir: Directory in which the temporary ``tidy_nowrap.html`` is created.
        file: Path of the document to process; it is overwritten.

    Returns:
        0 once the original file has been replaced.

    Raises:
        OSError: If the input cannot be read or the output cannot be
            written or moved into place.
    """
    output = os.path.join(wdir, 'tidy_nowrap.html')

    # Read the input before touching the output path, so a failed read does
    # not leave an empty temporary file or an open handle behind.
    with open(file, 'rt', encoding='utf-8') as infp:
        xhtml = infp.read()

    # Key order and values are kept as in the original option set; only the
    # repeated 'char-encoding' entry (same value) was dropped.
    base_options = {
        'input-xml': 1,
        'output-html': 0,
        'output-xhtml': 0,
        'output-xml': 1,
        'break-before-br': 0,
        'uppercase-tags': 0,
        'uppercase-attributes': 0,
        'drop-proprietary-attributes': 1,
        'preserve-entities': 0,
        'doctype': "omit",
        'alt-text': "",
        'clean': 1,
        'wrap': 0,
        'indent': "auto",
        'indent-spaces': 2,
        'markup': 1,
        'numeric-entities': 0,
        'vertical-space': 0,
        'indent-attributes': 0,
        'char-encoding': "utf8",
        'show-body-only': 0,
        'literal-attributes': 0,
        'ncr': 0,
        'merge-spans': 1,
        'join-classes': 1,
        'word-2000': 1,
        'drop-empty-paras': 1,
        'drop-font-tags': 0,
        'bare': 0,
        'force-output': 1,
        'show-errors': 0,
    }

    # The second return value (the error report) is intentionally unused.
    html, _errors = tidy_document(xhtml, options=base_options)

    # write() emits the string in one call; writelines() on a str would
    # iterate it character by character.
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(html)

    # os.replace overwrites the destination in a single step, so the original
    # is never deleted without the new content already being in place.
    os.replace(output, file)
    return 0
    
def showHTMLBodyOnly(wdir, file):
    """Tidy *file* in place with Tidy's 'show-body-only' option enabled.

    The document is read as UTF-8, passed through ``tidy_document`` with the
    option set below, written to a temporary file inside *wdir* and moved
    over the original path.

    Args:
        wdir: Directory in which the temporary ``tidy_nowrap.html`` is created.
        file: Path of the document to process; it is overwritten.

    Returns:
        The *file* path that was passed in.

    Raises:
        OSError: If the input cannot be read or the output cannot be
            written or moved into place.
    """
    output = os.path.join(wdir, 'tidy_nowrap.html')

    # Read the input before touching the output path, so a failed read does
    # not leave an empty temporary file or an open handle behind.
    with open(file, 'rt', encoding='utf-8') as infp:
        xhtml = infp.read()

    # Key order and values are kept exactly as in the original option set.
    base_options = {
        'input-xml': 0,
        'output-html': 0,
        'output-xhtml': 1,
        'output-xml': 0,
        'break-before-br': 0,
        'uppercase-tags': 0,
        'uppercase-attributes': 0,
        'drop-proprietary-attributes': 1,
        'preserve-entities': 0,
        'doctype': "auto",
        'alt-text': "",
        'clean': 0,
        'wrap': 0,
        'indent': "auto",
        'indent-spaces': 2,
        'markup': 1,
        'numeric-entities': 1,
        'vertical-space': 0,
        'indent-attributes': 0,
        'show-body-only': 1,
        'literal-attributes': 0,
        'ncr': 0,
        'merge-spans': 1,
        'join-classes': 1,
        'join-styles': 1,
        'quote-nbsp': 0,
        'word-2000': 0,
        'drop-empty-paras': 1,
        'drop-font-tags': 0,
        'bare': 0,
        'char-encoding': "utf8",
        'force-output': 1,
        'show-errors': 1,
    }

    # The second return value (the error report) is intentionally unused.
    html, _errors = tidy_document(xhtml, options=base_options)

    # write() emits the string in one call; writelines() on a str would
    # iterate it character by character.
    with open(output, 'wt', encoding='utf-8') as outfp:
        outfp.write(html)

    # os.replace overwrites the destination in a single step, so the original
    # is never deleted without the new content already being in place.
    os.replace(output, file)
    return file
        
