#!/Python3/python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, division, absolute_import, print_function

import os, os.path, sys, codecs, shutil, inspect, re, time
# 'chardet' removed – unused
from decimal import *
from tempfile import mkdtemp                  
from PIL import Image
import options
import time

from xml.sax.saxutils import escape
import locale 
# import lxml.html.clean as clean  # Removed – unused
import tkinter as tk
import tkinter.messagebox as mbox

try:
    from sigil_bs4 import BeautifulSoup, Comment
except:
    from bs4 import BeautifulSoup, Comment  
    
    
# HTML tags accepted by the validator as Kindle-supported.
SUPPORTED_TAGS = [
    '!DOCTYPE', '?xml?', 'a', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'b', 'big',
    'body', 'blockquote', 'br', 'center', 'cite', 'dd', 'del', 'div', 'dfn', 'em',
    'font', 'head', 'hr', 'html', 'i', 'img', 'li', 'meta', 'link', 'ol', 's', 'small',
    'span', 'strike', 'strong', 'sub', 'sup', 'title', 'u', 'ul', 'var',
    'mbp:pagebreak', 'mbp:nu', 'mbp:section', 'table', 'tbody', 'td', 'tfoot',
    'th', 'thead', 'tr', 'style', 'image', 'svg',
]

# XHTML attributes accepted by the validator as Kindle-supported
# (all lower case; 'style' used to be listed twice).
SUPPORTED_XHTML_ATTRIBUTES = [
    'href', 'id', 'name', 'body', 'title', 'align', 'bgcolor', 'color', 'face',
    'size', 'width', 'class', 'border', 'height', 'src', 'alt', 'style', 'xml:lang',
    'xmlns', 'charset', 'type', 'http-equiv', 'content', 'rel',
    'preserveaspectratio', 'version', 'xmlns:xlink', 'viewbox', 'xlink:href',
]

# CSS properties / pseudo-class names reported as not allowed or ignored.
UNSUPPORTED_STYLE_ATTRIBUTES = [
    'counter-increment', 'counter-reset', 'visited', 'hover', 'active',
    'outline', 'outline-color', 'outline-style', 'outline-width',
    'max-width', 'max-height', 'min-width', 'min-height',
]

# Image file extensions accepted for the cover and for in-book images.
SUPPORTED_COVER_IMAGE_TYPES = ['jpeg', 'jpg', 'tiff']
SUPPORTED_IMAGE_TYPES = ['jpeg', 'jpg', 'gif', 'bmp', 'png']

# Validation error/warning messages.
UNSUPPORTED_TAG = 'tag is not supported by Kindle.'
UNSUPPORTED_ATTR = 'attribute is not supported by Kindle.'
HEADING_STYLES_NOT_FOUND = (
    '[GENERAL WARNING]: Both h1 and h2 heading styles are not used '
    'in the epub. Heading styles should always be declared in the css and '
    'used for all main headings and chapter headings.'
)
COVER_FILE_NOT_ALLOWED = (
    '[WARNING]: According to the Kindle Guidelines a cover '
    'page is not required when you upload an epub to KDP. The cover page should be removed.'
)
# The two fragments used to be joined without a space ("TIFFformats").
ILLEGAL_COVER_IMAGE_TYPE = (
    '[ERROR]: Incorrect cover image format. Only JPEG or TIFF '
    'formats are allowed.'
)
ILLEGAL_EBOOK_IMAGE_TYPE = (
    '[ERROR]: Incorrect ebook image format. Only GIF, BMP, JPEG and PNG '
    'formats are allowed.'
)
ILLEGAL_SVG_IMAGE = (
    '[WARNING]: SVG images are not fully supported. SVG images '
    'will display on KF8 devices but will not display on older KF7 devices.'
)
IMG_NOT_IN_DUAL_FORMAT = (
    '[WARNING]: Image has not been dual formatted using media queries. '
    'This could cause sub-optimal sizing problems for smaller-than-page-size image displays on both KF7 and KF8 devices.'
)
TOC_NOT_FOUND = '[ERROR]: Epub TOC file not found.'
BAD_INTERNAL_LINK = '[ERROR]: Bad internal link.'
ILLEGAL_STYLE_ATTRIBUTE = 'attribute is either not allowed or will be ignored.'
# Callers append '(<toc file name>).' to this message.
BAD_LOGICAL_TOC = (
    '[WARNING]: The Logical TOC heading list(toc.ncx) does '
    'not contain the same toc items as the epub TOC heading list'
)
MISSING_OPF_GUIDES_REFS = (
    '[WARNING]: One or more guide items is missing. The opf guide should '
    'contain 2 appropriate references for the toc and start read '
    'point which are used as links in the Go To menu on the Kindle device.'
)
TOO_MANY_OPF_GUIDES_REFS = (
    '[WARNING]: Too many references in the opf guide. The opf guide should '
    'contain only 2 appropriate references for the toc and start read '
    'point which are used as links in the Go To menu on the Kindle device.'
)
LI_BODY_STYLE_ISSUES1 = (
    '[GENERAL WARNING]: "body" style is not defined in the CSS. '
    '"body" style, or its class equivalent, must be defined and should contain global default values such as "text-align: justify;" and "text-indent: 0;" to help avoid common Look Inside problems.'
)
LI_BODY_STYLE_ISSUES2 = (
    '[WARNING]: "body" style has missing declarations. '
    '"body" style should define global default values such as "text-align: justify;" and "text-indent: 0;" to help avoid common Look Inside problems.'
)
LI_P_STYLE_ISSUES = (
    '[WARNING]: "p" style has missing declarations. '
    '"p" style should define global default values such as "line-height: 120%", "font-size: 1em;", "margin: 0;" and "padding: 0;" etc to help avoid common formatting issues.'
)
UNSAFE_ABSOLUTE_VALUES = (
    '[WARNING]: "pt" value detected. All font-sizing and spacing values '
    'in the CSS should use relative "em" or percentage values. Use of absolute "pt" values should be avoided.'
)
                              

def _pref_enabled(prefs, key):
    """Return True when the preference *key* compares equal to ``True``."""
    # Equality rather than truthiness is deliberate: the stored value type is
    # not visible from this module, so the comparison is kept strict.
    return prefs[key] == True  # noqa: E712


def _toc_key(text):
    """Normalise a TOC heading for comparison (collapse whitespace, lower case)."""
    return ' '.join(text.split()).lower()


def _scan_css_rule(bk, starts_rule, wanted):
    """Find the first CSS rule accepted by *starts_rule* and inspect its body.

    The stylesheets from ``bk.css_iter()`` are searched in order.  For the
    first line for which ``starts_rule(line)`` is true, the lines from there
    up to (not including) the first line containing '}' are examined and
    every line containing one of the *wanted* substrings is counted.

    Returns ``(css_id, line_number, count)`` with a 1-based line number, or
    ``None`` when no stylesheet contains a matching line.
    """
    for css_id, _ in bk.css_iter():
        lines = bk.readfile(css_id).splitlines()
        for index, line in enumerate(lines):
            if not starts_rule(line):
                continue
            count = 0
            for rule_line in lines[index:]:
                if '}' in rule_line:
                    break
                if any(prop in rule_line for prop in wanted):
                    count += 1
            return css_id, index + 1, count
    return None


def KDPEpubValidation(bk, wdir):
    """Run the Kindle/KDP checks on the epub and record the findings.

    Each finding is appended to ``options.RESULTS`` as a list
    ``[severity, file name, line number, message]`` where severity is
    'error', 'warning' or 'info' and the line number is 1-based (0 when the
    finding is not tied to a line).  XHTML files are examined line by line,
    i.e. each physical line is parsed on its own.

    Checks performed: unsupported properties in <style> elements, opf guide
    references, image formats, unsupported tags/attributes, inline styles,
    dual formatting of small images, internal links, h1/h2 usage, "pt" values
    and unsupported properties in CSS, "p"/"body" rule declarations, presence
    of a TOC file and agreement between the epub TOC and toc.ncx.  Several
    checks are switched by the preferences returned from ``bk.getPrefs()``.

    Args:
        bk: book container; ``getPrefs``, ``text_iter``, ``css_iter``,
            ``readfile``, ``id_to_href``, ``getguide`` and ``gettocid`` are
            called on it here (helpers called from here use further methods).
        wdir: working directory; the epub images are copied into it so their
            pixel sizes can be read.

    Returns:
        Always 0.
    """
    prefs = bk.getPrefs()
    results = options.RESULTS

    # Sets give exact-name membership tests.  Testing against the joined list
    # text would also accept any name that happens to be a substring of it
    # (e.g. 'dt' across 'td'+'tfoot', or 'lang' inside 'xml:lang').
    # NOTE(review): names that only passed as substrings are now reported;
    # add them to the module lists if they should be accepted.
    supported_tags = frozenset(SUPPORTED_TAGS)
    supported_attrs = frozenset(SUPPORTED_XHTML_ATTRIBUTES)
    supported_images = frozenset(SUPPORTED_IMAGE_TYPES)
    toc_file_names = ('contents.xhtml', 'contents.html', 'toc.xhtml', 'toc.html')

    # copy all epub images to the work dir
    copyImageFiles2Dir(bk, wdir)

    # get the xhtml file ids
    text_ids = [text_id for text_id, _ in bk.text_iter()]

    # check for illegal style attributes in html <style> elements
    for text_id in text_ids:
        data = bk.readfile(text_id)
        style_lines = [number for number, line in enumerate(data.splitlines(), 1)
                       if line.strip().startswith('<style')]
        if not style_lines:
            continue
        # Only the first <style> element of the document is inspected; its
        # findings are reported once for every line that opens a <style> tag.
        style = BeautifulSoup(data, 'html.parser').find('style')
        if style is None or style.string is None:
            continue
        css_text = style.string
        report_name = os.path.basename(bk.id_to_href(text_id))
        for number in style_lines:
            for usa in UNSUPPORTED_STYLE_ATTRIBUTES:
                if usa in css_text:
                    message = '[ERROR]: "' + usa + '" <style> ' + ILLEGAL_STYLE_ATTRIBUTE
                    results.append(['error', report_name, number, message])

    # check the opf guide items
    cover_fname = ''
    text_flag = False
    toc_flag = False
    cover_flag = False
    guide_refs = bk.getguide()
    for ref_type, _title, ref_href in guide_refs:
        if ref_type == 'cover':
            cover_flag = True
            cover_fname = os.path.basename(ref_href).strip()
        if ref_type == 'text':
            text_flag = True
        if ref_type == 'toc':
            toc_flag = True

    guides_ok = toc_flag and text_flag and not cover_flag

    # check for a cover page
    if _pref_enabled(prefs, 'cover_file_warning') and cover_flag:
        results.append(['warning', cover_fname, 0, COVER_FILE_NOT_ALLOWED])

    # check for correct guide items
    if not guides_ok and len(guide_refs) < 2 and \
            _pref_enabled(prefs, 'missing_guide_refs_warning'):
        lin_num = getOPFLineNumber(bk, '<guide>')
        results.append(['warning', 'content.opf', lin_num, MISSING_OPF_GUIDES_REFS])

    # check for too many guide items
    if len(guide_refs) > 2 and _pref_enabled(prefs, 'too_many_guide_refs_warning'):
        lin_num = getOPFLineNumber(bk, '<guide>')
        results.append(['warning', 'content.opf', lin_num, TOO_MANY_OPF_GUIDES_REFS])

    # check ebook image format (first <img> of each line, cover page excluded)
    for text_id, href in bk.text_iter():
        fname = os.path.basename(href)
        if fname == cover_fname:
            continue
        for index, line in enumerate(bk.readfile(text_id).splitlines()):
            img = BeautifulSoup(line, 'html.parser').find('img')
            if img is None:
                continue
            src = img.get('src')
            if not src:
                # an <img> without a usable src has no format to check
                continue
            # splitext copes with names that contain several dots or none;
            # a name without extension yields '' and is reported.
            ext = os.path.splitext(os.path.basename(src))[1].lstrip('.').lower()
            if ext not in supported_images:
                results.append(['error', fname, index + 1, ILLEGAL_EBOOK_IMAGE_TYPE])

    # check xhtml files
    h1_count = 0
    h2_count = 0
    check_svg = _pref_enabled(prefs, 'svg_warning')
    check_dual_format = _pref_enabled(prefs, 'dual_format_warning')
    media_query_found = None    # result of hasMediaQuery(bk), computed on first use

    for text_id, href in bk.text_iter():
        fname = os.path.basename(href)

        # no need to check the cover page
        if fname == cover_fname:
            continue

        report_name = os.path.basename(bk.id_to_href(text_id))
        is_toc_file = fname.lower() in toc_file_names

        for index, line in enumerate(bk.readfile(text_id).splitlines()):
            # lines that open or close a comment are skipped unparsed
            if line.startswith('<!--') or line.startswith('-->'):
                continue

            line_no = index + 1
            soup = BeautifulSoup(line, 'html.parser')
            tags = soup.find_all(True)

            # check for unsupported html tags
            for tag in tags:
                if tag.name not in supported_tags:
                    message = '[ERROR]: ' + '<' + tag.name + '> ' + UNSUPPORTED_TAG
                    results.append(['error', report_name, line_no, message])

                if check_svg and tag.name == 'svg':
                    # recorded as a warning, matching the message text and
                    # the name of the preference that enables the check
                    results.append(['warning', report_name, line_no, ILLEGAL_SVG_IMAGE])

            # check for unsupported attributes in xhtml files
            for tag in tags:
                for attribute in tag.attrs:
                    if str(attribute) not in supported_attrs:
                        message = '[ERROR]: "' + str(attribute) + '" ' + UNSUPPORTED_ATTR
                        results.append(['error', report_name, line_no, message])

            # check for illegal inline styling attributes in xhtml files
            for tag in tags:
                inline_style = tag.attrs.get('style')
                if inline_style is None:
                    continue
                for usa in UNSUPPORTED_STYLE_ATTRIBUTES:
                    if usa in inline_style:
                        message = '[ERROR] "' + usa + '" style ' + ILLEGAL_STYLE_ATTRIBUTE
                        results.append(['error', report_name, line_no, message])

            # check that smaller ebook images are dual formatted
            if check_dual_format:
                img = soup.find('img')
                src = img.get('src') if img is not None else None
                if src:
                    file_path = os.path.join(wdir, os.path.basename(src))
                    try:
                        width, _height = getImageSize(file_path)
                    except Exception:
                        # best effort: an image that cannot be opened or read
                        # is simply not size-checked
                        width = None

                    # "smaller" means narrower than a 650 px wide page
                    if width is not None and round(width / 650 * 100) < 100 and \
                            not img.has_attr('class'):
                        if media_query_found is None:
                            media_query_found = hasMediaQuery(bk)
                        if not media_query_found:
                            results.append(['warning', report_name, line_no,
                                            IMG_NOT_IN_DUAL_FORMAT])

            # check for bad internal links (first anchor of the line)
            anchor = soup.find('a')
            if anchor is not None and anchor.has_attr('href'):
                target = anchor['href']
                is_external = 'http:' in target or 'https:' in target or 'mailto:' in target
                if not is_external and '../Text/' not in target:
                    results.append(['error', report_name, line_no, BAD_INTERNAL_LINK])

            # count h1 & h2 headings outside the TOC page
            if not is_toc_file:
                h1_count += len(soup.find_all('h1'))
                h2_count += len(soup.find_all('h2'))

    # check h1 & h2 counts
    if _pref_enabled(prefs, 'heading_styles_not_found_warning'):
        if h1_count == 0 and h2_count == 0:
            results.append(['info', '', 0, HEADING_STYLES_NOT_FOUND])

    # check that 'em' values are used in css
    if _pref_enabled(prefs, 'absolute_values_warning'):
        for css_id, _ in bk.css_iter():
            css_name = os.path.basename(bk.id_to_href(css_id))
            for index, line in enumerate(bk.readfile(css_id).splitlines()):
                # a declaration without the closing ';' is treated as if it had one
                if ':' in line and not line.endswith(';'):
                    line = line + ';'
                if 'pt;' in line:
                    results.append(['warning', css_name, index + 1, UNSAFE_ABSOLUTE_VALUES])

    # check for illegal css style attributes
    for css_id, _ in bk.css_iter():
        css_name = os.path.basename(bk.id_to_href(css_id))
        for index, line in enumerate(bk.readfile(css_id).splitlines()):
            if ':' not in line:
                continue
            attribute = line.strip().split(':')[0]
            if attribute in UNSUPPORTED_STYLE_ATTRIBUTES:
                message = '[ERROR] "' + attribute + '" style ' + ILLEGAL_STYLE_ATTRIBUTE
                results.append(['error', css_name, index + 1, message])

    # check that css has the appropriate "p" style declarations; every
    # stylesheet is searched until one defines the rule
    p_rule = _scan_css_rule(
        bk,
        lambda line: line.lstrip().startswith('p ') and '{' in line and 'p.' not in line,
        ('line-height', 'font-size'))

    # missing "p" style declarations
    if _pref_enabled(prefs, 'missing_p_style_declarations_warning') and p_rule is not None:
        css_id, line_no, found = p_rule
        if found < 2:
            css_name = os.path.basename(bk.id_to_href(css_id))
            results.append(['warning', css_name, line_no, LI_P_STYLE_ISSUES])

    # check that css has "body" style
    body_rule = _scan_css_rule(
        bk,
        lambda line: 'body' in line and '{' in line,
        ('text-align', 'text-indent'))

    # check that body style is in css
    if _pref_enabled(prefs, 'missing_body_style_warning') and body_rule is None:
        results.append(['info', '', 0, LI_BODY_STYLE_ISSUES1])

    # missing body declarations
    if _pref_enabled(prefs, 'missing_body_declarations_warning') and body_rule is not None:
        css_id, line_no, found = body_rule
        if found < 2:
            css_name = os.path.basename(bk.id_to_href(css_id))
            results.append(['warning', css_name, line_no, LI_BODY_STYLE_ISSUES2])

    # check for missing toc file: a conventionally named file wins ...
    toc_loc = ''
    toc_id = ''
    toc_found = False
    for text_id in text_ids:
        if os.path.basename(bk.id_to_href(text_id)).lower() in toc_file_names:
            toc_id = text_id
            toc_loc = bk.id_to_href(text_id)
            toc_found = True
            break
    else:
        # ... otherwise fall back to the text content.  'Contents' also covers
        # 'Table of Contents'.  When several files match, the last one is kept.
        # NOTE(review): confirm whether the first match would be the better pick.
        for text_id, _ in bk.text_iter():
            if any('Contents' in line for line in bk.readfile(text_id).splitlines()):
                toc_id = text_id
                toc_loc = bk.id_to_href(text_id)
                toc_found = True

    if not toc_found:
        results.append(['error', '', 0, TOC_NOT_FOUND])

    # check that the logical toc list has the same toc items as the epub toc list
    if _pref_enabled(prefs, 'logical_toc_warning') and toc_id != '':
        epub_toc_list = []
        toc_soup = BeautifulSoup(bk.readfile(toc_id), 'html.parser')
        for anchor in toc_soup.find_all('a'):
            if not anchor.has_attr('href') or not anchor.string:
                continue
            target = anchor['href']
            # only internal links are TOC entries
            if 'http' in target or 'mailto' in target:
                continue
            key = _toc_key(anchor.string)
            if key:
                epub_toc_list.append(key)

        ncx_toc_items = set()
        ncx_soup = BeautifulSoup(bk.readfile(bk.gettocid()), 'xml')
        for nav_point in ncx_soup.find_all('navPoint'):
            label = nav_point.find('text')
            # a navPoint without (simple) label text has nothing to compare
            if label is not None and label.string:
                ncx_toc_items.add(_toc_key(label.string))

        message = BAD_LOGICAL_TOC + '(' + os.path.basename(toc_loc) + ').'
        for epub_toc_item in epub_toc_list:
            # whole headings are compared, not substrings of the joined list
            if epub_toc_item not in ncx_toc_items:
                results.append(['warning', 'toc.ncx', 0, message])

    return 0
    
def show_msgbox(title, msg, msgtype='info'):
    """Show a message box for general information, warnings and errors.

    A hidden Tk root window is created for the duration of the call and
    destroyed afterwards, so repeated calls do not accumulate root windows.

    Args:
        title: window title of the message box.
        msg: message text.
        msgtype: 'info', 'warning' or 'error'; selects the kind of box.

    Returns:
        The return value of the tkinter.messagebox function that was used,
        or None when *msgtype* is not one of the three known values.
    """
    dialogs = {
        'info': mbox.showinfo,
        'warning': mbox.showwarning,
        'error': mbox.showerror,
    }
    localRoot = tk.Tk()
    try:
        localRoot.withdraw()    # keep the helper root window invisible
        localRoot.option_add('*font', 'Helvetica -12')
        # NOTE(review): kept from the original; quit() asks a running Tk main
        # loop to stop - confirm whether callers depend on that.
        localRoot.quit()
        show = dialogs.get(msgtype)
        if show is None:
            return None
        return show(title, msg)
    finally:
        # release the helper root even when the dialog raises
        localRoot.destroy()
  
def copyCSS2Dir(bk, wdir):
    """Copy every stylesheet of the book into *wdir*.

    Each stylesheet from ``bk.css_iter()`` is written, unchanged, as UTF-8
    text to ``wdir/<base name of its href>``.

    Args:
        bk: book container providing ``css_iter()`` and ``readfile(id)``.
        wdir: existing directory that receives the copies.

    Returns:
        List of the stylesheet hrefs as yielded by ``bk.css_iter()``, in order.
    """
    css_files = []
    for css_id, css_href in bk.css_iter():
        output = os.path.join(wdir, os.path.basename(css_href))
        # Read first so a failing read does not leave an empty file behind.
        data = bk.readfile(css_id)
        # NOTE(review): the earlier code called data.replace('\n\n', '\n')
        # and discarded the result, so blank lines were never collapsed.  The
        # dead call is dropped and the text is still written verbatim, which
        # keeps line numbers in the copy identical to the source.
        with open(output, 'wt', encoding='utf-8') as outfp:
            outfp.write(data)
        css_files.append(css_href)

    return css_files
        
def copyTextFiles2Dir(bk, wdir):
    """Write every xhtml file of the book into *wdir*.

    The ids and hrefs are collected from ``bk.text_iter()`` first; each file
    is then parsed with BeautifulSoup ('html.parser') and the re-serialised
    markup is written as UTF-8 text to ``wdir/<base name of its href>``.

    Returns the list of written base names, in iteration order.
    """
    # gather (id, base name) pairs before any file is written
    entries = [(text_id, os.path.basename(href)) for text_id, href in bk.text_iter()]

    written_names = []
    for text_id, base_name in entries:
        target = os.path.join(wdir, base_name)
        with open(target, 'wt', encoding='utf-8') as outfp:
            markup = BeautifulSoup(bk.readfile(text_id), 'html.parser')
            written_names.append(base_name)
            outfp.write(str(markup))

    return written_names
                        
def getLineNumber(file, text):
    """Return the 1-based number of the last line of *file* containing *text*.

    Args:
        file: path of a UTF-8 encoded text file.
        text: substring to look for.

    Returns:
        The line number of the last matching line, or 0 when no line matches.
    """
    # the context manager closes the file; it used to be left open
    with open(file, 'rt', encoding='utf-8') as infp:
        lines = infp.read().splitlines()

    linenum = 0
    for number, line in enumerate(lines, 1):
        if text in line:
            linenum = number    # later matches overwrite earlier ones
    return linenum
    
def getOPFLineNumber(bk, text):
    """Locate *text* in the opf returned by ``bk.get_opf()``.

    Returns the 1-based number of the last line containing *text*, or 0 when
    the text does not occur on any line.
    """
    hits = [number
            for number, opf_line in enumerate(bk.get_opf().splitlines(), 1)
            if text in opf_line]
    if not hits:
        return 0
    return hits[-1]
    
def cleanExit(wdir):
    """Delete the working directory tree *wdir* and return 0.

    Errors raised while deleting (for example a directory that no longer
    exists) are ignored.
    """
    shutil.rmtree(wdir, ignore_errors=True)
    return 0
    
def hasMediaQuery(bk):
    """Tell whether any stylesheet contains a KF8 media query line.

    A stylesheet line counts when it contains both '@media' and 'amzn-kf8'.
    The stylesheet ids are collected from ``bk.css_iter()`` before any file
    is read.

    Returns True as soon as such a line is found, otherwise False.
    """
    sheet_ids = [css_id for css_id, _ in bk.css_iter()]

    return any(
        '@media' in css_line and 'amzn-kf8' in css_line
        for css_id in sheet_ids
        for css_line in bk.readfile(css_id).splitlines()
    )
    
def getImageSize(image):
    """Use PIL to get the pixel dimensions of an image file.

    NOTE(review): this module defines getImageSize a second time further
    down; the later definition is the one in effect after import.

    Args:
        image: path of the image file; newline characters in it are removed
            before the file is opened.

    Returns:
        Tuple ``(width, height)``.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    image = image.replace('\n', '')
    # the context manager closes the underlying file; it used to stay open
    with Image.open(image) as im:
        wd, ht = im.size
    return (wd, ht)
    
def copyImageFiles2Dir(bk, wdir):
    """Write every image of the book into *wdir* and return 0.

    The ids and hrefs are collected from ``bk.image_iter()`` first; each
    image is then written in binary mode to ``wdir/<base name of its href>``.
    """
    # gather (id, base name) pairs before any file is written
    images = [(image_id, os.path.basename(href))
              for image_id, href, _mime in bk.image_iter()]

    for image_id, base_name in images:
        target = os.path.join(wdir, base_name)
        with open(target, 'wb') as outfp:
            outfp.write(bk.readfile(image_id))

    return 0

def getImageSize(image):
    """Use PIL to get the pixel dimensions of an image file.

    NOTE(review): this is the second definition of getImageSize in this
    module; being the later one, it replaces the earlier definition.

    Args:
        image: path of the image file; newline characters in it are removed
            before the file is opened.

    Returns:
        Tuple ``(width, height)``.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    image = image.replace('\n', '')
    # the context manager closes the underlying file; it used to stay open
    with Image.open(image) as im:
        wd, ht = im.size
    return (wd, ht)
                
      
