
#!/Python3/python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals, division, absolute_import, print_function

#********************************************************************************#
#                                                                                #
# MIT Licence(OSI)                                                               #
# Copyright (c) 2017 Bill Thompson                                               #
#                                                                                #
# Permission is hereby granted, free of charge, to any person obtaining a copy   # 
# of this software and associated documentation files (the "Software"), to deal  # 
# in the Software without restriction, including without limitation the rights   #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      #
# copies of the Software, and to permit persons to whom the Software is          #
# furnished to do so, subject to the following conditions:                       # 
#                                                                                #
# The above copyright notice and this permission notice shall be included in all #
# copies or substantial portions of the Software.                                #
#                                                                                # 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     # 
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         # 
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  # 
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  # 
# SOFTWARE.                                                                      #
#                                                                                #  
#********************************************************************************#
import os, os.path, sys, codecs, shutil, inspect, time, html
import options
#from tempfile import mkdtemp                 
from updater import updateCheck
import tkinter as tk
import tkinter.messagebox as mbox

try:
    from sigil_bs4 import BeautifulSoup
except:
    from bs4 import BeautifulSoup
    
# Platform flags derived from sys.platform.
iswindows = sys.platform.startswith('win')
isosx = sys.platform.startswith('darwin')
islinux = sys.platform.startswith('linux')    

# Page handed to updateCheck() when looking for a newer plugin release.
SITE_URL = "https://www.mobileread.com/forums/showpost.php?p=4295985&postcount=1"
# Directory that contains this source file; also published on the options module.
PLUGIN_PATH = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
options.PLUGIN_PATH = PLUGIN_PATH

# File names regarded as the cover page. Text files whose base name matches
# are skipped by the tag-removal and title-handling functions.
cover_search = ['cover.xhtml',
                'cover.html',
                'coverpage.xhtml',
                'coverpage.html',
                'titlepage.xhtml',
                'titlepage.html'
               ]
              
    
def removeAllTags(bk, epub_version):
    """Strip the markup from every non-cover text file in the book.

    The work is done in three steps:

    1. Each text file is reduced to the text of its <body> and rewritten
       inside a minimal XHTML skeleton. When the 'save_plain_text_to_file'
       preference is set, the text is also appended to the file named by
       the 'save_file_path' preference.
    2. When the 'remove_unused_files' preference is set, all CSS, image
       and font files are deleted from the book.
    3. Each text file is re-read and its text lines are written back either
       as bare lines ('convert_to_plain_text' set) or wrapped in <p> tags.

    Finally the titles collected before step 1 are restored through
    addTitleTagString().

    Args:
        bk: the book container handed to the plugin; must provide
            getPrefs(), text_iter(), css_iter(), image_iter(), font_iter(),
            href_to_basename(), readfile(), writefile() and deletefile().
        epub_version: 2 or 3; selects the XHTML header that is written.

    Returns:
        0 on success, -1 when epub_version is neither 2 nor 3 (an error
        box is shown and the book is left untouched).
    """
    print('In RemoveAllTags()...\n')

    wrapper = _xhtml_wrapper(epub_version)
    if wrapper is None:
        # Stop here: without a header there is nothing valid to write.
        msg = 'Error: Unknown file format(not epub format)!'
        show_msgbox('Error:', msg, msgtype='error')
        return -1
    top, tail = wrapper

    partition = "#-----------------------New Section------------------------#"

    prefs = bk.getPrefs()

    # remove the last saved plain text file
    outpath = prefs['save_file_path']
    if os.path.exists(outpath):
        os.remove(outpath)

    # get a list of title tag strings for later use
    saved_titles = getTitleStringList(bk)

    cover_names = " ".join(cover_search)

    def is_cover(item_id, href):
        # NOTE(review): this is a substring test against the joined names,
        # so e.g. 'page.html' also counts as a cover. The same test is used
        # in getTitleStringList() and addTitleTagString(); all three must be
        # changed together or the saved titles end up on the wrong files.
        fname = bk.href_to_basename(href)
        return fname.strip().lower() in cover_names or 'cover' in item_id.lower()

    save_text = prefs['save_plain_text_to_file']

    # step 1: reduce every text file to the text of its <body>
    for item_id, href in bk.text_iter():
        if is_cover(item_id, href):
            continue

        soup = BeautifulSoup(bk.readfile(item_id), 'html.parser')
        # A file without <body> is handled as a whole instead of crashing.
        body = soup.body if soup.body is not None else soup

        # drop the inline/preformatted wrappers before extracting the text
        for tag in body.find_all(['span', 'sub', 'sup',
                                  'pre', 'code', 'blockquote']):
            tag.attrs = {}
            tag.unwrap()

        text_only = '\n' + body.get_text().strip() + '\n'

        # get_text() returns decoded text, so '&', '<' and '>' must be
        # escaped again before they are placed inside XHTML; this also keeps
        # the "'<' in line" test of step 3 from mistaking text for markup.
        bk.writefile(item_id, top + html.escape(text_only, quote=False) + tail)

        # save all sections to a plain text output file
        if save_text:
            with open(outpath, 'at', encoding='utf-8') as outfp:
                outfp.write(partition + '\n\n')
                for line in text_only.splitlines():
                    line = line.strip()
                    if line:
                        outfp.write(line + '\n\n')

    # step 2: if the relevant flag is set then remove
    # files from Styles, Images and Fonts dirs
    if prefs['remove_unused_files']:
        # Materialise each iterator first so that deleting a file can never
        # disturb the iteration that produced it.
        for css_id, _href in list(bk.css_iter()):
            bk.deletefile(css_id)

        for image_id, _href, _mime in list(bk.image_iter()):
            bk.deletefile(image_id)

        for font_id, _href, _mime in list(bk.font_iter()):
            bk.deletefile(font_id)

    as_plain_text = prefs['convert_to_plain_text']

    # step 3: reformat epub html either as plain text or as basic html
    for item_id, href in bk.text_iter():
        if is_cover(item_id, href):
            continue

        out_lines = []
        for line in bk.readfile(item_id).splitlines(True):
            stripped = line.strip()

            if stripped.startswith('"http://'):
                # second line of the epub2 DOCTYPE declaration
                out_lines.append(stripped + '\n')
            elif '<' in line:
                # markup line of the skeleton; only <html> gets extra spacing
                if stripped.startswith('<html xmlns'):
                    line = '\n' + stripped + '\n'
                out_lines.append(line)
            elif stripped:
                if as_plain_text:
                    # reformat as plain text
                    out_lines.append(stripped + '\n\n')
                else:
                    # reformat as basic html using <p> tags only
                    out_lines.append('<p>' + stripped + '</p>' + '\n\n')

        bk.writefile(item_id, " ".join(out_lines))

    print('Exiting RemoveAllTags()...\n')
    addTitleTagString(bk, saved_titles)
    return 0


def _xhtml_wrapper(epub_version):
    """Return the (top, tail) XHTML skeleton for epub 2 or 3, else None."""
    if epub_version == 2:
        top = '<?xml version="1.0" encoding="utf-8"?>\n'
        top += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n\n  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n\n'
        top += '<html xmlns="http://www.w3.org/1999/xhtml">\n'
    elif epub_version == 3:
        top = '<?xml version="1.0" encoding="utf-8"?>\n'
        top += '<!DOCTYPE html>\n\n'
        top += '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">\n'
    else:
        return None

    top += "<head>\n"
    top += '  <title></title>\n'
    top += '</head>\n'
    top += '<body>'

    tail = '</body>\n'
    tail += '</html>\n'
    return top, tail


def getTitleStringList(bk):
    """Collect the <title> text of every non-cover text file.

    The list holds exactly one entry per non-cover file, in bk.text_iter()
    order, so that addTitleTagString() can hand the titles back by position.
    A file with no <title> tag, or with an empty or whitespace-only title,
    contributes the placeholder 'Unknown'.

    Args:
        bk: the book container handed to the plugin; must provide
            text_iter(), href_to_basename() and readfile().

    Returns:
        A list of title strings.
    """
    cover_names = " ".join(cover_search)
    s_titles = []

    for item_id, href in bk.text_iter():

        # ignore the cover file -- no text
        # NOTE(review): substring test against the joined names, so e.g.
        # 'page.html' also matches. removeAllTags() and addTitleTagString()
        # use the same test; change all three together.
        fname = bk.href_to_basename(href)
        if fname.strip().lower() in cover_names or 'cover' in item_id.lower():
            continue

        soup = BeautifulSoup(bk.readfile(item_id), 'html.parser')
        title = soup.find('title')
        text = title.get_text() if title is not None else ''

        # Always append something: skipping a file here would shift every
        # later title onto the wrong file.
        s_titles.append(text if text.strip() else 'Unknown')

    return s_titles
    

def addTitleTagString(bk, s_titles):
    """Write the saved titles back and tidy the body of each text file.

    For every non-cover text file the first <title> tag receives the next
    entry of s_titles ('Unknown' when the entry is empty or the list has run
    out). When the 'convert_to_plain_text' preference is off, the body is
    rebuilt as one <p> element per non-blank line of its text.

    Args:
        bk: the book container handed to the plugin; must provide
            getPrefs(), text_iter(), href_to_basename(), readfile() and
            writefile().
        s_titles: title strings in bk.text_iter() order; one entry is
            consumed for each file in which a <title> tag is found.

    Returns:
        0 always.
    """
    print('In addTitleString()...\n')

    prefs = bk.getPrefs()
    as_basic_html = not prefs['convert_to_plain_text']
    cover_names = " ".join(cover_search)
    idx = 0

    # add the relevant title text between the <title></title> tags
    for item_id, href in bk.text_iter():

        # ignore the cover if present
        # NOTE(review): substring test against the joined names, so e.g.
        # 'page.html' also matches. removeAllTags() and getTitleStringList()
        # use the same test; change all three together.
        fname = bk.href_to_basename(href)
        if fname.strip().lower() in cover_names or 'cover' in item_id.lower():
            continue

        soup = BeautifulSoup(bk.readfile(item_id), 'html.parser')

        title = soup.find('title')
        if title is not None:
            # Guard the lookup so a short list yields 'Unknown', not IndexError.
            saved = s_titles[idx] if idx < len(s_titles) else None
            title.string = saved if saved else 'Unknown'
            idx += 1

        # reformat the basic html layout display
        body = soup.body
        if as_basic_html and body is not None:
            paragraphs = [ln.strip() for ln in body.get_text().splitlines()
                          if ln.strip()]
            # Build real <p> elements so that serialising the soup escapes
            # the text ('&', '<', '>') but not the paragraph tags.
            body.clear()
            if paragraphs:
                body.append('\n')
            for text in paragraphs:
                para = soup.new_tag('p')
                para.string = text
                body.append(para)
                body.append('\n')

        # No html.unescape() here: it would turn '&amp;' and '&lt;' in the
        # text and title back into raw characters and break the XHTML.
        bk.writefile(item_id, str(soup))

    print('Exiting addTitleTagString()...\n')
    return 0
    

def show_msgbox(title, msg, msgtype='info'):
    """Show a modal message box for general information, warnings and errors.

    Args:
        title: window title of the dialog.
        msg: text shown in the dialog.
        msgtype: 'info', 'warning' or 'error'; any other value is shown as
            an information box rather than being dropped silently.

    Returns:
        Whatever the tkinter.messagebox function returns.
    """
    dialogs = {
        'info': mbox.showinfo,
        'warning': mbox.showwarning,
        'error': mbox.showerror,
    }
    show = dialogs.get(msgtype, mbox.showinfo)

    # A hidden root window is needed to host the dialog; it is destroyed
    # afterwards so that repeated calls do not pile up Tk interpreters.
    localRoot = tk.Tk()
    try:
        localRoot.withdraw()
        localRoot.option_add('*font', 'Helvetica -12')
        return show(title, msg, parent=localRoot)
    finally:
        localRoot.destroy()
  
  
def is_connected(host='8.8.8.8', port=53, timeout=1):
    """Report whether a TCP connection to host:port can be opened.

    Args:
        host: address to connect to.
        port: TCP port to connect to.
        timeout: seconds to wait for the connection.

    Returns:
        True when the connection succeeds, False on any socket error
        (unreachable network, refused connection, timeout, ...).
    """
    # Imported here because the module's import block does not provide it.
    import socket

    try:
        with socket.create_connection((host, port), timeout):
            return True
    except OSError:
        return False


def run(bk):
    """Plugin entry point: strip the markup from the book's text files.

    Checks for a newer plugin release when a network connection is
    available, fills in any missing preferences, runs removeAllTags() and
    finally reports the saved text file and any available update.

    Args:
        bk: the book container handed to the plugin; must provide
            launcher_version(), epub_version(), getPrefs(), savePrefs() and
            everything removeAllTags() needs.

    Returns:
        0 on success, -1 when removeAllTags() reports a failure.
    """
    print('Python version: ', sys.version, '\n')
    # NOTE(review): 'VerifyOPFData' here and 'CheckInternalLinks' further
    # down look like names copied from other plugins -- confirm the intended
    # plugin name before changing these strings.
    print('Running VerifyOPFData plugin...')

    # is_connected must be *called*; the bare function object is always true.
    if is_connected():
        # check for new plugin versions
        latest_version, installed_version = updateCheck(SITE_URL, PLUGIN_PATH)
        if latest_version and latest_version != installed_version:
            options.NEW_PLUGIN_VERSION = True
            options.MSG_NEW_VERSION_AVAILABLE = "A new plugin version is now available from MR - v" + latest_version

    # Launchers older than 20160102 are not asked for the epub version;
    # "2.0" is assumed for them.
    epubversion = "2.0"
    if bk.launcher_version() >= 20160102:
        epubversion = bk.epub_version()
    epub_version = 3 if epubversion.startswith("3") else 2

    # fill in any preference that has not been stored yet
    prefs = bk.getPrefs()
    defaults = (
        ('convert_to_plain_text', True),
        ('save_plain_text_to_file', True),
        ('save_file_path',
         os.path.join(os.path.expanduser("~/Desktop"), 'textfile.txt')),
        ('remove_unused_files', False),
    )
    for key, value in defaults:
        if key not in prefs:
            prefs[key] = value
    bk.savePrefs(prefs)

    # strip the markup from the text files of the book
    if removeAllTags(bk, epub_version) != 0:
        return -1

    # notify user if plain text file has been saved
    if prefs['save_plain_text_to_file']:
        msg = 'A plain text file has been saved to:\n\n' + prefs['save_file_path']
        show_msgbox('Information', msg, msgtype='info')

    # inform user if new plugin version is available
    if getattr(options, 'NEW_PLUGIN_VERSION', False):
        msg = options.MSG_NEW_VERSION_AVAILABLE
        show_msgbox('CheckInternalLinks', msg, msgtype='info')

    print('\n-- Completed SUCCESSFULLY...')
    return 0
    
def main():
    """Fallback for direct execution; this module is meant to be driven via run().

    Prints a notice and returns -1 so that the process exits with a
    failure status.
    """
    notice = 'I reached main when I should not have\n'
    print(notice)
    return -1


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)