import os
import sys
import regex as re
from contextlib import contextmanager
import pytesseract
from pdf2image import convert_from_path
from PyQt6.QtWidgets import QDialog, QFileDialog, QApplication
from PyQt6 import uic
import sigil_bs4 as bs
from ebooklib import epub

# Lower-cased platform identifier, computed once at import time.
_plat = sys.platform.lower()
# True when the platform identifier contains one of the Windows markers.
iswindows = any(marker in _plat for marker in ('win32', 'win64'))


@contextmanager
def make_temp_directory():
    """Yield the path of a fresh temporary directory and delete it afterwards.

    The directory is created with ``tempfile.mkdtemp`` and removed, together
    with everything inside it, when the ``with`` block exits -- whether it
    exits normally or because an exception was raised in its body.

    Yields:
        The path of the newly created temporary directory.
    """
    import tempfile
    import shutil
    temp_dir = tempfile.mkdtemp()
    try:
        yield temp_dir
    finally:
        # Without try/finally an exception in the with-body would skip the
        # cleanup and leave the directory behind.
        shutil.rmtree(temp_dir)
    
class EnhancedPDFConverter(QDialog):
    """Dialog that OCRs a user-selected PDF and stores the result as an EPUB.

    The dialog layout is loaded from ``dialog.ui`` in the plugin directory.
    The user picks a PDF, the pages are rendered to images, run through
    Tesseract, lightly marked up as HTML, wrapped into a single-chapter EPUB
    and handed to ``bk.addotherfile``.
    """

    def __init__(self, bk, parent=None):
        """Load preferences and the UI, and wire up the two buttons.

        Args:
            bk: Host-supplied book object (presumably the Sigil plugin
                container -- confirm against the caller). It must provide
                ``_w.plugin_dir``, ``_w.plugin_name``, ``getPrefs()``,
                ``savePrefs()`` and ``addotherfile()``.
            parent: Optional parent widget passed to ``QDialog``.
        """
        super().__init__(parent)
        self.bk = bk
        self.plugin_dir = os.path.join(bk._w.plugin_dir, bk._w.plugin_name)
        self._ok_to_close = False
        self.prefs = bk.getPrefs()
        # set default preference values
        if 'win_poppler_path' not in self.prefs:
            self.prefs['win_poppler_path'] = 'C:\\poppler\\Library\\bin'
        if 'win_tesseract_path' not in self.prefs:
            self.prefs['win_tesseract_path'] = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
        self.load_ui()
        self.btnSelectPDF.clicked.connect(self.select_pdf)
        self.btnConvert.clicked.connect(self.start_conversion)

    def load_ui(self):
        """Load ``dialog.ui`` from the plugin directory onto this dialog."""
        ui_path = os.path.join(self.plugin_dir, 'dialog.ui')
        self.ui = uic.loadUi(ui_path, self)

    def select_pdf(self):
        """Ask the user for a PDF file and remember its path in ``self.pdf_path``.

        ``self.pdf_path`` is left untouched when the user cancels the dialog.
        """
        file_path, _ = QFileDialog.getOpenFileName(self, "PDF Seç", "", "PDF Dosyaları (*.pdf)")
        if file_path:
            self.pdf_path = file_path

    def start_conversion(self):
        """OCR the selected PDF, build the EPUB and close the dialog.

        Does nothing when no PDF has been selected yet. A page whose OCR
        fails is reported on stdout and skipped; the remaining pages are
        still converted.
        """
        pdf_path = getattr(self, 'pdf_path', None)
        if not pdf_path:
            return

        # Platform-specific tool locations are loop-invariant: set them once.
        render_kwargs = {}
        if iswindows:
            render_kwargs['poppler_path'] = self.prefs['win_poppler_path']
            pytesseract.pytesseract.tesseract_cmd = self.prefs['win_tesseract_path']

        with make_temp_directory() as pth:
            # The page images are written to pth, so OCR has to happen while
            # the temporary directory still exists.
            images = convert_from_path(pdf_path, dpi=300, output_folder=pth, **render_kwargs)

            page_texts = []
            for page_no, img in enumerate(images, start=1):
                try:
                    text = pytesseract.image_to_string(img, lang='tur+eng')
                except Exception as err:
                    # Skip the page: falling through would either raise
                    # NameError (first page) or append the previous page's
                    # text a second time.
                    print(f"Tesseract problem on page {page_no}: {err}")
                    continue
                page_texts.append(self.process_content(text))

            self.create_epub_structure("".join(page_texts))
            self.close()

    def process_content(self, text):
        """Turn raw OCR text into an HTML fragment.

        Steps, in order:
          1. Escape ``&``, ``<`` and ``>`` so stray characters from the PDF
             cannot corrupt the markup added below.
          2. Remove ``Sayfa N`` markers, ``- N -`` markers and everything
             from a ``©`` sign to the end of its line.
          3. Wrap numbered upper-case headings (e.g. ``1.2 TITLE``) that
             follow a newline in ``<h2>``.
          4. Convert ``**bold**`` to ``<b>`` and ``_italic_`` to ``<i>``.

        Args:
            text: Plain text of one OCR'd page.

        Returns:
            The cleaned text with HTML markup applied.
        """
        from html import escape

        # Escape before inserting tags, otherwise the tags themselves would
        # be escaped as well.
        text = escape(text, quote=False)
        text = re.sub(r'(Sayfa\s+\d+|-\s+\d+\s+-|©.*)', '', text, flags=re.MULTILINE)
        # \p{Lu} (any upper-case letter) relies on the third-party `regex`
        # module that this file imports as `re`.
        text = re.sub(r'\n(\d+\.\d+\.?[\s\p{Lu}]+)', r'\n<h2>\1</h2>', text, flags=re.UNICODE)
        text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
        text = re.sub(r'_([^_]+)_', r'<i>\1</i>', text)
        return text

    def create_epub_structure(self, cleaned_text):
        """Wrap ``cleaned_text`` in a one-chapter EPUB and add it to the book.

        The EPUB is written to a temporary file, read back as bytes and
        passed to ``self.bk.addotherfile`` under the name ``dummy.epub``.

        Args:
            cleaned_text: HTML fragment placed inside the chapter body.
        """
        book = epub.EpubBook()
        book.set_title("Enhanced PDF Conversion")
        book.set_identifier("id123456")
        book.set_language('en')
        book.add_author('Me')

        content = f"""<html>
<head><title>Kitap</title></head>
<body>
<div>
{cleaned_text}
</div>
</body>
</html>"""

        chapter = epub.EpubHtml(title='Kitap', file_name='text.xhtml', lang="en")
        chapter.content = content
        book.add_item(chapter)

        book.spine = ['nav', chapter]
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        with make_temp_directory() as temp_dir:
            epub_path = os.path.join(temp_dir, 'output.epub')
            epub.write_epub(epub_path, book, {})
            with open(epub_path, 'rb') as fp:
                data = fp.read()
            self.bk.addotherfile('dummy.epub', data)

    def closeEvent(self, event):
        """Save the preferences via ``bk.savePrefs`` and accept the close event."""
        self.bk.savePrefs(self.prefs)
        event.accept()  # let the window close

def run(bk):
    """Show the converter dialog modally and return 0 once it is dismissed.

    Args:
        bk: Host-supplied book object, passed straight to
            ``EnhancedPDFConverter``.

    Returns:
        Always ``0``.
    """
    # The application object is bound to a name so it stays alive for the
    # whole lifetime of the dialog.
    qt_app = QApplication([])
    converter_dialog = EnhancedPDFConverter(bk)
    converter_dialog.exec()
    return 0
