import html
import os
import re

import pytesseract
from bs4 import BeautifulSoup as bs
from ebooklib import epub
from pdf2image import convert_from_path
from PyQt6 import uic
from PyQt6.QtWidgets import QApplication, QDialog, QFileDialog

class EnhancedPDFConverter(QDialog):
    """Dialog that OCRs a selected PDF and writes the text to ``output.epub``.

    The UI is loaded from ``dialog.ui`` next to this module and must define
    two clickable widgets named ``btnSelectPDF`` and ``btnConvert``.
    """

    # Uppercase letters accepted in section headings: ASCII plus the Turkish
    # capitals.  The stdlib ``re`` module has no ``\p{Lu}`` property class, so
    # the set is spelled out explicitly.
    _UPPER = 'A-ZÇĞİÖŞÜÂÎÛ'

    # Page furniture: "Sayfa 12", "- 12 -" and copyright lines.
    _PAGE_FURNITURE_RE = re.compile(r'(Sayfa\s+\d+|-\s+\d+\s+-|©.*)', re.MULTILINE)
    # A whole line made of a section number ("1.2" / "1.2.") followed by an
    # all-uppercase title.  Only horizontal whitespace is allowed so a match
    # can never run into the following line.
    _HEADING_RE = re.compile(
        rf'^(\d+\.\d+\.?[ \t]*[{_UPPER}][{_UPPER} \t]*)$', re.MULTILINE
    )
    _BOLD_RE = re.compile(r'\*\*(.+?)\*\*')
    _ITALIC_RE = re.compile(r'_([^_]+)_')

    def __init__(self, parent=None):
        """Load the UI and wire the two buttons to their handlers."""
        super().__init__(parent)
        # Path of the PDF chosen by the user; None until one is selected.
        self.pdf_path = None
        self.load_ui()
        self.btnSelectPDF.clicked.connect(self.select_pdf)
        self.btnConvert.clicked.connect(self.start_conversion)

    def load_ui(self):
        """Load ``dialog.ui`` into this dialog.

        ``uic.loadUi`` called with a base instance exposes every named widget
        of the form as an attribute of that instance, so the buttons are
        available as ``self.btnSelectPDF`` / ``self.btnConvert`` afterwards.
        (Looking them up with ``findChild(QDialog, ...)`` would return None,
        because the buttons are not QDialog instances.)

        Raises:
            AttributeError: if the form lacks one of the required widgets.
        """
        ui_path = os.path.join(os.path.dirname(__file__), 'dialog.ui')
        self.ui = uic.loadUi(ui_path, self)
        for widget_name in ('btnSelectPDF', 'btnConvert'):
            if getattr(self, widget_name, None) is None:
                raise AttributeError(
                    f"{ui_path} does not define a widget named {widget_name!r}"
                )

    def select_pdf(self):
        """Ask the user for a PDF file and remember its path if one is chosen."""
        file_path, _ = QFileDialog.getOpenFileName(self, "PDF Seç", "", "PDF Dosyaları (*.pdf)")
        if file_path:
            self.pdf_path = file_path

    def start_conversion(self):
        """OCR every page of the selected PDF and build the EPUB.

        Does nothing when no PDF has been selected yet.
        """
        if not self.pdf_path:
            return

        pages = convert_from_path(self.pdf_path, dpi=300)
        page_texts = [
            self.process_content(pytesseract.image_to_string(page, lang='tur+eng'))
            for page in pages
        ]
        self.create_epub_structure(''.join(page_texts))

    def process_content(self, text):
        """Turn raw OCR text of one page into an XHTML-safe fragment.

        Steps, in order: drop page furniture, normalise the form feed that
        ends a page, escape XML special characters, then add ``<h2>``,
        ``<b>`` and ``<i>`` markup.

        Args:
            text: OCR output for a single page.

        Returns:
            The cleaned text with inline markup added.
        """
        text = self._PAGE_FURNITURE_RE.sub('', text)
        # Tesseract ends each page with a form feed by default; U+000C is not
        # a legal character in XML 1.0, so turn it into a plain line break.
        text = text.replace('\f', '\n')
        # Escape before inserting tags so literal '&', '<' and '>' from the
        # scan cannot break the document, while our own tags stay intact.
        text = html.escape(text, quote=False)
        text = self._HEADING_RE.sub(r'<h2>\1</h2>', text)
        text = self._BOLD_RE.sub(r'<b>\1</b>', text)
        text = self._ITALIC_RE.sub(r'<i>\1</i>', text)
        return text

    def create_epub_structure(self, cleaned_text):
        """Write ``cleaned_text`` as a single-chapter EPUB named ``output.epub``.

        Args:
            cleaned_text: markup fragment placed inside the chapter body.
        """
        book = epub.EpubBook()
        book.set_title("Enhanced PDF Conversion")
        book.set_language('en')

        # NOTE(review): no ``<?xml ... encoding=...?>`` declaration is emitted
        # here.  The content is handed over as ``str``, and lxml (which
        # ebooklib uses to parse chapter content) is understood to reject str
        # input that carries an encoding declaration -- confirm against the
        # installed ebooklib/lxml versions.
        content = f"""<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Kitap</title></head>
<body>
{cleaned_text}
</body>
</html>"""

        chapter = epub.EpubHtml(title='Kitap', file_name='text.xhtml', content=content)
        book.add_item(chapter)

        book.spine = ['nav', chapter]
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        epub.write_epub('output.epub', book, {})

def run(bk):
    """Show the converter dialog modally and return 0 once it is closed.

    Args:
        bk: not used by this function; presumably supplied by the hosting
            application -- verify against the caller.

    Returns:
        Always 0.
    """
    # The QApplication must stay referenced for as long as the dialog runs.
    qt_app = QApplication([])
    converter_dialog = EnhancedPDFConverter()
    converter_dialog.exec()
    return 0
