#!/usr/bin/env python
from lxml import etree
import os
import re
import traceback
# debug_print forwards to calibre's default log, but only while the plugin's
# DEBUG_OPF_HELPER flag is truthy.  If that flag cannot be imported for any
# reason, debug_print is defined as a silent no-op instead.
try:
    from calibre_plugins.opf_helper import DEBUG_OPF_HELPER
except Exception:
    def debug_print(*args, **kwargs):
        """Discard the message (the plugin's debug flag is unavailable)."""
        return None
else:
    def debug_print(*args, **kwargs):
        """Pass the arguments to calibre's default log when debugging is on."""
        if not DEBUG_OPF_HELPER:
            return
        # The logger is imported on each enabled call, not at module import.
        from calibre.utils.logging import default_log
        default_log(*args, **kwargs)
from calibre.constants import config_dir
from calibre.utils.config import plugin_dir
import zipfile

# Names exposed by `from <this module> import *`.
# NOTE(review): validate_opf is defined in this module but is not listed
# here - confirm whether that omission is intentional.
__all__ = [
    'verify_schemas',
    'install_schemas',
    'load_schema',
    'basic_opf_validation',
    'SchemaResolver',
    'get_schema_parser',
]

# Plugin-specific directory for schema files (which use a .txt extension);
# it lives under calibre's configuration directory.
PLUGIN_NAME = 'OPF_Helper'
SCHEMA_DIR = os.path.join(config_dir, 'plugins', PLUGIN_NAME, 'schemas')

# Maps a normalized OPF version string to the SCHEMA_FILES key of the schema
# used for it.  Full versions come first, bare major versions ('2', '3') last
# as the more flexible fallback entries.
_OPF2_SCHEMA_FILE = 'opf20.txt'
_OPF3_SCHEMA_FILE = 'opf30.txt'

VERSION_MAP = {
    '2.0': _OPF2_SCHEMA_FILE,
    '2.0.1': _OPF2_SCHEMA_FILE,
    '3.0': _OPF3_SCHEMA_FILE,
    '3.0.1': _OPF3_SCHEMA_FILE,
    '2': _OPF2_SCHEMA_FILE,
    '3': _OPF3_SCHEMA_FILE,
}

# Namespace URIs for OPF package elements, Dublin Core metadata elements and
# the reserved xml: prefix (xml:lang and friends).
OPF_NS = 'http://www.idpf.org/2007/opf'
DC_NS = 'http://purl.org/dc/elements/1.1/'
XML_NS = 'http://www.w3.org/XML/1998/namespace'

# In-memory XML Schema documents, keyed by file name (with a .txt extension).
# The schemas reference each other through xs:import schemaLocation values
# ("xml.txt", "dc.txt"), which are the same strings used as keys below.

# dc.txt declares every Dublin Core element with exactly the same shape
# (string content plus xml:lang, id and scheme attributes), so its text is
# assembled from one element template instead of fifteen hand-written copies.
_DC_ELEMENT_NAMES = (
    'title',
    'creator',
    'subject',
    'description',
    'publisher',
    'contributor',
    'date',
    'type',
    'format',
    'identifier',
    'source',
    'language',
    'relation',
    'coverage',
    'rights',
)

_DC_SCHEMA_HEADER = '''<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:dc="http://purl.org/dc/elements/1.1/"
           xmlns:xml="http://www.w3.org/XML/1998/namespace"
           targetNamespace="http://purl.org/dc/elements/1.1/"
           elementFormDefault="qualified">

    <xs:import namespace="http://www.w3.org/XML/1998/namespace"
               schemaLocation="xml.txt"/>

    <xs:annotation>
        <xs:documentation>Schema for Dublin Core metadata elements</xs:documentation>
    </xs:annotation>'''

_DC_ELEMENT_DECLARATION = '''    <xs:element name="{name}">
        <xs:complexType>
            <xs:simpleContent>
                <xs:extension base="xs:string">
                    <xs:attribute ref="xml:lang"/>
                    <xs:attribute name="id" type="xs:ID"/>
                    <xs:attribute name="scheme" type="xs:string"/>
                </xs:extension>
            </xs:simpleContent>
        </xs:complexType>
    </xs:element>'''

# Header, element declarations and closing tag are separated by one blank line.
_DC_SCHEMA = '\n\n'.join(
    [_DC_SCHEMA_HEADER]
    + [_DC_ELEMENT_DECLARATION.format(name=name) for name in _DC_ELEMENT_NAMES]
    + ['</xs:schema>']
)

# Schema files dictionary
SCHEMA_FILES = {
    # Attributes of the reserved xml: namespace (xml:lang, xml:base, xml:space)
    'xml.txt': '''<?xml version="1.0"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:xml="http://www.w3.org/XML/1998/namespace"
           targetNamespace="http://www.w3.org/XML/1998/namespace"
           elementFormDefault="qualified">
    <xs:attribute name="lang" type="xs:language"/>
    <xs:attribute name="base" type="xs:anyURI"/>
    <xs:attribute name="space">
        <xs:simpleType>
            <xs:restriction base="xs:string">
                <xs:enumeration value="default"/>
                <xs:enumeration value="preserve"/>
            </xs:restriction>
        </xs:simpleType>
    </xs:attribute>
</xs:schema>''',

    # Dublin Core metadata elements (generated above)
    'dc.txt': _DC_SCHEMA,

    # OPF package document with version fixed to "2.0"
    'opf20.txt': '''<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:opf="http://www.idpf.org/2007/opf"
           xmlns:dc="http://purl.org/dc/elements/1.1/"
           xmlns:xml="http://www.w3.org/XML/1998/namespace"
           targetNamespace="http://www.idpf.org/2007/opf"
           elementFormDefault="qualified">

    <xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="xml.txt"/>
    <xs:import namespace="http://purl.org/dc/elements/1.1/" schemaLocation="dc.txt"/>

    <!-- Define common types -->
    <xs:simpleType name="yes-or-no">
        <xs:restriction base="xs:string">
            <xs:enumeration value="yes"/>
            <xs:enumeration value="no"/>
        </xs:restriction>
    </xs:simpleType>

    <!-- All elements should be in opf namespace -->
    <xs:element name="package">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:metadata"/>
                <xs:element ref="opf:manifest"/>
                <xs:element ref="opf:spine"/>
                <xs:element ref="opf:guide" minOccurs="0"/>
                <xs:element ref="opf:tours" minOccurs="0"/>
            </xs:sequence>
            <xs:attribute name="version" type="xs:string" fixed="2.0" use="required"/>
            <xs:attribute name="unique-identifier" type="xs:string" use="required"/>
            <xs:attribute ref="xml:lang"/>
        </xs:complexType>
    </xs:element>

    <!-- Metadata -->
    <xs:element name="metadata">
        <xs:complexType>
            <xs:choice minOccurs="0" maxOccurs="unbounded">
                <xs:element ref="dc:title"/>
                <xs:element ref="dc:creator"/>
                <xs:element ref="dc:subject"/>
                <xs:element ref="dc:description"/>
                <xs:element ref="dc:publisher"/>
                <xs:element ref="dc:contributor"/>
                <xs:element ref="dc:date"/>
                <xs:element ref="dc:type"/>
                <xs:element ref="dc:format"/>
                <xs:element ref="dc:identifier"/>
                <xs:element ref="dc:source"/>
                <xs:element ref="dc:language"/>
                <xs:element ref="dc:relation"/>
                <xs:element ref="dc:coverage"/>
                <xs:element ref="dc:rights"/>
                <xs:element ref="opf:meta"/>
            </xs:choice>
            <xs:attribute ref="xml:lang"/>
        </xs:complexType>
    </xs:element>

    <!-- Meta element -->
    <xs:element name="meta">
        <xs:complexType>
            <xs:attribute name="name" type="xs:string" use="required"/>
            <xs:attribute name="content" type="xs:string" use="required"/>
        </xs:complexType>
    </xs:element>

    <!-- Manifest -->
    <xs:element name="manifest">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:item" maxOccurs="unbounded"/>
            </xs:sequence>
        </xs:complexType>
    </xs:element>

    <!-- Item -->
    <xs:element name="item">
        <xs:complexType>
            <xs:attribute name="id" type="xs:ID" use="required"/>
            <xs:attribute name="href" type="xs:anyURI" use="required"/>
            <xs:attribute name="media-type" type="xs:string" use="required"/>
            <xs:attribute name="fallback" type="xs:IDREF"/>
            <xs:attribute name="fallback-style" type="xs:IDREF"/>
            <xs:attribute name="required-namespace" type="xs:anyURI"/>
            <xs:attribute name="required-modules" type="xs:string"/>
        </xs:complexType>
    </xs:element>

    <!-- Spine -->
    <xs:element name="spine">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:itemref" maxOccurs="unbounded"/>
            </xs:sequence>
            <xs:attribute name="toc" type="xs:IDREF" use="required"/>
        </xs:complexType>
    </xs:element>

    <!-- Itemref -->
    <xs:element name="itemref">
        <xs:complexType>
            <xs:attribute name="idref" type="xs:IDREF" use="required"/>
            <xs:attribute name="linear" type="opf:yes-or-no" default="yes"/>
        </xs:complexType>
    </xs:element>

    <!-- Guide -->
    <xs:element name="guide">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:reference" maxOccurs="unbounded"/>
            </xs:sequence>
        </xs:complexType>
    </xs:element>

    <!-- Reference -->
    <xs:element name="reference">
        <xs:complexType>
            <xs:attribute name="type" use="required">
                <xs:simpleType>
                    <xs:restriction base="xs:string">
                        <xs:enumeration value="cover"/>
                        <xs:enumeration value="title-page"/>
                        <xs:enumeration value="toc"/>
                        <xs:enumeration value="index"/>
                        <xs:enumeration value="glossary"/>
                        <xs:enumeration value="acknowledgements"/>
                        <xs:enumeration value="bibliography"/>
                        <xs:enumeration value="colophon"/>
                        <xs:enumeration value="copyright-page"/>
                        <xs:enumeration value="dedication"/>
                        <xs:enumeration value="epigraph"/>
                        <xs:enumeration value="foreword"/>
                        <xs:enumeration value="loi"/>
                        <xs:enumeration value="lot"/>
                        <xs:enumeration value="notes"/>
                        <xs:enumeration value="preface"/>
                        <xs:enumeration value="text"/>
                    </xs:restriction>
                </xs:simpleType>
            </xs:attribute>
            <xs:attribute name="title" type="xs:string"/>
            <xs:attribute name="href" type="xs:anyURI" use="required"/>
        </xs:complexType>
    </xs:element>

    <!-- Tours -->
    <xs:element name="tours">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:tour" maxOccurs="unbounded"/>
            </xs:sequence>
        </xs:complexType>
    </xs:element>

    <xs:element name="tour">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:site" maxOccurs="unbounded"/>
            </xs:sequence>
            <xs:attribute name="id" type="xs:ID" use="required"/>
            <xs:attribute name="title" type="xs:string" use="required"/>
        </xs:complexType>
    </xs:element>

    <xs:element name="site">
        <xs:complexType>
            <xs:attribute name="title" type="xs:string" use="required"/>
            <xs:attribute name="href" type="xs:anyURI" use="required"/>
        </xs:complexType>
    </xs:element>

</xs:schema>''',

    # OPF package document with version fixed to "3.0"
    'opf30.txt': '''<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:opf="http://www.idpf.org/2007/opf"
           xmlns:dc="http://purl.org/dc/elements/1.1/"
           xmlns:xml="http://www.w3.org/XML/1998/namespace"
           targetNamespace="http://www.idpf.org/2007/opf"
           elementFormDefault="qualified">

    <xs:import namespace="http://www.w3.org/XML/1998/namespace"
               schemaLocation="xml.txt"/>
    <xs:import namespace="http://purl.org/dc/elements/1.1/"
               schemaLocation="dc.txt"/>

    <!-- Define common types -->
    <xs:simpleType name="yes-or-no">
        <xs:restriction base="xs:string">
            <xs:enumeration value="yes"/>
            <xs:enumeration value="no"/>
        </xs:restriction>
    </xs:simpleType>

    <!-- Direction type -->
    <xs:simpleType name="direction">
        <xs:restriction base="xs:string">
            <xs:enumeration value="ltr"/>
            <xs:enumeration value="rtl"/>
            <xs:enumeration value="default"/>
        </xs:restriction>
    </xs:simpleType>

    <!-- All elements should be in opf namespace -->
    <xs:element name="package">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:metadata"/>
                <xs:element ref="opf:manifest"/>
                <xs:element ref="opf:spine"/>
                <xs:element ref="opf:guide" minOccurs="0"/>
                <xs:element ref="opf:collection" minOccurs="0" maxOccurs="unbounded"/>
            </xs:sequence>
            <xs:attribute name="version" type="xs:string" fixed="3.0" use="required"/>
            <xs:attribute name="unique-identifier" type="xs:string" use="required"/>
            <xs:attribute name="prefix" type="xs:string"/>
            <xs:attribute ref="xml:lang"/>
            <xs:attribute name="dir" type="opf:direction"/>
            <xs:attribute name="id" type="xs:ID"/>
        </xs:complexType>
    </xs:element>

    <!-- Metadata -->
    <xs:element name="metadata">
        <xs:complexType>
            <xs:choice minOccurs="0" maxOccurs="unbounded">
                <xs:element ref="dc:identifier"/>
                <xs:element ref="dc:title"/>
                <xs:element ref="dc:language"/>
                <xs:element ref="dc:creator"/>
                <xs:element ref="dc:contributor"/>
                <xs:element ref="dc:date"/>
                <xs:element ref="dc:subject"/>
                <xs:element ref="dc:description"/>
                <xs:element ref="dc:publisher"/>
                <xs:element ref="dc:type"/>
                <xs:element ref="dc:format"/>
                <xs:element ref="dc:source"/>
                <xs:element ref="dc:relation"/>
                <xs:element ref="dc:coverage"/>
                <xs:element ref="dc:rights"/>
                <xs:element ref="opf:meta"/>
                <xs:element ref="opf:link"/>
            </xs:choice>
            <xs:attribute ref="xml:lang"/>
            <xs:attribute name="dir" type="opf:direction"/>
        </xs:complexType>
    </xs:element>

    <!-- Meta -->
    <xs:element name="meta">
        <xs:complexType>
            <xs:simpleContent>
                <xs:extension base="xs:string">
                    <xs:attribute name="property" type="xs:string"/>
                    <xs:attribute name="scheme" type="xs:string"/>
                    <xs:attribute name="name" type="xs:string"/>
                    <xs:attribute name="content" type="xs:string"/>
                    <xs:attribute name="id" type="xs:ID"/>
                    <xs:attribute name="refines" type="xs:string"/>
                    <xs:attribute ref="xml:lang"/>
                    <xs:attribute name="dir" type="opf:direction"/>
                </xs:extension>
            </xs:simpleContent>
        </xs:complexType>
    </xs:element>

    <!-- Link -->
    <xs:element name="link">
        <xs:complexType>
            <xs:attribute name="href" type="xs:anyURI" use="required"/>
            <xs:attribute name="rel" type="xs:string" use="required"/>
            <xs:attribute name="id" type="xs:ID"/>
            <xs:attribute name="refines" type="xs:string"/>
            <xs:attribute name="media-type" type="xs:string"/>
        </xs:complexType>
    </xs:element>

    <!-- Manifest -->
    <xs:element name="manifest">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:item" maxOccurs="unbounded"/>
            </xs:sequence>
            <xs:attribute name="id" type="xs:ID"/>
        </xs:complexType>
    </xs:element>

    <!-- Item -->
    <xs:element name="item">
        <xs:complexType>
            <xs:attribute name="id" type="xs:ID" use="required"/>
            <xs:attribute name="href" type="xs:anyURI" use="required"/>
            <xs:attribute name="media-type" type="xs:string" use="required"/>
            <xs:attribute name="fallback" type="xs:IDREF"/>
            <xs:attribute name="properties" type="xs:string"/>
            <xs:attribute name="media-overlay" type="xs:IDREF"/>
        </xs:complexType>
    </xs:element>

    <!-- Spine -->
    <xs:element name="spine">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:itemref" maxOccurs="unbounded"/>
            </xs:sequence>
            <xs:attribute name="id" type="xs:ID"/>
            <xs:attribute name="toc" type="xs:IDREF"/>
            <xs:attribute name="page-progression-direction" type="opf:direction"/>
        </xs:complexType>
    </xs:element>

    <!-- Itemref -->
    <xs:element name="itemref">
        <xs:complexType>
            <xs:attribute name="idref" type="xs:IDREF" use="required"/>
            <xs:attribute name="linear" type="opf:yes-or-no" default="yes"/>
            <xs:attribute name="id" type="xs:ID"/>
            <xs:attribute name="properties" type="xs:string"/>
        </xs:complexType>
    </xs:element>

    <!-- Guide -->
    <xs:element name="guide">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:reference" maxOccurs="unbounded"/>
            </xs:sequence>
        </xs:complexType>
    </xs:element>

    <!-- Reference -->
    <xs:element name="reference">
        <xs:complexType>
            <xs:attribute name="type" use="required">
                <xs:simpleType>
                    <xs:restriction base="xs:string">
                        <xs:enumeration value="cover"/>
                        <xs:enumeration value="title-page"/>
                        <xs:enumeration value="toc"/>
                        <xs:enumeration value="index"/>
                        <xs:enumeration value="glossary"/>
                        <xs:enumeration value="acknowledgements"/>
                        <xs:enumeration value="bibliography"/>
                        <xs:enumeration value="colophon"/>
                        <xs:enumeration value="copyright-page"/>
                        <xs:enumeration value="dedication"/>
                        <xs:enumeration value="epigraph"/>
                        <xs:enumeration value="foreword"/>
                        <xs:enumeration value="loi"/>
                        <xs:enumeration value="lot"/>
                        <xs:enumeration value="notes"/>
                        <xs:enumeration value="preface"/>
                        <xs:enumeration value="text"/>
                    </xs:restriction>
                </xs:simpleType>
            </xs:attribute>
            <xs:attribute name="title" type="xs:string"/>
            <xs:attribute name="href" type="xs:anyURI" use="required"/>
        </xs:complexType>
    </xs:element>

    <!-- Collection -->
    <xs:element name="collection">
        <xs:complexType>
            <xs:sequence>
                <xs:element ref="opf:metadata" minOccurs="0"/>
                <xs:choice maxOccurs="unbounded">
                    <xs:element ref="opf:collection"/>
                    <xs:element name="link">
                        <xs:complexType>
                            <xs:attribute name="href" type="xs:anyURI" use="required"/>
                            <xs:attribute name="media-type" type="xs:string"/>
                        </xs:complexType>
                    </xs:element>
                </xs:choice>
            </xs:sequence>
            <xs:attribute name="role" type="xs:string" use="required"/>
            <xs:attribute name="id" type="xs:ID"/>
        </xs:complexType>
    </xs:element>

</xs:schema>'''
}

class SchemaResolver(etree.Resolver):
    """lxml resolver that serves schema imports/includes from SCHEMA_FILES.

    A reference is matched by the base name of its system URL, so both
    "xml.txt" and "/some/dir/xml.txt" find the same in-memory schema.  A
    reference ending in ".xsd" is additionally tried with a ".txt" extension.
    """

    def __init__(self):
        super().__init__()
        debug_print("OPFHelper: Initialized in-memory schema resolver")

    def resolve(self, system_url, public_id, context):
        """Return the in-memory schema matching ``system_url``, if there is one.

        Args:
            system_url: System identifier of the referenced document; may be
                None when the reference carries no system identifier.
            public_id: Public identifier of the reference (not used here).
            context: Opaque lxml resolver context, passed to resolve_string().

        Returns:
            The result of ``resolve_string()`` for a known schema, otherwise
            None so that this resolver declines the request.
        """
        debug_print(f"OPFHelper: Resolving schema reference: {system_url}")

        # Without a system URL there is no name to look up; os.path.basename()
        # would raise TypeError on None, so decline the request instead.
        if not system_url:
            debug_print("OPFHelper: Could not resolve schema: no system URL given")
            return None

        # Handle both .txt and regular schema file references.  Only a
        # trailing ".xsd" extension is swapped for ".txt".
        basename = os.path.basename(system_url)
        if basename.endswith('.xsd'):
            basename_txt = basename[:-len('.xsd')] + '.txt'
        else:
            basename_txt = basename

        debug_print(f"OPFHelper: Looking for schema as {basename} or {basename_txt}")

        # Exact name first, then the .txt variant.
        for candidate, match_kind in ((basename, 'exact'), (basename_txt, '.txt')):
            schema_text = SCHEMA_FILES.get(candidate)
            if schema_text is None:
                continue
            debug_print(f"OPFHelper: Found {match_kind} schema match for {basename}")
            # The base URL lets relative imports inside the returned schema
            # come back to this resolver with a path ending in their name.
            return self.resolve_string(
                schema_text.encode('utf-8'),
                context,
                base_url=os.path.join(SCHEMA_DIR, candidate),
            )

        debug_print(f"OPFHelper: Could not resolve schema: {system_url}")
        debug_print(f"OPFHelper: Available schemas: {', '.join(SCHEMA_FILES.keys())}")
        return None

def get_schema_parser():
    """Build an lxml XMLParser that has a SchemaResolver registered.

    Returns:
        The configured parser, or None if creating it raised an exception
        (the error is reported through debug_print).
    """
    # Parser settings used for EPUB XML validation.
    # NOTE(review): resolve_entities=True is only visibly used here on the
    # bundled schemas - confirm this parser is never fed untrusted documents.
    parser_options = {
        'resolve_entities': True,
        'remove_blank_text': True,
        'ns_clean': True,
        'attribute_defaults': True,
        'load_dtd': False,
    }
    try:
        xml_parser = etree.XMLParser(**parser_options)
        xml_parser.resolvers.add(SchemaResolver())
        debug_print("OPFHelper: Created XML parser with schema resolver")
        return xml_parser
    except Exception as e:
        debug_print(f"OPFHelper ERROR: Failed to create parser: {str(e)}")
    return None

def verify_schemas():
    """Sanity-check every entry of SCHEMA_FILES.

    Each schema must be non-blank, contain an ``<xs:schema`` tag and parse as
    XML with the parser from get_schema_parser().  The schemas are parsed
    only; they are not compiled into XMLSchema objects here.

    Returns:
        True if all schemas pass, False at the first failure or on any
        unexpected error.
    """
    try:
        debug_print("OPFHelper: Verifying schema content...")
        for schema_name, schema_text in SCHEMA_FILES.items():
            # Cheap textual checks before attempting a parse.
            complaint = None
            if not schema_text.strip():
                complaint = "Empty schema content for"
            elif '<xs:schema' not in schema_text:
                complaint = "Invalid schema content for"
            if complaint is not None:
                debug_print(f"OPFHelper ERROR: {complaint} {schema_name}")
                return False

            # Parse the text to make sure it is valid XML.
            try:
                xml_parser = get_schema_parser()
                etree.fromstring(schema_text.encode('utf-8'), xml_parser)
                debug_print(f"OPFHelper: Successfully verified {schema_name}")
            except Exception as parse_error:
                debug_print(f"OPFHelper ERROR: Schema {schema_name} is not valid XML: {str(parse_error)}")
                return False

        debug_print("OPFHelper: All schemas verified successfully")
        return True
    except Exception as unexpected:
        debug_print(f"OPFHelper ERROR: Schema verification failed: {str(unexpected)}")
        traceback.print_exc()
        return False

def install_schemas():
    """Report whether the in-memory schemas pass verification.

    This function only delegates to verify_schemas().

    Returns:
        The boolean result of verify_schemas().
    """
    schemas_ok = verify_schemas()
    return schemas_ok

def load_schema(version='2.0'):
    """Load the appropriate OPF schema based on version.

    The version is normalized before lookup: it is lower-cased, stripped, and
    the words "epub" and "opf" are removed, so "EPUB 3.0" and "opf2" are
    accepted.  An exact match in VERSION_MAP is preferred; otherwise the part
    before the first "." is tried as the major version.

    Args:
        version: OPF version.  Usually a string such as '2.0' or '3.0';
            other values (for example the number 3 or 3.0) are converted
            with str() first.

    Returns:
        An ``etree.XMLSchema`` built from the matching entry of SCHEMA_FILES,
        or None when the version is missing or unknown, the parser cannot be
        created, or the schema fails to parse or compile.
    """
    try:
        debug_print(f"OPFHelper: Loading schema for OPF {version}")

        # A missing version cannot be mapped; report it instead of failing
        # with an AttributeError on None.lower().
        if version is None:
            debug_print("OPFHelper ERROR: No schema mapping for version None")
            return None

        # Clean and normalize version string (str() also accepts numbers).
        version = str(version).lower().strip()
        version = version.replace('epub', '').replace('opf', '').strip()
        # Get major version number
        major_version = version.split('.')[0]

        # Exact version match first, then major version match.
        schema_name = VERSION_MAP.get(version) or VERSION_MAP.get(major_version)
        if not schema_name:
            debug_print(f"OPFHelper ERROR: No schema mapping for version {version}")
            return None

        schema_content = SCHEMA_FILES.get(schema_name)
        if not schema_content:
            debug_print(f"OPFHelper ERROR: No schema content for {schema_name}")
            return None

        # Parser whose resolver serves the schema's xs:import references.
        parser = get_schema_parser()
        if not parser:
            debug_print("OPFHelper ERROR: Failed to create parser")
            return None

        try:
            debug_print(f"OPFHelper: Parsing schema {schema_name}")
            schema_doc = etree.fromstring(schema_content.encode('utf-8'), parser)
            schema = etree.XMLSchema(schema_doc)
        except Exception as e:
            debug_print(f"OPFHelper ERROR: Failed to parse schema {schema_name}: {str(e)}")
            traceback.print_exc()
            return None

        debug_print(f"OPFHelper: Successfully loaded schema for version {version}")
        return schema

    except Exception as e:
        debug_print(f"OPFHelper ERROR: Failed to load schema: {str(e)}")
        traceback.print_exc()
        return None

def basic_opf_validation(doc, version='2.0'):
    """Perform basic structural checks on an OPF document.

    Checks that the document has a metadata section with dc:title,
    dc:language and dc:identifier, a manifest with at least one item, and a
    spine with at least one itemref.  For versions starting with "3" the
    manifest must also contain an item whose ``properties`` include ``nav``.

    Args:
        doc: Parsed document providing ``getroot()``; the root element must
            expose ``nsmap``, ``find`` and ``findall`` (as lxml elements do).
        version: OPF version; converted with str() before it is inspected.

    Returns:
        A tuple ``(is_valid, results)`` where ``results`` is a list of
        human-readable check lines.  If an unexpected error occurs the
        result is ``(False, [<single error line>])``.
    """
    debug_print(f"OPFHelper: Starting basic validation for version {version}")
    results = []
    is_valid = True

    try:
        root = doc.getroot()

        # Namespaces as declared by the document, falling back to the
        # standard OPF / Dublin Core URIs.
        nsmap = root.nsmap
        opf_ns = nsmap.get(None) or nsmap.get('opf') or OPF_NS
        dc_ns = nsmap.get('dc') or DC_NS

        def candidate_paths(name):
            # Lookup order: document's own namespace, the standard OPF
            # namespace, then no namespace at all.
            return (
                f".//{{{opf_ns}}}{name}",
                f".//{{{OPF_NS}}}{name}",
                f".//{name}",
            )

        def find_element(parent, name):
            # First descendant called ``name`` under any candidate path.
            for path in candidate_paths(name):
                elem = parent.find(path)
                if elem is not None:
                    return elem
            return None

        def find_elements(parent, name):
            # All descendants from the first candidate path that matches.
            for path in candidate_paths(name):
                elems = parent.findall(path)
                if elems:
                    return elems
            return []

        # Validate metadata section
        metadata = find_element(root, "metadata")
        if metadata is not None:
            results.append("✓ Metadata section present")

            # Required DC elements: DC namespace first, then un-namespaced.
            for field in ('title', 'language', 'identifier'):
                elems = (metadata.findall(f".//{{{dc_ns}}}{field}")
                         or metadata.findall(f".//{field}"))
                if elems:
                    results.append(f"✓ Required field '{field}' found")
                else:
                    results.append(f"❌ Required field '{field}' missing")
                    is_valid = False
        else:
            results.append("❌ Metadata section missing")
            is_valid = False

        # Validate manifest section
        manifest = find_element(root, "manifest")
        if manifest is not None:
            results.append("✓ Manifest section present")
            items = find_elements(manifest, "item")
            if items:
                results.append(f"✓ Manifest contains {len(items)} items")

                # Additional manifest checks for OPF 3.0
                if str(version).strip().startswith('3'):
                    # ``properties`` is a whitespace-separated list, so the
                    # nav document may carry other values too
                    # (e.g. "nav scripted").
                    has_nav = any(
                        'nav' in (item.get('properties') or '').split()
                        for item in items
                    )
                    if not has_nav:
                        results.append("❌ OPF 3.0 requires a nav item in manifest")
                        is_valid = False
            else:
                results.append("❌ Manifest contains no items")
                is_valid = False
        else:
            results.append("❌ Manifest section missing")
            is_valid = False

        # Validate spine section
        spine = find_element(root, "spine")
        if spine is not None:
            results.append("✓ Spine section present")
            itemrefs = find_elements(spine, "itemref")
            if itemrefs:
                results.append(f"✓ Spine contains {len(itemrefs)} items")
            else:
                results.append("❌ Spine contains no items")
                is_valid = False
        else:
            results.append("❌ Spine section missing")
            is_valid = False

        return is_valid, results

    except Exception as e:
        debug_print(f"OPFHelper ERROR: Basic validation failed: {str(e)}")
        traceback.print_exc()
        return False, [f"❌ Validation error: {str(e)}"]

def validate_opf(doc, version='2.0'):
    """Validate OPF content against schema and basic rules.

    Schema validation is attempted first (when a schema can be loaded for
    ``version``); the basic structural checks are always run afterwards and
    their output is appended under a "Basic OPF Structure:" heading.

    Args:
        doc: The OPF document handed to ``schema.assertValid`` and to
            ``basic_opf_validation`` (presumably a parsed lxml tree).
        version: OPF version string passed to ``load_schema`` and
            ``basic_opf_validation``.

    Returns:
        A ``(report, has_errors)`` tuple. ``report`` is the newline-joined
        list of result lines. ``has_errors`` is True when schema validation
        failed, when the basic validation reported the document as invalid,
        or when validation itself raised an exception.
    """
    debug_print(f"OPFHelper: Validating OPF version {version}")
    results = []
    has_errors = False

    try:
        # Load appropriate schema
        schema = load_schema(version)
        if schema is None:
            results.append("⚠️ Could not load schema - using basic validation only")
        else:
            try:
                schema.assertValid(doc)
                results.append("✓ Valid against schema")
            except etree.DocumentInvalid as e:
                has_errors = True
                results.append("❌ Schema validation failed:")
                results.extend(_format_schema_error(error) for error in e.error_log)

        # Always perform basic validation
        is_valid, basic_results = basic_opf_validation(doc, version)

        # A failed basic check is an error too; without this the flag could
        # never be set when no schema was available.
        if not is_valid:
            has_errors = True

        if basic_results:
            results.append("\nBasic OPF Structure:")
            results.extend(basic_results)

        return "\n".join(results), has_errors

    except Exception as e:
        debug_print(f"OPFHelper ERROR: Validation failed: {str(e)}")
        traceback.print_exc()
        return f"❌ Validation error: {str(e)}", True


def _format_schema_error(error):
    """Turn one schema error-log entry into a readable result line.

    Args:
        error: An entry from ``DocumentInvalid.error_log``; only its ``path``
            and ``message`` attributes are read.

    Returns:
        A single "❌ ..." line describing the error.
    """
    # re.search raises TypeError on None, so normalise missing values first.
    path = error.path or ""
    message = error.message or ""

    # Extract element name from error path
    element_match = re.search(r'\{[^}]+\}(\w+)', path)
    element_name = element_match.group(1) if element_match else "unknown"

    if "This element is not expected" in message:
        expected_match = re.search(r'Expected is \( ([^)]+) \)', message)
        if expected_match:
            expected = expected_match.group(1)
            return f"❌ Invalid element order - found '{element_name}' but expected one of: {expected}"
        return f"❌ Element '{element_name}' is not allowed in this position"

    if "Missing child element" in message:
        child = re.search(r'Missing child element\(s\)\. Expected is \( ([^)]+) \)', message)
        if child:
            return f"❌ Required element missing - expected '{child.group(1)}' under '{element_name}'"
        return f"❌ Required child element missing under '{element_name}'"

    # General error message cleanup: name the element and shorten the
    # namespace URI embedded in the message to a short prefix.
    msg = f"❌ {message}"
    msg = msg.replace('Element', f"Element '{element_name}'")
    ns_match = re.search(r'\{([^}]+)\}', message)
    if ns_match:
        ns = ns_match.group(1)
        msg = msg.replace(ns, 'dc' if 'dc/elements' in ns else 'opf')
    return msg