#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2011, Rodrigo Coin Curvo. 2019-2022, Thiago Oliveira'
__docformat__ = 'restructuredtext en'

# Standard libraries
import six.moves.http_client
from six.moves.urllib.parse import urlparse
import socket, re, datetime
from threading import Thread
from lxml.html import fromstring, tostring

# Calibre libraries
from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars
from calibre_plugins.skoob_metadata import Skoob

# Load the translation files (.mo) from the 'translations' folder
load_translations()


# Get details
class Worker(Thread):

    '''
    Get book details from Skoob book page in a separate thread
    '''

    def __init__(self, url, auts, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        Store the job parameters and set the thread up as a daemon.

        No network access happens here; the page is only fetched once the
        thread runs.
        '''
        Thread.__init__(self)
        self.daemon = True

        # What to fetch and where to deliver the result.
        self.url = url
        self.result_queue = result_queue

        # Logging and network settings.
        self.log = log
        self.timeout = timeout

        # Values attached to or used on behalf of the resulting metadata.
        self.relevance = relevance
        self.plugin = plugin

        # Work on a clone of the browser that was handed in.
        self.browser = browser.clone_browser()

        # Filled in by get_details() while the page is parsed.
        self.cover_url = None
        self.skoob_id = None
        self.isbn = None

        self.auts = auts

    def run(self):
        '''
        Thread entry point: run get_details() and log any failure instead
        of letting it escape from the thread.
        '''
        try:
            self.get_details()
        except Exception:
            # Only ordinary errors are treated as a failed lookup; exceptions
            # outside the Exception hierarchy (e.g. SystemExit) propagate.
            self.log.exception(_('get_details failed for url: %r') % self.url)

    def get_root(self, url):
        '''
        Download ``url`` and return the root element of the parsed HTML.

        Returns None, after logging the reason, when the download fails,
        when the page body contains the 'Erro 404: p' marker or when the
        HTML cannot be parsed.
        '''
        try:
            self.log.info(_('Skoob url: %r') % url)
            raw = self.browser.open_novisit(url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                self.log.error(_('URL malformed: %r') % url)
                return None
            # A timeout is recognised by a socket.timeout instance sitting in
            # the first argument of the raised exception.
            args = getattr(e, 'args', None) or [None]
            if isinstance(args[0], socket.timeout):
                self.log.error(_('Skoob timed out. Try again later.'))
            else:
                self.log.exception(_('Failed to make details query: %r') % url)
            return None

        raw = raw.decode('iso-8859-1', errors='replace')

        # A missing book can also be reported inside a normally delivered page.
        if 'Erro 404: p' in raw:
            self.log.error(_('URL malformed: %r') % url)
            return None

        try:
            return fromstring(clean_ascii_chars(raw))
        except Exception:
            self.log.exception(_('Failed to parse Skoob details page: %r') % url)
            return None

    # The code below is kept for reference only. It would be used for an online lookup of ISO 639 codes.
    '''
    def get_iso(self, url):
        try:
            raw = self.browser.open_novisit(url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%url)
                return
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'Wikipedia timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%url
                self.log.exception(msg)
            return

        raw = raw.decode('utf-8', errors='replace')

        if 'Erro 404: p' in raw:
            self.log.error('URL malformed: %r'%url)
            return

        try:
            iso = fromstring(clean_ascii_chars(raw))
        except:
            msg = 'Failed to parse Wikipedia ISO 639-2 details page: %r'%url
            self.log.exception(msg)
            return None

        return iso
    '''

    def get_details(self):
        '''
        Fetch ``self.url``, build a ``Metadata`` object from the page and
        put it on ``self.result_queue``.

        Nothing is queued when the page cannot be loaded or when the title
        or the Skoob id is missing. For the remaining fields that are parsed
        inside a ``try`` block, a failure is logged and the field is skipped.
        '''
        root = self.get_root(self.url)

        # get_root() signals failure with None. Test for that explicitly:
        # the truth value of an lxml element depends on its number of
        # children, so ``not root`` is not an existence check.
        if root is None:
            return

        try:
            (title, series, series_index) = self.parse_title_series(root)
        except Exception:
            self.log.exception(_('Error parsing title and series for url: %r') % self.url)
            title = series = series_index = None

        authors = self.parse_authors(root)

        try:
            skoob_id = self.parse_skoob_id(self.url)
        except Exception:
            self.log.exception(_('Error parsing Skoob id for url: %r') % self.url)
            skoob_id = None

        # Title and Skoob id are mandatory; authors are only reported.
        if not title or not skoob_id:
            self.log.error(_('Could not find title/authors/Skoob id for %r') % self.url)
            self.log.error(_('Skoob: %r Title: %r Authors: %r') % (skoob_id, title, authors))
            return

        mi = Metadata(title, authors)

        try:
            mi.series = series
            mi.series_index = series_index
        except Exception:
            self.log.exception(_('Error parsing series for url: %r') % self.url)

        mi.set_identifier('skoob', skoob_id)
        self.skoob_id = skoob_id

        try:
            mi.rating = self.parse_rating(root)
        except Exception:
            self.log.exception(_('Error parsing ratings for url: %r') % self.url)

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception(_('Error parsing comments for url: %r') % self.url)

        try:
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception(_('Error parsing cover for url: %r') % self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.tags = self.parse_tags(root)
        except Exception:
            self.log.exception(_('Error parsing tags for url: %r') % self.url)

        try:
            isbn = self.parse_isbn(root)
            self.isbn = mi.isbn = isbn
        except Exception:
            self.log.exception(_('Error parsing ISBN for url: %r') % self.url)

        try:
            publisher = self.parse_publisher(root)
            if publisher:
                mi.publisher = publisher
            else:
                self.log(_('No publisher find for url: %r') % self.url)
        except Exception:
            self.log.exception(_('Error parsing publisher for url: %r') % self.url)

        try:
            pubdate = self.parse_year(root)
            if pubdate:
                mi.pubdate = pubdate
            else:
                self.log(_('No publication date find for url: %r') % self.url)
        except Exception:
            self.log.exception(_('Error parsing publication for url: %r') % self.url)

        try:
            language = self.parse_language(root)
            if language:
                mi.language = language
            else:
                self.log(_('No language find for url: %r') % self.url)
        except Exception:
            self.log.exception(_('Error parsing language for url: %r') % self.url)

        mi.source_relevance = self.relevance

        # Remember which Skoob id this ISBN resolved to.
        if self.skoob_id and self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.skoob_id)

        self.plugin.clean_downloaded_metadata(mi)

        self._log_metadata(mi)

        self.result_queue.put(mi)

    def _log_metadata(self, mi):
        '''Write ``mi`` to the info log between two 70-dash separator lines.'''
        separator = '-' * 70
        for entry in (separator, mi, separator):
            self.log.info(entry)

    def parse_skoob_id(self, url):
        '''
        Extract the Skoob id (``<digits>ed<digits>``) from a book url.

        Urls containing ``/livro/`` may carry the id anywhere; for any other
        url the id must be directly followed by ``.html``. The id is
        returned in lower case.

        Raises AttributeError when the url contains no id.
        '''
        if '/livro/' in url:
            pattern = r'([0-9]+ed[0-9]+)'
        else:
            # The dot is escaped so that only a literal ".html" suffix counts.
            pattern = r'([0-9]+ed[0-9]+)(?=\.html)'
        return re.search(pattern, url.lower()).group(1)

    def parse_title_series(self, root):
        '''
        Extract the title and, when present, the series name and index.

        Returns a ``(title, series, series_index)`` tuple of strings, with
        None for every part that is not available. A subtitle without ``#``
        is taken as the series name with no index. A subtitle containing
        ``#`` must look like ``<name> #<number>``; if it does not, both
        series fields are None.
        '''
        title_node = root.xpath('//strong[@class="sidebar-titulo"]')
        if not title_node:
            return None, None, None
        title_text = title_node[0].text_content().strip()

        series = series_index = None
        series_node = root.xpath('//h3[@class="sidebar-subtitulo"]')
        if series_node:
            series_text = series_node[0].text_content().strip()
            if '#' in series_text:
                # Take the number that follows the '#', not the first number
                # in the text: series names may contain digits themselves.
                match = re.search(r'(.+) #\s*(\d+)', series_text)
                if match:
                    series, series_index = match.groups()
            else:
                series = series_text

        return title_text, series, series_index

    def parse_tags(self, root):
        '''
        Return the stripped text of every element found under the list
        items of the ``tags`` element; an empty list when there are none.
        '''
        return [node.text_content().strip() for node in root.xpath("id('tags')/li/*")]

    def parse_authors(self, root):
        '''
        Return the author names as a list of stripped strings.

        The first link of the main menu container is used; when there is
        none, the italic ``sidebar-subtitulo`` elements are used instead.
        '''
        nodes = (
            root.xpath("id('pg-livro-menu-principal-container')/a[1]")
            or root.xpath("id('pg-livro-menu-principal-container')/i[@class='sidebar-subtitulo']")
        )
        return [node.text_content().strip() for node in nodes]

    def parse_rating(self, root):
        '''
        Return the rating shown in the rating box as a float, or None when
        the page has no rating element.
        '''
        nodes = root.xpath("id('pg-livro-box-rating')/span")
        if not nodes:
            return None
        rating_text = nodes[0].text.strip()
        return float(rating_text)

    def parse_isbn(self, root):
        '''
        Return the stripped text of the first ``span`` inside the
        ``sidebar-desc`` block, or None when there is no such element.
        '''
        candidates = root.xpath('//div[@class="sidebar-desc"]/span[1]')
        return candidates[0].text_content().strip() if candidates else None

    def parse_publisher(self, root):
        '''
        Return the publisher name, or None when the page does not give one.

        A link inside the ``sidebar-desc`` block takes precedence. Without
        a link, the block's text nodes are searched for "Editora: " and the
        last one that matches supplies the name.
        '''
        link_nodes = root.xpath('//div[@class="sidebar-desc"]/a')
        if link_nodes:
            return link_nodes[0].text_content().strip() or None

        found = None
        for chunk in root.xpath('//div[@class="sidebar-desc"]/text()'):
            if re.search('(Editora: )', chunk):
                found = re.search('(Editora: )(.+)', chunk).group(2)
        return found or None

    def parse_year(self, root):
        '''
        Return the publication date as a timezone-aware datetime, or None
        when the page gives no usable year.

        The year is read from the "Ano: <digits>" text of the
        ``sidebar-desc`` block. When several text nodes match, the last
        year inside the accepted range is used.
        '''
        year = 0
        for year_text in root.xpath('//div[@class="sidebar-desc"]/text()'):
            # No trailing space is required, so a year at the very end of a
            # text node is accepted too.
            match = re.search(r'Ano: (\d+)', year_text)
            if match is None:
                continue
            candidate = int(match.group(1))
            # Calibre's year range is 101-7999
            if 100 < candidate < 8000:
                year = candidate

        if not year:
            return None

        from calibre.utils.date import utc_tz
        # We only have the year, so let's use Jan 2nd (Jan 1st is buggy on the results screen).
        return datetime.datetime(year, 1, 2, tzinfo=utc_tz)

    def parse_language(self, root):
        '''
        Return the code that the plugin's ``iso_639_codes`` table maps the
        book's language to, or None.

        The language name is read from the "Idioma: <name>" text of the
        ``sidebar-desc`` block (the last matching text node wins). None is
        returned when the page names no language or when the name is not in
        the table.
        '''
        language = None
        for language_text in root.xpath('//div[@class="sidebar-desc"]/text()'):
            match = re.search(r'Idioma: (.+)', language_text)
            if match:
                language = match.group(1).strip()

        # Nothing to look up: return None rather than failing on it.
        if not language:
            return None

        # The codes come from the table bundled with the plugin; no online
        # lookup is made.
        from calibre_plugins.skoob_metadata.iso_codes import iso_639_codes
        try:
            # The table is indexed by the capitalized language name.
            language_code = iso_639_codes[language.capitalize()]
        except KeyError:
            return None
        return language_code or None

    def parse_comments(self, root):
        '''
        Return the book description as sanitized HTML, or None when the
        page has no synopsis paragraph.

        Only the first paragraph of the synopsis element is used; ``h2``
        and ``strong`` spans are removed from it before sanitizing.
        '''
        # The description lives in the (hidden) synopsis div.
        paragraphs = root.xpath("id('livro-perfil-sinopse-txt')/p")
        if not paragraphs:
            return None

        html = tostring(paragraphs[0], method='html').decode()
        for pattern in (r'<h2.*</h2>', r'<strong>.*</strong>'):
            html = re.sub(pattern, '', html, flags=re.IGNORECASE)
        return sanitize_comments_html(html.encode('utf-8'))

    def parse_cover(self, root):
        '''
        Return the cover image url, or None when no usable url is found.

        The ``src`` of the cover image is completed with ``Skoob.BASE_URL``
        when it has no host and does not mention the Skoob domain. The
        result must contain a ``https://skoob...`` url, which is what gets
        returned. A url that is found is also stored in the plugin's cover
        cache under the current Skoob id.
        '''
        sources = root.xpath("//a[@class='capa-link-item']/img/@src")
        if not sources:
            return None

        img_url = sources[0]
        # The domain is plain text, not a pattern, so escape it for the search.
        if not urlparse(img_url).netloc and \
                not re.search(re.escape(Skoob.BASE_DOMAIN), img_url, flags=re.IGNORECASE):
            img_url = Skoob.BASE_URL + img_url

        # Keep only the part that starts at "https://skoob".
        match = re.search(r'(https://skoob.+)', img_url)
        if match is None:
            # Leave any url already cached for this id untouched.
            return None
        cover_url = match.group(1)

        if self.skoob_id:
            self.plugin.cache_identifier_to_cover_url(self.skoob_id, cover_url)
        return cover_url

    def _url_exists(self, url):
        '''
        Send a HEAD request for ``url`` and return True when the server
        answers with status 200.

        Connection errors are not caught here; they propagate to the caller.
        '''
        r = urlparse(url)
        http_client = six.moves.http_client
        # Talk TLS to https urls instead of sending plain HTTP to them.
        if r.scheme == 'https':
            conn = http_client.HTTPSConnection(r.netloc, timeout=self.timeout)
        else:
            conn = http_client.HTTPConnection(r.netloc, timeout=self.timeout)

        # An empty path is not a valid request target, and the query string
        # is part of what identifies the resource.
        target = r.path or '/'
        if r.query:
            target += '?' + r.query

        try:
            conn.request('HEAD', target)
            return conn.getresponse().status == 200
        finally:
            # Release the socket even when the request fails.
            conn.close()
