#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2014, David Forrester <davidfor@internode.on.net>'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
from threading import Thread

from lxml.html import fromstring, tostring

from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower

from calibre_plugins.kobobooks import KoboBooks
#import calibre_plugins.kobobooks.config as cfg

class Worker(Thread): # Get details

    '''
    Get book details from a Kobo Books book page in a separate thread.

    The page at ``url`` is downloaded and parsed; when at least a title, an
    author list and a Kobo Books id are found, a ``Metadata`` object is put
    on ``result_queue``. All failures are logged through ``log`` and never
    propagate out of the thread.
    '''

    def __init__(self, url, publisher, match_authors, result_queue, browser, log, relevance, category_handling, plugin, timeout=20):
        '''
        :param url: Address of the book page to download.
        :param publisher: Stored on the worker; not otherwise used by the
            methods of this class.
        :param match_authors: Author names the result must match. When empty
            no author filtering is done.
        :param result_queue: Object with a ``put()`` method receiving the
            resulting ``Metadata``.
        :param browser: Browser object; a clone (``clone_browser()``) is used
            for the download.
        :param log: Callable logger that also offers ``info()``, ``error()``
            and ``exception()``.
        :param relevance: Value stored as ``source_relevance`` on the result.
        :param category_handling: ``'top_level_only'`` keeps only the first
            category link of each entry, ``'hierarchy'`` joins the links of
            an entry with ``.``; any other value turns every link into its
            own tag.
        :param plugin: Owning plugin; used to cache the cover URL and to
            clean the downloaded metadata.
        :param timeout: Timeout passed to the browser when opening ``url``.
        '''
        Thread.__init__(self)
        self.daemon = True
        self.url, self.result_queue = url,  result_queue
        self.publisher, self.match_authors = publisher, match_authors
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.kobobooks_id = self.isbn = None
        self.category_handling = category_handling

    def run(self):
        '''
        Thread entry point: fetch the details and log any unexpected error.
        '''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def get_details(self):
        '''
        Download the book page, parse it into an lxml tree and hand the tree
        to :meth:`parse_details`. Download and parse problems are logged and
        end the method early.
        '''
        try:
            self.log.info('KoboBooks url: %r'%self.url)
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            # A timeout is reported as the first argument of the exception.
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'Kobo Books timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        raw = raw.decode('utf-8', errors='replace')

        # The site can answer with a regular page whose title announces a 404.
        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            msg = 'Failed to parse Kobo Books details page: %r'%self.url
            self.log.exception(msg)
            return

        self.parse_details(root)

    def parse_details(self, root):
        '''
        Build a ``Metadata`` object from the parsed page ``root`` and put it
        on the result queue.

        Title, authors and Kobo Books id are mandatory: if any of them is
        missing an error is logged and nothing is queued. Every other field
        is optional; a failure while parsing one is logged and the field is
        left unset.
        '''
        try:
            kobobooks_id = self.parse_kobobooks_id(self.url)
            self.log('parse_details - kobobooks_id: "%s" ' % (kobobooks_id))
        except Exception:
            self.log.exception('Error parsing Kobo Books id for url: %r'%self.url)
            kobobooks_id = None

        try:
            title = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None

        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []

        if not title or not authors or not kobobooks_id:
            self.log.error('Could not find title/authors/KoboBooks id for %r'%self.url)
            self.log.error('Kobo Books: %r Title: %r Authors: %r'%(kobobooks_id, title,
                authors))
            return

        mi = Metadata(title, authors)
        mi.set_identifier('kobo', kobobooks_id)
        self.kobobooks_id = kobobooks_id

        self.log('parse_details - root: "%s"' % root)
        try:
            mi.pubdate, mi.publisher = self.parse_publisher_and_date(root)
        except Exception:
            self.log.exception('Error parsing publisher and date for url: %r'%self.url)

        try:
            (mi.series, mi.series_index) = self.parse_series(root)
        except Exception:
            self.log.exception('Error parsing series for url: %r'%self.url)

        try:
            mi.tags = self.parse_tags(root)
        except Exception:
            self.log.exception('Error parsing tags for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            isbn = self.parse_isbn(root)
            if isbn:
                self.isbn = mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.kobobooks_id and self.cover_url:
            self.plugin.cache_identifier_to_cover_url(self.kobobooks_id, self.cover_url)

        self.plugin.clean_downloaded_metadata(mi)

        self.result_queue.put(mi)

    def parse_kobobooks_id(self, url):
        '''
        Return the part of ``url`` that follows the store domain and book
        path, or ``None`` when ``url`` does not contain them.
        '''
        match = re.search(KoboBooks.STORE_DOMAIN + KoboBooks.BOOK_PATH + '(.*)', url)
        if match is None:
            return None
        return match.group(1)

    def parse_title(self, root):
        '''
        Return the stripped text of the first ``<h1 class="title">`` element,
        or ``None`` when there is no such element or it has no text.
        '''
        title_node = root.xpath('//h1[@class="title"]')
        if title_node:
            title = title_node[0].text
            if title:
                return title.strip()
        return None

    def parse_series(self, root):
        '''
        Return ``(series_name, series_index)`` taken from the first
        ``<h3 class="series">`` element.

        ``series_index`` is a float, or ``None`` when the index is missing or
        not a number. ``(None, None)`` is returned when the page carries no
        series name.
        '''
        series_nodes = root.xpath('//h3[@class="series"]')
        if series_nodes:
            series_node = series_nodes[0]
            names = series_node.xpath('./a[@class="description-anchor"]/span[1]/text()')
            series_name = names[0].strip() if names else ''
            if series_name:
                series_index = None
                index_texts = series_node.xpath('./span/text()')
                if index_texts:
                    # The index is surrounded by separators, e.g. " - 3, ".
                    try:
                        series_index = float(index_texts[0].strip().strip(' -,'))
                    except ValueError:
                        series_index = None
                return (series_name, series_index)
        self.log('parse_series - no series info')
        return (None, None)

    def parse_authors(self, root):
        '''
        Return the list of author names found in the attribution link(s).

        The joined link text has everything up to the last standalone word
        "by" removed and is then split on " and ". Blank names are dropped.
        When ``self.match_authors`` is not empty, at least one of its names
        must occur (case-insensitively) in the found authors; otherwise the
        rejection is logged and an empty list is returned.
        '''
        author = ''.join(root.xpath('//a[@class="attribution description-anchor"]/text()'))
        # Split on the word "by" only: a plain 'by ' split would also cut
        # names that merely contain those letters, such as "Abby Smith".
        author = re.split(r'\bby ', author)[-1]
        authors = [name.strip() for name in author.split(' and ')]
        authors = [name for name in authors if name]
        if not authors:
            return []

        if not self.match_authors:
            return authors

        found = lower(' '.join(authors))
        for wanted in self.match_authors:
            if lower(wanted) in found:
                return authors
        self.log('Rejecting authors as not a close match: ', ','.join(authors))
        return []

    def parse_publisher_and_date(self, root):
        '''
        Return ``(published_date, publisher)`` read from the first ``<dd>``
        of the first ``<dl class="oneline">`` element.

        ``publisher`` is the element's leading text without the trailing
        comma. ``published_date`` is a ``datetime`` parsed from the nested
        ``<span>`` using the format "%B %Y" (e.g. "March 2014"). Either value
        is ``None`` when it is missing or cannot be parsed, so a bad date no
        longer discards the publisher.
        '''
        published_nodes = root.xpath('//dl[@class="oneline"][1]/dd[1]')
        if not published_nodes:
            self.log('parse_publisher_and_date - no publisher info')
            return (None, None)
        published_node = published_nodes[0]

        publisher_texts = published_node.xpath('./text()')
        publisher = None
        if publisher_texts:
            publisher = publisher_texts[0].strip().strip(',').strip() or None

        published_date = None
        date_texts = published_node.xpath('./span/text()')
        if date_texts:
            try:
                published_date = datetime.datetime.strptime(date_texts[0].strip(), "%B %Y")
            except ValueError:
                self.log('parse_publisher_and_date - unrecognised date: %r' % date_texts[0])
        return (published_date, publisher)

    def parse_comments(self, root):
        '''
        Return the sanitized HTML of the first
        ``<div class="synopsis-description-all">`` element, or ``None`` when
        the page has no such element.
        '''
        description_node = root.xpath('//div [@class="synopsis-description-all"]')
        if description_node:
            comments = tostring(description_node[0], method='html')
            return sanitize_comments_html(comments)
        self.log('parse_comments - no comments found.')
        return None

    def parse_cover(self, root):
        '''
        Return the cover URL built from the ``popupimg('...')`` call in the
        ``onclick`` attribute of the centered cover link, or ``None`` when no
        such attribute is found.
        '''
        cover_node = root.xpath('//div[@align="center"]/a/@onclick')
        if cover_node:
            match = re.match(r"popupimg\('(.*)'\)", cover_node[0])
            if match:
                return KoboBooks.BASE_URL + KoboBooks.BOOK_PATH + match.group(1)
        return None

    def parse_isbn(self, root):
        '''
        Return the text of the first ``<dd>`` that follows the ``<dt>``
        labelled "ISBN:" in the first ``<dl class="oneline">`` element, or
        ``None`` when there is no such entry or it is empty.
        '''
        isbn_nodes = root.xpath('//dl[@class="oneline"][1]')
        if not isbn_nodes:
            return None
        have_isbn_dt = False
        for elem in isbn_nodes[0]:
            text = (elem.text or '').strip()
            if elem.tag == 'dt' and text == 'ISBN:':
                have_isbn_dt = True
            elif elem.tag == 'dd' and have_isbn_dt:
                self.log('parse_isbn - ISBN dd - elem: "%s" ' % text)
                return text or None
        return None

    def parse_tags(self, root):
        '''
        Return the list of tags built from the links inside the first
        ``<ul class="category-rankings">`` element.

        Commas in a category name are replaced by semicolons. Depending on
        ``self.category_handling`` only the first link of each list entry is
        used ('top_level_only'), the links of an entry are chained with "."
        ('hierarchy', every level is added as its own tag), or each link
        becomes a separate tag. Duplicates (compared case-insensitively) and
        links without text are skipped. An empty list is returned when the
        page has no category list.
        '''
        ans = []
        seen = set()
        category_node = root.xpath('//ul[@class="category-rankings"]')
        if not category_node:
            self.log('parse_tags - no categories found.')
            return ans

        top_level_only = self.category_handling == 'top_level_only'
        hierarchy = self.category_handling == 'hierarchy'
        for li in category_node[0].xpath('./li'):
            tag = ''
            for i, a in enumerate(li.iterdescendants('a')):
                if top_level_only and i > 0:
                    # Everything after the first link is a sub category.
                    self.log('parse_tags - top level only and sub level category')
                    break
                raw = (a.text or '').strip().replace(',', ';')
                if not raw:
                    # Avoid tags such as "Fiction." or ".Sub".
                    continue
                if hierarchy and tag:
                    tag = tag + "." + raw
                else:
                    tag = raw
                ltag = lower(tag)
                if ltag not in seen:
                    ans.append(tag)
                    seen.add(ltag)
        return ans

