#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2014, David Forrester <davidfor@internode.on.net>'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
from threading import Thread

from lxml.html import fromstring, tostring

from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower

from calibre_plugins.worldswithoutend import WorldsWithoutEnd

class Worker(Thread): # Get details

    '''
    Get book details from a WorldsWithoutEnd book page in a separate thread.

    The worker downloads ``url``, parses the page and, when a title, an
    accepted list of authors and a WorldsWithoutEnd id were all found, puts a
    populated Metadata object on ``result_queue``. Exceptions raised while
    fetching or parsing are logged rather than raised to the caller.
    '''

    # XPath selecting the rows of the details table on a book page.
    DETAILS_ROWS_XPATH = '//div[@id="mainbody"]/div/table/tr'

    # Positions, within that table, of the rows each parser reads. The value
    # of interest is always in the second cell of the row.
    AUTHORS_ROW = 0
    PUBLISHED_ROW = 1
    SERIES_ROW = 2
    GENRE_ROW = 7
    SUB_GENRE_ROW = 8

    def __init__(self, url, publisher, match_authors, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        :param url: Address of the book page to download.
        :param publisher: Stored on the worker; not otherwise used here.
        :param match_authors: Author names the page's authors must match.
                              When empty, any authors are accepted.
        :param result_queue: Queue that receives the resulting Metadata.
        :param browser: Browser object; a clone of it is used for the download.
        :param log: Callable logger that also offers info/error/exception.
        :param relevance: Value copied to the Metadata's source_relevance.
        :param plugin: Plugin used to cache the cover URL and to clean the
                       downloaded metadata.
        :param timeout: Timeout passed to the download call.
        '''
        Thread.__init__(self)
        self.daemon = True
        self.url, self.result_queue = url,  result_queue
        self.publisher, self.match_authors = publisher, match_authors
        self.log, self.timeout = log, timeout
        self.relevance, self.plugin = relevance, plugin
        self.browser = browser.clone_browser()
        self.cover_url = self.worldswithoutend_id = self.isbn = None

    def run(self):
        '''
        Thread entry point: run get_details() and log anything it raises.
        '''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def get_details(self):
        '''
        Download the page at self.url and pass the parsed tree to
        parse_details(). Download and parse failures are logged and end the
        method early.
        '''
        try:
            self.log.info('WorldsWithoutEnd url: %r'%self.url)
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            # A timeout arrives wrapped as the first argument of the error.
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'WorldsWithoutEnd timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        raw = raw.decode('utf-8', errors='replace')

        # The site can answer a bad address with a regular page whose title
        # starts with "404 - ".
        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            msg = 'Failed to parse WorldsWithoutEnd details page: %r'%self.url
            self.log.exception(msg)
            return

        self.parse_details(root)

    def parse_details(self, root):
        '''
        Build a Metadata object from the parsed page ``root`` and queue it.

        Title, authors and id are mandatory: when any of them is missing the
        book is logged and dropped. Every other field is best effort; a
        failure while parsing one is logged and the field is left unset.
        '''
        try:
            worldswithoutend_id = self.parse_worldswithoutend_id(self.url)
            self.log('parse_details - worldswithoutend_id: "%s" ' % (worldswithoutend_id))
        except Exception:
            self.log.exception('Error parsing WorldsWithoutEnd id for url: %r'%self.url)
            worldswithoutend_id = None

        try:
            title = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None

        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []

        if not title or not authors or not worldswithoutend_id:
            self.log.error('Could not find title/authors/WorldsWithoutEnd id for %r'%self.url)
            self.log.error('WorldsWithoutEnd id: %r Title: %r Authors: %r'%(worldswithoutend_id, title,
                authors))
            return

        mi = Metadata(title, authors)
        mi.set_identifier(WorldsWithoutEnd.IDENTIFIER, worldswithoutend_id)
        self.worldswithoutend_id = worldswithoutend_id

        try:
            mi.pubdate, mi.publisher = self.parse_publisher_and_date(root)
        except Exception:
            self.log.exception('Error parsing publisher and date for url: %r'%self.url)

        try:
            (mi.series, mi.series_index) = self.parse_series(root)
        except Exception:
            self.log.exception('Error parsing series for url: %r'%self.url)

        try:
            mi.tags = self.parse_tags(root)
        except Exception:
            self.log.exception('Error parsing tags for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        mi.source_relevance = self.relevance

        # The id is known to be set here (checked above), so only the cover
        # URL decides whether there is something to cache.
        if self.cover_url:
            self.log("Caching cover URL")
            self.plugin.cache_identifier_to_cover_url(self.worldswithoutend_id, self.cover_url)

        self.plugin.clean_downloaded_metadata(mi)

        self.result_queue.put(mi)

    def _details_cell(self, root, row_index):
        '''
        Return the second cell of row ``row_index`` of the details table, or
        None when the page has no such row or the row has fewer than two cells.
        '''
        rows = root.xpath(self.DETAILS_ROWS_XPATH)
        if len(rows) <= row_index:
            return None
        cells = rows[row_index].xpath('./td')
        if len(cells) < 2:
            return None
        return cells[1]

    def parse_worldswithoutend_id(self, url):
        '''
        Return the text after the last "=" in ``url``, or None when the url
        contains no "=" or nothing follows it.
        '''
        _head, separator, book_id = url.rpartition('=')
        if not separator:
            # Without "=" there is no id to extract; do not return the
            # whole url as if it were one.
            return None
        return book_id.strip() or None

    def parse_title(self, root):
        '''
        Return the stripped text of the first <h2> on the page, or None when
        there is no <h2> or it has no leading text.
        '''
        title_node = root.xpath('//h2')
        if not title_node:
            return None
        text = title_node[0].text
        return text.strip() if text else None

    def parse_series(self, root):
        '''
        Return ``(series_name, series_index)`` from the series row.

        Returns ``(None, None)`` when the row holds no link. The index is
        None when it cannot be read from the text following the link.
        '''
        cell = self._details_cell(root, self.SERIES_ROW)
        links = cell.xpath('./a') if cell is not None else []
        if not links:
            self.log('parse_series - no series info')
            return (None, None)

        link = links[0]
        series_name = link.text
        series_index = None
        # The index is taken from the third space-separated piece of the
        # text that follows the link.
        pieces = (link.tail or '').split(' ')
        if len(pieces) > 2:
            try:
                series_index = float(pieces[2].strip())
            except ValueError:
                series_index = None
        self.log('parse_series - series: "%s" index: "%s"' % (series_name, series_index))
        return (series_name, series_index)

    def parse_authors(self, root):
        '''
        Return the list of author names from the authors row.

        When self.match_authors is not empty, at least one of its names must
        occur (case-insensitively) in the names found; otherwise the authors
        are rejected and None is returned.
        '''
        cell = self._details_cell(root, self.AUTHORS_ROW)
        found = cell.xpath('./a/text()') if cell is not None else []
        authors = [name.strip() for name in found if name and name.strip()]
        self.log('parse_authors - authors: "%s"' % authors)

        if not self.match_authors:
            return authors

        haystack = lower(' '.join(authors))
        if any(lower(wanted) in haystack for wanted in self.match_authors):
            return authors
        self.log('Rejecting authors as not a close match: ', ','.join(authors))
        return None

    def parse_publisher_and_date(self, root):
        '''
        Return ``(published_date, publisher)`` from the publication row.

        The date is a datetime built from the year that follows the
        publisher link. A missing or unreadable year yields a None date
        without discarding the publisher; a row without a publisher link
        yields ``(None, None)``.
        '''
        cell = self._details_cell(root, self.PUBLISHED_ROW)
        names = cell.xpath('./a/text()') if cell is not None else []
        links = cell.xpath('./a') if cell is not None else []
        if not names or not links:
            self.log('parse_publisher_and_date - no publisher info')
            return (None, None)

        publisher = names[0]
        self.log('parse_publisher_and_date - publisher: "%s"' % publisher)

        # The year is the text after the link, separated from it by a comma.
        year = (links[0].tail or '').strip().strip(',').strip()
        self.log('parse_publisher_and_date - year: "%s"' % year)
        published_date = None
        if year:
            try:
                published_date = datetime.datetime.strptime(year, "%Y")
            except ValueError:
                self.log.error('parse_publisher_and_date - cannot parse year: %r' % year)
        return (published_date, publisher)

    def parse_comments(self, root):
        '''
        Return the sanitized HTML of the book description, or None when the
        page has no main body.

        The description is made of the <p> children of the main body, up to
        the first <h3> that appears after a paragraph has been collected.
        '''
        mainbody_node = root.xpath('//div[@id="mainbody"]')
        if not mainbody_node:
            self.log('parse_comments - no comments found.')
            return None

        paragraphs = []
        for child in mainbody_node[0]:
            if child.tag == 'p':
                # Ask for text explicitly: by default the serializer returns
                # bytes, which cannot be joined with text on Python 3.
                paragraphs.append(tostring(child, method='html', encoding='unicode'))
            elif child.tag == 'h3' and paragraphs:
                break
        return sanitize_comments_html(''.join(paragraphs))

    def parse_cover(self, root):
        '''
        Return the cover URL, or None when the page shows no cover image.

        An ``src`` that is already an absolute http(s) URL is returned as
        is; anything else is appended to the site's base URL.
        '''
        cover_node = root.xpath('//div[@id="novelblock_big"]/img')
        if not cover_node:
            return None
        cover_file = cover_node[0].get('src')
        self.log('parse_cover - cover_file="%s"' % cover_file)
        if not cover_file:
            return None
        if cover_file.startswith(('http://', 'https://')):
            return cover_file
        return WorldsWithoutEnd.BASE_URL + '/' + cover_file

    def parse_tags(self, root):
        '''
        Return the list of tags: the genre followed by the sub-genres.

        The two rows are read independently, so a page lacking one of them
        still yields the tags from the other. Empty entries are skipped.
        '''
        ans = []

        genre_cell = self._details_cell(root, self.GENRE_ROW)
        genre = genre_cell.text if genre_cell is not None else None
        if genre and genre.strip():
            ans.append(genre.strip())

        sub_genre_cell = self._details_cell(root, self.SUB_GENRE_ROW)
        if sub_genre_cell is not None:
            for sub_genre in sub_genre_cell.xpath('./a'):
                name = sub_genre.text
                if name and name.strip():
                    ans.append(name.strip())

        self.log('parse_tags - ans: "%s"' % ans)
        return ans

