#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2014, David Forrester <davidfor@internode.on.net>'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
from threading import Thread

from lxml.html import fromstring, tostring

from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.icu import lower

from calibre_plugins.smashwordsmetadata import SmashwordsMetaData

class Worker(Thread): # Get details

    '''
    Get book details from Smashwords book page in a separate thread
    '''

    def __init__(self, url, publisher, match_authors, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        Set up a daemon worker for a single Smashwords book page.

        :param url: Address of the book page to download.
        :param publisher: Stored on the worker as ``self.publisher``.
        :param match_authors: Author names the result has to match; when
            empty, parse_authors() accepts whatever the page lists.
        :param result_queue: Receives the finished Metadata object via ``put()``.
        :param browser: Its ``clone_browser()`` result is used for the download.
        :param log: Logger used for progress and error reporting.
        :param relevance: Copied to ``source_relevance`` of the result.
        :param plugin: Owning plugin; supplies ``category_handling`` and
            ``use_long_description`` and is called back by parse_details().
        :param timeout: Timeout handed to the browser when opening the page.
        '''
        Thread.__init__(self)
        self.daemon = True

        self.url = url
        self.result_queue = result_queue
        self.publisher = publisher
        self.match_authors = match_authors
        self.log = log
        self.timeout = timeout
        self.relevance = relevance
        self.plugin = plugin

        # The worker uses its own clone of the supplied browser.
        self.browser = browser.clone_browser()

        # Results filled in by parse_details(). ``lang`` is initialised here
        # as well so that it can be read even when no language was found.
        self.cover_url = None
        self.smashwords_id = None
        self.isbn = None
        self.lang = None

        self.category_handling = plugin.category_handling
        self.use_long_description = plugin.use_long_description

    def run(self):
        '''
        Thread entry point: fetch and parse the book page.

        Any ordinary error escaping get_details() is logged together with
        the URL instead of being raised out of the thread.
        '''
        try:
            self.get_details()
        except Exception:
            # Only ordinary errors are swallowed; interpreter-level signals
            # such as SystemExit are left alone.
            self.log.exception('get_details failed for url: %r'%self.url)

    def get_details(self):
        try:
            self.log.info('SmashwordsMetaData url: %r'%self.url)
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'Smashwords timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        raw = raw.decode('utf-8', errors='replace')
#         open('E:\\t3.html', 'wb').write(raw)

        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except:
            msg = 'Failed to parse Smashwords details page: %r'%self.url
            self.log.exception(msg)
            return

        self.parse_details(root)

    def parse_details(self, root):
        '''
        Build a Metadata object from a parsed book page and queue it.

        The book data is read from the JSON object assigned to
        ``window.angularData.book`` in one of the page's script elements;
        the cover URL is read from the document head. Nothing is queued when
        that JSON object, the title, the authors or the Smashwords id cannot
        be determined. A problem with any other field is logged and that
        field is left unset.

        :param root: Parsed document; must provide ``head`` and ``xpath()``.
        '''
        head = root.head

        try:
            smashwords_id = self.parse_smashwords_id(self.url)
        except Exception:
            self.log.exception('Error parsing URL for Smashwords: %r'%self.url)
            smashwords_id = None

        try:
            page_metadata = self._find_page_metadata(root)
        except Exception as e:
            self.log("Exception thrown getting scripts:", e)
            page_metadata = None
        if page_metadata is None:
            # Title and authors both come from this object and are mandatory,
            # so there is no point in going on without it.
            self.log.error('No book data found in the page scripts for %r'%self.url)
            return

        try:
            title = page_metadata["title"]
        except Exception:
            self.log.exception('Error parsing page for title: url=%r'%self.url)
            title = None

        try:
            authors = self.parse_authors(page_metadata["contributors"])
        except Exception:
            self.log.exception('Error parsing page for authors: url=%r'%self.url)
            authors = []

        if not title or not authors or not smashwords_id:
            self.log.error('Could not find title/authors/SmashwordsMetaData id for %r'%self.url)
            self.log.error('Smashwords: %r Title: %r Authors: %r'%(smashwords_id, title, authors))
            return

        mi = Metadata(title, authors)
        mi.set_identifier(SmashwordsMetaData.ID_NAME, smashwords_id)
        self.smashwords_id = smashwords_id

        try:
            publisher = page_metadata.get('publisher', None)
            if publisher is not None:
                mi.publisher = publisher['display_name']
            # The value is divided by 1000 before conversion, i.e. it is
            # treated as a timestamp in milliseconds.
            pubdate_epoch = int(page_metadata['publication_date'])/1000
            mi.pubdate = datetime.datetime.fromtimestamp(pubdate_epoch)
        except Exception:
            self.log.exception('Error parsing publisher and date for url: %r'%self.url)

        try:
            (mi.series, mi.series_index) = self.parse_series(page_metadata.get('series', None))
        except Exception:
            self.log.exception('Error parsing series for url: %r'%self.url)

        try:
            mi.tags = self.parse_tags(page_metadata["tags"])
        except Exception:
            self.log.exception('Error parsing tags for url: %r'%self.url)

        try:
            # parse_rating() reads the page JSON, so it must be given that
            # object rather than the HTML document.
            rating = self.parse_rating(page_metadata)
            if rating is not None:
                mi.rating = float(rating)
        except Exception:
            self.log.exception('Error parsing ratings for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(head)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.comments = self.parse_comments(page_metadata)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            isbn = page_metadata.get('isbn13', None)
            if isbn:
                self.isbn = mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        try:
            language = self.parse_language(page_metadata['language'])
            if language:
                self.lang = mi.language = language
        except Exception:
            self.log.exception('Error parsing languages for url: %r'%self.url)

        mi.source_relevance = self.relevance

        if self.smashwords_id and self.cover_url:
            self.plugin.cache_identifier_to_cover_url(self.smashwords_id, self.cover_url)

        self.plugin.clean_downloaded_metadata(mi)

        self.result_queue.put(mi)

    def _find_page_metadata(self, root):
        '''
        Return the book data dict embedded in the page scripts, or None.

        Each script element is searched for ``window.angularData.book = ...;``
        and the first assignment whose value decodes to a JSON object wins.
        '''
        import json
        data_pattern = re.compile(r"window.angularData.book = (.*);")
        for script in root.xpath('//script'):
            m = data_pattern.search(script.text or '')
            if m is None:
                continue
            try:
                candidate = json.loads(m.group(1))
            except ValueError:
                # Not valid JSON; another script may still hold the data.
                continue
            if isinstance(candidate, dict):
                return candidate
        return None

    def parse_smashwords_id(self, url):
        '''
        Return the part of ``url`` that follows the store's book path.

        The pattern is the plugin's STORE_DOMAIN and BOOK_PATH followed by a
        slash; everything after that slash is the id. A URL that does not
        match raises AttributeError.
        '''
        pattern = SmashwordsMetaData.STORE_DOMAIN + SmashwordsMetaData.BOOK_PATH + '/(.*)'
        found = re.search(pattern, url)
        return found.group(1)

    def parse_series(self, series_info):
        '''
        Extract series name and index from the ``series`` page metadata.

        Only the first entry of ``series_info`` is used. The index comes from
        its ``number``; when ``custom_membership_statement`` contains numeric
        words, the first such word in reading order replaces it.

        :param series_info: List of series dicts, or None/empty when the
            book is not part of a series.
        :return: ``(series_name, series_index)``; both are None without a
            series, and the index is None when no number could be read.
        '''
        if not series_info:
            return (None, None)

        entry = series_info[0]
        series_name = entry['name']

        series_index = None
        try:
            series_index = float(entry['number'])
        except (KeyError, TypeError, ValueError):
            # Missing or non-numeric number: leave the index unset.
            pass

        # The statement may be absent or null, so fall back to an empty string.
        statement = (entry.get('custom_membership_statement') or '').strip()
        # Words are tried from last to first without stopping early, so the
        # value that survives is the first numeric word of the statement.
        for candidate in reversed(statement.split()):
            self.log('parse_series - candidate:', candidate)
            try:
                series_index = float(candidate)
            except ValueError:
                continue

        return (series_name, series_index)

    def parse_authors(self, contributors):
        '''
        Return the contributors' display names, or None when they do not
        match the requested authors.

        With ``self.match_authors`` empty every name list is accepted.
        Otherwise at least one requested author must occur, ignoring case,
        as a substring of the space-joined names; if none does, the
        rejection is logged and None is returned.
        '''
        names = [person['account']["display_name"].strip() for person in contributors]

        wanted = self.match_authors
        if wanted:
            haystack = lower(' '.join(names))
            if not any(lower(candidate) in haystack for candidate in wanted):
                self.log('Rejecting authors as not a close match: ', ','.join(names))
                return None
        return names

    def parse_comments(self, page_metadata):
        '''
        Return the book description with surrounding whitespace removed.

        The long description is used when ``self.use_long_description`` is
        set and it contains text; otherwise the short description is used.
        Missing, null or blank descriptions count as empty, so the result is
        '' when neither description has any text.
        '''
        short_text = (page_metadata.get('short_description') or '').strip()
        if not self.use_long_description:
            return short_text
        long_text = (page_metadata.get('long_description') or '').strip()
        # Strip before testing so a whitespace-only long description still
        # falls back to the short one.
        return long_text or short_text

    def parse_rating(self, page_metadata):
        '''Return the ``rating`` entry of ``page_metadata``, or None if absent.'''
        rating = page_metadata.get('rating')
        return rating

    def parse_cover(self, head):
        '''
        Return the cover URL taken from the page's ``og:image`` meta tag.

        The first matching ``content`` value is returned with surrounding
        whitespace removed. When there is no match this is logged and None
        is returned.
        '''
        matches = head.xpath('//meta[@property="og:image"]/@content')
        if matches is None or len(matches) == 0:
            self.log('parse_cover - no cover URL found in metadata.')
            return None
        return matches[0].strip()

    def parse_language(self, language):
        '''Return the ``iso_code`` entry of the page's language record.'''
        return language['iso_code']

    def parse_tags(self, tags):
        '''Return the given tags as a new list with surrounding whitespace removed.'''
        return [label.strip() for label in tags]