#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2015, David Forrester <davidfor@internode.on.net>'
__docformat__ = 'restructuredtext en'

import time, re
try:
    from urllib.parse import quote, unquote
except ImportError:
    from urllib import quote, unquote
try:
    from queue import Empty, Queue
except ImportError:
    from Queue import Empty, Queue
import six
from six import text_type as unicode
from mechanize import HTTPError

from lxml.html import fromstring, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (Source, Option)
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_udc


class SmashwordsMetaData(Source):
    '''
    calibre metadata source plugin for the Smashwords store.

    Declares the ``identify`` and ``cover`` capabilities and stores its own
    identifier under the ``smashwords`` key.
    '''

    # Plugin registration details.
    name                    = 'Smashwords Metadata'
    description             = _('Downloads metadata and covers from Smashwords')
    author                  = 'David Forrester'
    version                 = (1, 0, 2)
    minimum_calibre_version = (0, 8, 0)

    # Key under which the Smashwords book id is kept in an identifiers dict.
    ID_NAME = 'smashwords'
    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:smashwords', 'rating', 'languages',
                                'comments', 'publisher', 'pubdate', 'series', 'tags'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True

    # Example URLs kept for reference; they are not used by the code in this class.
    SAMPLE_SEARCH_ISBN = "https://www.smashwords.com/books/search?query=9781311012364"
    SAMPLE_SEARCH_TEXT = "https://www.smashwords.com/books/search?query=free+wrench"
    SAMPLE_BOOK        = "https://www.smashwords.com/books/view/505656"

    # Building blocks for the book page and search URLs.
    STORE_DOMAIN = 'www.smashwords.com'
    BASE_URL = 'https://' + STORE_DOMAIN
    BOOK_PATH = '/books/view'
    SEARCH_PATH = '/books/search'

    # Preference key -> translated label for the (currently disabled)
    # "category_handling" option.
    CATEGORY_HANDLING = {
                         'top_level_only': _('Top level only'),
                         'hierarchy': _('Hierarchy'),
                         'individual_tags': _('Individual tags')
                         }
    # Preference key -> translated label for the (currently disabled)
    # "filtering" option. The selected key is sent as the value of the
    # 'erotica' cookie when a search is run.
    FILTERING = {
                 'no_erotica': _('Exclude erotica'),
                 'no_taboo': _('Include mainstream erotica'),
                 'no_filtering': _('Include all erotica')
                }

    # User-configurable options. Only "use_long_description" is active; the
    # category and filtering options are kept below, commented out.
    options = (
            Option(
                   'use_long_description',
                   'bool',
                   'false',
                   _('Use long description:'),
                   _('Use the long description if there is one.'),
                   ),
#             Option(
#                    'category_handling',
#                    'choices',
#                    'individual_tags',
#                    _('Category handling:'),
#                    _('How to handle categories if they have more than one level.'),
#                    choices=CATEGORY_HANDLING
#                    ),
#             Option(
#                    'filtering',
#                    'choices',
#                    'no_erotica',
#                    _('Filtering:'),
#                    _('Set filtering level for adult content.'),
#                    choices=FILTERING
#                    ),
               )

    @property
    def category_handling(self):
        x = getattr(self, 'cat_handling', None)
        if x is not None:
            return x
        cat_handling = self.prefs.get('category_handling', self.CATEGORY_HANDLING['individual_tags'])

        return cat_handling

    @property
    def filtering(self):
        x = getattr(self, 'test_filtering', None)
        if x is not None:
            return x
        filtering = self.prefs.get('filtering', 'no_filtering')

        return filtering

    @property
    def use_long_description(self):
        x = getattr(self, 'test_use_long_description', None)
        if x is not None:
            return x
        use_long_description = self.prefs.get('use_long_description', False)

        return use_long_description


    def get_book_url_name(self, idtype, idval, url):
        return 'Smashwords'

    def get_book_url(self, identifiers):
        smashwords_id = identifiers.get(self.ID_NAME, None)
        if smashwords_id:
            return (self.ID_NAME, smashwords_id,
                    '%s%s/%s'%(self.BASE_URL, self.BOOK_PATH, smashwords_id))

    def id_from_url(self, url):
#         print("Smashwords::id_from_url - url=", url)
#         print("Smashwords::id_from_url - generic pattern:", self.BASE_URL + ".*" + self.BOOK_PATH + "(.*)?", url)
        match = re.match(self.BASE_URL + self.BOOK_PATH + "/(.*)?", url)
        if match:
#             print("Smashwords::id_from_url - have match using generic URL")
            return (self.ID_NAME, match.groups(0)[0])
        return None
        

    def get_cached_cover_url(self, identifiers):
        url = None
        smashwords_id = identifiers.get(self.ID_NAME, None)
        if smashwords_id is None:
            isbn = identifiers.get('isbn', None)
            if isbn is not None:
                smashwords_id = self.cached_isbn_to_identifier(isbn)
        if smashwords_id is not None:
            url = self.cached_identifier_to_cover_url(smashwords_id)
        return url

    def create_query(self, log, title=None, identifiers={}):
        '''
        Build the Smashwords search URL for a book.

        A valid ISBN in ``identifiers`` is preferred. Otherwise the title is
        converted with ``get_udc().decode``, split into tokens and the
        URL-quoted tokens are joined with ``+``.

        :param log: logger (not used at present).
        :param title: book title, may be ``None``.
        :param identifiers: dict of identifier type -> value (only read).
        :return: the search URL, or ``None`` if there is nothing to search for.
        '''
        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn is not None:
            search_terms = isbn
        elif title:
            decoded_title = get_udc().decode(title)
            quoted_tokens = []
            for token in self.get_title_tokens(decoded_title,
                                strip_joiners=False, strip_subtitle=True):
                if isinstance(token, unicode):
                    token = token.encode('utf-8')
                quoted_tokens.append(quote(token))
            search_terms = '+'.join(quoted_tokens)
        else:
            search_terms = ''

        if not search_terms:
            return None
        return '%s%s?query=%s'%(self.BASE_URL, self.SEARCH_PATH, search_terms)

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30):
        '''
        Find candidate book pages and start one worker per candidate.

        When a Smashwords identifier is supplied the book page URL is built
        directly; otherwise a search query is run and the result page is
        handed to ``_parse_search_results``.

        Note this method will retry without identifiers automatically if no
        match is found with identifiers.

        :param log: logger used for progress and error messages.
        :param result_queue: queue handed to every worker.
        :param abort: event-like object; polled with ``is_set()``.
        :param title: book title, may be ``None``.
        :param authors: list of author names, may be ``None``.
        :param identifiers: dict of identifier type -> value (only read).
        :param timeout: timeout passed to the search request.
        :return: ``None`` normally, or the error text when the search
                 request fails with anything other than an HTTP 404.
        '''
        # Each entry is a (book url, second value) tuple; both values are
        # passed to the worker.
        matches = []

        # If we have a Smashwords id then we do not need to fire a "search".
        # Instead we will go straight to the URL for that book.
        smashwords_id = identifiers.get(self.ID_NAME, None)
        br = self.browser
        if smashwords_id:
            matches.append(('%s%s/%s'%(self.BASE_URL, self.BOOK_PATH, smashwords_id), None))
        else:
            query = self.create_query(log, title=title, identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            try:
                log.info('Querying: %s'%query)
                # The configured filtering level is sent as the 'erotica' cookie.
                br.set_cookie('erotica', self.filtering, '.smashwords.com')
                raw = br.open_novisit(query, timeout=timeout).read()
            except HTTPError as e:
                if e.code == 404: # If the ISBN is not found, a 404 error will be returned.
                    raw = None
                else:
                    err = 'Error running query: %r, result code =%d' % (query, e.code)
                    log.exception(err)
                    return as_unicode(e)
            except Exception as e:
                err = 'Failed to make identify query: %r'%query
                log.exception(e)
                log.exception(err)
                return as_unicode(e)

            if raw is not None:
                root = fromstring(clean_ascii_chars(raw))
                # Now grab the match from the search result, provided the
                # title appears to be for the same book
                self._parse_search_results(log, title, authors, root, matches, timeout)

        if abort.is_set():
            return

        if not matches:
            # This point is only reached on the search path, so "query" is set.
            if identifiers and title:
                # NOTE(review): if the first query was already title based
                # (no valid ISBN among the identifiers) this appears to
                # repeat the same search once - confirm this is acceptable.
                log('No matches found with identifiers, retrying using'
                        ' title and authors. Query: %r'%query)
                return self.identify(log, result_queue, abort, title=title, authors=authors, timeout=timeout)
            log.error('No matches found with query: %r'%query)
            return

        # Imported here rather than at module level.
        from calibre_plugins.smashwordsmetadata.worker import Worker
        author_tokens = list(self.get_author_tokens(authors))
        workers = [Worker(data[0], data[1], author_tokens, result_queue, br, log, i, self) for i, data in
                enumerate(matches)]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Wait until every worker has finished or an abort is requested.
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, timeout):
        '''
        Collect candidate book page URLs from a parsed search result page.

        Search results are broken into several sections:
            - Title Matches
            - Series Matches
            - Author Matches
            - Top Category Matches
            - Full Search

        The first three will be there if there is a match. The last will
        always be there.

        If the page has an ``og:url`` meta property, that URL is added as the
        only match and nothing else is scanned (the search is assumed to have
        landed on the book page itself). Otherwise the "Title Matches"
        section is scanned first and then the "Full Search" section. A book
        is kept when its title contains at least one token of the requested
        title and its authors contain at least one token of the requested
        authors; an empty token list accepts everything. Ids are collected
        without duplicates, up to ``max_results`` in total.

        :param log: logger used for progress and error messages.
        :param orig_title: title being searched for, may be ``None``.
        :param orig_authors: authors being searched for, may be ``None``.
        :param root: lxml element for the parsed result page.
        :param matches: list that ``(url, '')`` tuples are appended to.
        :param timeout: not used by this method.
        '''
        search_sections = ("Title Matches",)
        max_results = 5
        found_ids = []

        # Lower-case the tokens once instead of on every comparison.
        title_tokens = [lower(t) for t in self.get_title_tokens(orig_title)]
        author_tokens = [lower(a) for a in self.get_author_tokens(orig_authors)]

        def node_text(node):
            # .text is None when the element has no leading text.
            return (node.text or '').strip()

        def ismatch(title, authors):
            title = lower(title)
            authors = lower(' '.join(authors))
            title_ok = not title_tokens or any(t in title for t in title_tokens)
            authors_ok = not author_tokens or any(a in authors for a in author_tokens)
            return title_ok and authors_ok

        def collect(book_nodes, title_xpath, authors_xpath, section_name):
            # Add the id of every acceptable book in book_nodes to found_ids.
            for book in book_nodes:
                if len(found_ids) >= max_results:
                    return
                info_divs = book.xpath('./div')
                if len(info_divs) < 2:
                    # Not laid out like a book entry; ignore it.
                    continue
                item_info = info_divs[1]
                title_refs = item_info.xpath(title_xpath)
                if not title_refs:
                    continue
                title_ref = title_refs[0]

                # The id is the last path component of the book link.
                href = title_ref.get('href') or ''
                smashwords_id = href.split('/')[-1].strip()
                if not smashwords_id:
                    log.error("Skipping in '%s' section as have no Smashwords ID" % (section_name, ))
                    continue

                title = node_text(title_ref)
                authors = [node_text(author) for author in item_info.xpath(authors_xpath)]
                if not ismatch(title, authors):
                    continue
                if smashwords_id not in found_ids:
                    found_ids.append(smashwords_id)

        # Check if the search result is the book. The XPath is absolute, so
        # it finds the meta element wherever it sits in the document.
        og_urls = root.xpath('//meta[@property="og:url"]/@content')
        if og_urls:
            matches.append((og_urls[0], ''))
            return

        for search_section in root.xpath('//div[@id="pageContent"]/div/div[@class="well"]'):
            headings = search_section.xpath('./h2')
            search_section_title = node_text(headings[0]) if headings else ''
            if search_section_title not in search_sections:
                continue
            collect(search_section.xpath('./div[@class="library-book"]'),
                    './a[@class="library-title"]',
                    './span[@class="library-by-line"]/a[@itemprop="author"]/span',
                    search_section_title)

        full_search_heading = root.xpath('//div[@id="pageCenterContent"]/h2')
        if full_search_heading and node_text(full_search_heading[0]) == "Full Search":
            for search_section in root.xpath('//div[@id="pageCenterContent"]'):
                collect(search_section.xpath('./div[contains(@class,"library-book")]'),
                        './div/span[@class="library-title"]/a',
                        './div/span[@class="library-by-line"]/a',
                        "Full Search")
        else:
            log.error('No Full Search section. Has the page changed?')

        for smashwords_id in found_ids:
            matches.append(('%s%s/%s'%(self.BASE_URL, self.BOOK_PATH, smashwords_id), ''))
        log.info("Matches:", matches)

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover for a book and put it on ``result_queue``.

        If no cover URL is cached for the given identifiers, ``identify`` is
        run first and the cover URL of the best ranked result that has one
        is used.

        :param log: logger used for progress and error messages.
        :param result_queue: queue that receives ``(plugin, cover data)``.
        :param abort: event-like object; polled with ``is_set()``.
        :param title: book title, may be ``None``.
        :param authors: list of author names, may be ``None``.
        :param identifiers: dict of identifier type -> value (only read).
        :param timeout: timeout used for both the identify run and the
                        cover download.
        :return: always ``None``; download failures are logged, not raised.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            # Pass the caller's timeout on rather than using identify's default.
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except Exception:
            # Best effort: a failed download is logged and otherwise ignored.
            # SystemExit and KeyboardInterrupt are deliberately not caught.
            log.exception('Failed to download cover from:', cached_url)


if __name__ == '__main__': # tests
    # To run these tests use:
    # calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test, series_test)
    # There is no "self" at module level; the plugin name is read from the class.
    # NOTE(review): the first two cases use a Jim Butcher title and ISBN -
    # confirm these are actually available on Smashwords.
    test_identify_plugin(SmashwordsMetaData.name,
        [

            ( # A book with no ISBN specified
                {'title':"Turn Coat", 'authors':['Jim Butcher']},
                [title_test("Turn Coat",
                    exact=True), authors_test(['Jim Butcher']),
                    series_test('Dresden Files', 11.0)]

            ),

            ( # A book with an ISBN
                {'identifiers':{'isbn': '9780748111824'},
                    'title':"Turn Coat", 'authors':['Jim Butcher']},
                [title_test("Turn Coat",
                    exact=True), authors_test(['Jim Butcher']),
                    series_test('Dresden Files', 11.0)]

            ),

            ( # A book with a Smashwords id
                {'identifiers':{'smashwords': 'across-the-sea-of-suns-1'},
                    'title':'Across the Sea of Suns', 'authors':['Gregory Benford']},
                [title_test('Across the Sea of Suns',
                    exact=True), authors_test(['Gregory Benford']),
                    series_test('Galactic Centre', 2.0)]

            ),

        ])


