#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2014, David Forrester <davidfor@internode.on.net>'
__docformat__ = 'restructuredtext en'

import time, re, HTMLParser
from urllib import quote, unquote
from Queue import Queue, Empty

from lxml.html import fromstring, tostring

from calibre import as_unicode
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import (Source, Option)
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_udc



class WorldsWithoutEnd(Source):
    '''
    Metadata source plugin for WorldsWithoutEnd.com.

    Books are identified either directly through a ``wwend`` identifier or by
    running a title search on the site and filtering the result list against
    the supplied title and authors.
    '''

    name                    = 'WorldsWithoutEnd'
    description             = _('Downloads metadata and covers from WorldsWithoutEnd.com')
    author                  = 'David Forrester'
    version                 = (0, 0, 1)
    minimum_calibre_version = (0, 8, 0)

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:wwend',
                                'comments', 'publisher', 'pubdate', 'series', 'tags'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True

    STORE_DOMAIN = 'worldswithoutend.com'
    BASE_URL = 'https://' + STORE_DOMAIN
    BOOK_PATH = '/novel.asp?id='
    SEARCH_PATH = '/searchwwe.asp'
    IDENTIFIER = 'wwend'
    # Upper bound on the number of search hits handed on for processing.
    MAX_RESULTS = 5

    def _book_url(self, worldswithoutend_id):
        '''Return the URL of the book page for ``worldswithoutend_id``.'''
        return '%s%s%s' % (WorldsWithoutEnd.BASE_URL,
                           WorldsWithoutEnd.BOOK_PATH, worldswithoutend_id)

    def get_book_url(self, identifiers):
        '''
        Return a ``(name, id, url)`` tuple for the book page.

        Returns None when ``identifiers`` holds no non-empty ``wwend`` entry.
        '''
        worldswithoutend_id = identifiers.get(WorldsWithoutEnd.IDENTIFIER, None)
        if not worldswithoutend_id:
            return None
        return ('WorldsWithoutEnd', worldswithoutend_id,
                self._book_url(worldswithoutend_id))

    def get_cached_cover_url(self, identifiers):
        '''
        Return the cached cover URL for the ``wwend`` identifier, or None when
        there is no such identifier. The lookup itself is delegated to
        ``self.cached_identifier_to_cover_url``.
        '''
        worldswithoutend_id = identifiers.get(WorldsWithoutEnd.IDENTIFIER, None)
        if worldswithoutend_id is None:
            return None
        return self.cached_identifier_to_cover_url(worldswithoutend_id)

    def create_query(self, log, title=None, identifiers={}):
        '''
        Build the search URL for ``title``.

        Only the title is used; ``log`` and ``identifiers`` are accepted but
        not read (``identifiers`` is never mutated, so the shared default is
        harmless). Returns None when no title is given or when the title
        yields no search tokens.
        '''
        if not title:
            return None
        title = get_udc().decode(title)
        title_tokens = self.get_title_tokens(title,
                            strip_joiners=False, strip_subtitle=True)
        tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t)
                  for t in title_tokens]
        # Test the tokens themselves: once the 'st=' prefix is added the
        # query string is never empty, so a later check could not fire.
        if not tokens:
            return None
        return '%s%s?st=%s' % (WorldsWithoutEnd.BASE_URL,
                               WorldsWithoutEnd.SEARCH_PATH, '+'.join(tokens))

    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30):
        '''
        Find candidate book pages and start one Worker per candidate.

        With a ``wwend`` identifier the book URL is used directly and no
        search is made. Otherwise a title search is run and its results are
        filtered by ``_parse_search_results``. There is no fallback from one
        strategy to the other.

        Returns None in the normal, aborted and no-match cases, and the
        error text (via ``as_unicode``) when the search request fails.
        '''
        matches = []
        query = None
        br = self.browser

        worldswithoutend_id = identifiers.get(WorldsWithoutEnd.IDENTIFIER, None)
        if worldswithoutend_id:
            # Known id: go straight to the book page, no search required.
            matches.append((self._book_url(worldswithoutend_id), None))
        else:
            query = self.create_query(log, title=title, identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            try:
                log.info('Querying: %s'%query)
                raw = br.open_novisit(query, timeout=timeout).read()
            except Exception as e:
                err = 'Failed to make identify query: %r'%query
                log.exception(err)
                return as_unicode(e)
            root = fromstring(clean_ascii_chars(raw))
            # Keep only the search hits whose title/authors look like the
            # book we were asked about.
            self._parse_search_results(log, title, authors, root, matches, timeout)

        if abort.is_set():
            return

        if not matches:
            log.error('No matches found with query: %r'%query)
            return

        # Imported here rather than at module level, as in the original code.
        from calibre_plugins.worldswithoutend.worker import Worker
        author_tokens = list(self.get_author_tokens(authors))
        workers = [Worker(url, publisher, author_tokens, result_queue, br, log, i, self)
                   for i, (url, publisher) in enumerate(matches)]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        # Poll the workers until all have finished or an abort is requested.
        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None

    def _parse_search_results(self, log, orig_title, orig_authors, root, matches, timeout):
        '''
        Append ``(url, publisher)`` tuples for acceptable search hits to
        ``matches`` (modified in place), stopping once it holds
        ``MAX_RESULTS`` entries.

        ``root`` is the parsed search result page. A hit is accepted when at
        least one token of the requested title occurs in its title and at
        least one token of the requested authors occurs in its authors; a
        criterion with no tokens is treated as satisfied. ``timeout`` is
        accepted but not used. The publisher is always the empty string.
        '''
        title_tokens = list(self.get_title_tokens(orig_title))
        author_tokens = list(self.get_author_tokens(orig_authors))

        def ismatch(title, authors):
            authors = lower(' '.join(authors))
            title = lower(title)
            title_ok = not title_tokens or any(
                lower(t) in title for t in title_tokens)
            authors_ok = not author_tokens or any(
                lower(a) in authors for a in author_tokens)
            return title_ok and authors_ok

        searchTitles = root.xpath('//form[@name="sjax"]/div/p[@class="searchtitle"]')
        searchAuthors = root.xpath('//form[@name="sjax"]/div/p[@class="searchauthor"]')
        log.info('len(searchTitles): %d' % (len(searchTitles)))
        log.info('len(searchAuthors): %d' % (len(searchAuthors)))
        for searchIndex, title_node in enumerate(searchTitles):
            log.info('title: ', title_node)
            # The id is whatever follows the last '=' in the link target.
            href = ''.join(title_node.xpath('./a/@href'))
            worldswithoutend_id = href.split('=')[-1].strip()
            log('_parse_search_results - worldswithoutend_id: %s' % (worldswithoutend_id))
            if not worldswithoutend_id:
                continue

            title = ''.join(title_node.xpath('./a/text()'))
            log('_parse_search_results - title: %s'%(title))
            # Titles and authors are paired by position. If the page has
            # fewer author nodes than title nodes, treat the authors as
            # unknown instead of raising IndexError.
            if searchIndex < len(searchAuthors):
                authors = searchAuthors[searchIndex].xpath('./a/text()')
            else:
                authors = []
            log('_parse_search_results - authors: %s'%(authors))
            if not ismatch(title, authors):
                log.error('Rejecting as not close enough match: %s - %s' % (title, authors))
                continue
            publisher = ''
            matches.append((self._book_url(worldswithoutend_id), publisher))
            if len(matches) >= WorldsWithoutEnd.MAX_RESULTS:
                break

    def download_cover(self, log, result_queue, abort,
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download the cover and put ``(self, cover_data)`` on ``result_queue``.

        If no cover URL is cached for ``identifiers``, ``identify`` is run
        first and the cached URL of the best-ranked result that has one is
        used. Download failures are logged, not raised.
        '''
        cached_url = self.get_cached_cover_url(identifiers)
        log.info('download_cover - start')
        if cached_url is None:
            log.info('No cached cover found, running identify')
            rq = Queue()
            # Forward the caller's timeout so the search honours it too.
            self.identify(log, rq, abort, title=title, authors=authors,
                    identifiers=identifiers, timeout=timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            result_queue.put((self, cdata))
        except Exception:
            # Best effort: log and carry on, but let KeyboardInterrupt and
            # SystemExit propagate.
            log.exception('Failed to download cover from:', cached_url)

if __name__ == '__main__': # tests
    # Run these tests with:
    # calibre-debug -e __init__.py
    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
            title_test, authors_test, series_test)

    # A book with no ISBN specified
    no_isbn_case = (
        {'title': "Turn Coat", 'authors': ['Jim Butcher']},
        [
            title_test("Turn Coat", exact=True),
            authors_test(['Jim Butcher']),
            series_test('The Dresden Files', 11.0),
        ],
    )

    # A book with an ISBN
    isbn_case = (
        {
            'identifiers': {'isbn': '9780748111824'},
            'title': "Turn Coat",
            'authors': ['Jim Butcher'],
        },
        [
            title_test("Turn Coat", exact=True),
            authors_test(['Jim Butcher']),
            series_test('The Dresden Files', 11.0),
        ],
    )

    # A book with a WorldsWithoutEnd id
    wwend_id_case = (
        {
            'identifiers': {WorldsWithoutEnd.IDENTIFIER: 'across-the-sea-of-suns-1'},
            'title': 'Across the Sea of Suns',
            'authors': ['Gregory Benford'],
        },
        [
            title_test('Across the Sea of Suns', exact=True),
            authors_test(['Gregory Benford']),
            series_test('Galactic Center Saga', 2.0),
        ],
    )

    test_identify_plugin(WorldsWithoutEnd.name,
                         [no_isbn_case, isbn_case, wwend_id_case])


