# coding=utf-8

"""Anobii Fetcher - Anobii metadata fetcher plugin for calibre."""

#########################################################################
#  The MIT License (MIT)
#
#  Copyright (c) 2013-2016 CIVA LIN (林雪凡)
#  The Anobii Fetcher is licensed under the MIT license provided
#  below by supporting from the sponsor "Open Source Software Foundry, OSSF"
#  of Academia Sinica at: http://www.openfoundry.org/
#
#  Permission is hereby granted, free of charge, to any person obtaining a
#  copy of this software and associated documentation files
#  (the "Software"), to deal in the Software without restriction, including
#  without limitation the rights to use, copy, modify, merge, publish,
#  distribute, sublicense, and/or sell copies of the Software, and to
#  permit persons to whom the Software is furnished to do so,
#  subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included
#  in all copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
#  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
#  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
#  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
##########################################################################

import urllib
import urllib2
import urlparse as UP
import threading
import re

import lxml.html
import lxml.etree

from calibre.ebooks.metadata.sources.base import Source
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date


# Key under which the anobii site id is stored in a book's identifiers.
# AnobiiMetaSource.touched_fields advertises it as 'identifier:<this name>'.
SITEID_NAME = 'anobii_id'


def percentilize(word):
    """Encode the "word" as percentage encoding."""
    return urllib.quote_plus(
        word.encode('utf-8') if isinstance(word, unicode) else word,
        safe='/')


def normalized(url):
    """Return "url" with its path and query string percent-encoded.

    The scheme, netloc, params and fragment are passed through untouched.
    Repeated query keys are collapsed by concatenating their values, and
    query keys with blank values are dropped (parse_qs default).

    The function is idempotent: feeding it an already normalized url gives
    the same url back, so a url can safely be normalized more than once.
    """
    # Work on UTF-8 bytes from the start. Decoding percent-escapes on a
    # unicode string would yield latin-1 code points instead of the original
    # bytes, and urlencode() would otherwise fall back on the interpreter's
    # default encoding for non-ASCII values.
    if isinstance(url, unicode):
        url = url.encode('utf-8')
    pr = UP.urlparse(url)

    # Undo any escaping that is already present before quoting again;
    # otherwise "%" becomes "%25" and "+" becomes "%2B" on a second pass.
    path = percentilize(urllib.unquote_plus(pr.path))
    query = urllib.urlencode(
        {k: ''.join(v) for k, v in UP.parse_qs(pr.query).items()})

    new_pr = UP.ParseResult(
        scheme=pr.scheme,
        netloc=pr.netloc,
        path=path,
        params=pr.params,
        query=query,
        fragment=pr.fragment)
    return new_pr.geturl()


def get_content(url):
    """Download "url" and return the response body as unicode text.

    The url is normalized before the request is made. The body is decoded
    as UTF-8, with undecodable bytes replaced rather than raising.
    Errors raised by urllib2.urlopen propagate to the caller.
    """
    f = urllib2.urlopen(normalized(url))
    try:
        return unicode(f.read(), 'utf-8', 'replace')
    finally:
        # Release the connection even when reading or decoding fails.
        f.close()


def get_root_elem(url):
    """Fetch "url" and return its content parsed as an lxml.html root."""
    page_text = get_content(url)
    return lxml.html.fromstring(page_text)


class AnobiiMetaParser:
    """Parsing anobii book page.

    Every public method takes the root element of a parsed book page and
    extracts one piece of metadata from it. Optional fields yield None
    (or an empty list for authors) when the page does not provide them.
    """

    @staticmethod
    def __get_book_elem(root):
        """Return the first schema.org Book <div> below root.

        Raises IndexError when the page contains no such element.
        """
        return root.xpath(u'.//div[@itemtype="http://schema.org/Book"]')[0]

    @classmethod
    def get_title(cls, root):
        """Get title from root element.

        Raises AttributeError when the book element has no title heading.
        """
        return cls.__get_book_elem(root).findtext(
            u'.//h1[@itemprop="name"]').strip()

    @classmethod
    def get_authors(cls, root):
        """Get authors from root element.

        Returns a list of stripped author names. Author elements without
        any direct text are skipped instead of raising.
        """
        authors = []
        for e in cls.__get_book_elem(root).findall(
                u'.//*[@itemprop="author"]'):
            # e.text is None when the element has no leading text.
            name = (e.text or u'').strip()
            if name:
                authors.append(name)
        return authors

    @classmethod
    def get_siteid(cls, root):
        """Get anobii site id from root element.

        The id is read from the `encryptItemId: "..."` entry found in the
        page's inline scripts. Returns None when no such entry exists.
        """
        script_text = u'\n'.join([
            e.text or u'' for e in root.findall(u'.//script')])
        # Raw string so that \s and \w always reach the regex engine intact.
        match = re.search(r'encryptItemId:\s*"(\w+)"', script_text)
        if match is None:
            return None
        return match.group(1)

    @classmethod
    def get_isbn(cls, root):
        """Get isbn from root element.

        When several ISBNs are listed, the longest one is returned (the
        first one on ties). Returns None when the page lists no ISBN.
        """
        isbns = cls.__get_book_elem(root).xpath(
            u'.//span[@itemprop="isbn"]/text()')
        if not isbns:
            return None
        # max() keeps the first of equally long candidates, exactly like a
        # stable descending sort by length followed by taking element 0.
        return max(isbns, key=len).strip()

    @classmethod
    def get_publisher(cls, root):
        """Get publisher from root element, or None when it is absent."""
        publishers = cls.__get_book_elem(root).xpath(
            u'.//span[@itemprop="publisher"]/text()')
        if not publishers:
            return None
        return publishers[0].strip()

    @classmethod
    def get_pubdate(cls, root):
        """Get published date from root element, or None when it is absent.

        The date comes from the "content" attribute of the datePublished
        span and is parsed with parse_date(..., assume_utc=True).
        """
        pubdates = cls.__get_book_elem(root).xpath(
            u'.//span[@itemprop="datePublished"]/@content')
        if not pubdates:
            return None
        return parse_date(pubdates[0], assume_utc=True)

    @classmethod
    def get_desc(cls, root):
        """Get description (comment) from root element.

        Returns the serialized markup of the description <div>, or None
        when the div is missing or contains only whitespace.
        """
        e_desc = cls.__get_book_elem(root).find(
            u'.//div[@itemprop="description"]')
        # find() returns None when the page has no description block.
        if e_desc is None or not e_desc.text_content().strip():
            return None
        return lxml.etree.tostring(e_desc).strip()

    @classmethod
    def get_metadata(cls, root, source_relevance=0):
        """Return all metadata from root element as a Metadata object.

        source_relevance is stored unchanged on the returned object. The
        anobii identifier is only set when the page actually provides one.
        """
        mi = Metadata(
            title=cls.get_title(root),
            authors=cls.get_authors(root))
        mi.identifiers = {}
        siteid = cls.get_siteid(root)
        if siteid:
            mi.identifiers[SITEID_NAME] = siteid
        mi.isbn = cls.get_isbn(root)
        mi.publisher = cls.get_publisher(root)
        mi.pubdate = cls.get_pubdate(root)
        mi.comments = cls.get_desc(root)
        mi.source_relevance = source_relevance
        return mi


class AnobiiMetaSource(Source):
    """Anobii Metadata Source plugin.

    Searches anobii.com by title and authors, parses every book page in
    the search result and downloads covers by anobii id.
    """

    name = 'Anobii Fetcher'
    description = 'Get the metadata of book from anobii.com'
    supported_platforms = ['windows', 'osx', 'linux']
    author = u'Civa Lin'
    version = (1, 1, 0)
    minimum_calibre_version = (0, 8, 60)

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset([
        'title', 'authors',
        'pubdate', 'comments', 'publisher',
        'identifier:isbn', 'identifier:{}'.format(SITEID_NAME)])
    has_html_comments = True
    cached_cover_url_is_reliable = True
    supports_gzip_transfer_encoding = True

    def identify(self, log, result_queue, abort,
                 title=None, authors=None, identifiers={}, timeout=30):
        """Inherited.

        Looks up the search result for title/authors and fetches every hit
        in its own thread; each successfully parsed book is put on
        result_queue as a Metadata object whose source_relevance is the
        hit's position in the search result.

        The identifiers default is never mutated here. timeout is accepted
        for interface compatibility but is not applied to the page
        downloads. Errors raised while loading the search page propagate.
        """
        urls = self.__get_book_urls(title=title, authors=authors,
                                    identifiers=identifiers)
        threads = []
        for order, url in enumerate(urls):
            thread = threading.Thread(
                target=self.__fetch_book_metadata,
                kwargs={
                    'url': url,
                    'result_queue': result_queue,
                    'order': order,
                    'log': log})
            # Do not keep the process alive for a download that outlives
            # an aborted identify run.
            thread.daemon = True
            thread.start()
            threads.append(thread)
        for t in threads:
            # Poll instead of blocking forever so an abort request is
            # honoured while downloads are still in flight.
            while t.is_alive() and not abort.is_set():
                t.join(0.2)

    def download_cover(
            self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30, get_best_cover=False):
        """Inherited from parent.

        Downloads the cover addressed by the anobii id in identifiers and
        puts (self, image_data) on result_queue. Does nothing when no
        anobii id is known, when abort is set, or when the download fails
        (the failure is logged).
        """
        cached_url = self.get_cached_cover_url(identifiers=identifiers)
        if cached_url is None:
            log.info('No anobii id found, cannot download cover')
            return
        if abort.is_set():
            return

        try:
            cdata = self.browser.open_novisit(
                cached_url, timeout=timeout).read()
        except Exception:
            # Best effort: a failed cover download must not break the run.
            log.exception('Failed to download cover from:', cached_url)
            return
        if cdata:
            result_queue.put((self, cdata))

    def get_cached_cover_url(self, identifiers):
        """Inherited from parent.

        Returns the cover image url for the anobii id in identifiers, or
        None when identifiers holds no anobii id.
        """
        siteid = identifiers.get(SITEID_NAME)
        if not siteid:
            return None
        return ('http://image.anobii.com/anobi/image_book.php?item_id={}'
                .format(siteid))

    def __fetch_book_metadata(self, url, result_queue, order, log=None):
        """Push metadata from url to result_queue.

        Runs in a worker thread: a page that cannot be downloaded or
        parsed is logged (when a log is given) and skipped, so one bad
        page does not affect the remaining results.
        """
        try:
            root = get_root_elem(url)
            mi = AnobiiMetaParser.get_metadata(root, source_relevance=order)
        except Exception:
            if log is not None:
                log.exception('Failed to fetch metadata from:', url)
            return
        self.clean_downloaded_metadata(mi)
        result_queue.put(mi)

    def __get_book_urls(self, title, authors, identifiers):
        """Get all book urls in search result page.

        The search keyword is the title followed by the authors, separated
        by spaces; identifiers is currently not used for searching.
        Returns an empty list when there is nothing to search for.
        """
        keywords = [title] if title else []
        keywords.extend(authors or [])
        if not keywords:
            return []
        search_url = u'http://www.anobii.com/search?keyword={}'.format(
            ' '.join(keywords))

        root = get_root_elem(search_url)
        urls = []
        for row in root.findall('.//div[@class="shelf"]//tr'):
            link = row.find('.//a[@class="cover_image"]')
            # Rows without a cover link (or without href) are not books.
            href = link.get('href') if link is not None else None
            if href:
                urls.append(normalized(href))
        return urls