#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

__license__ = "GPL v3"
__copyright__ = "2011, Kovid Goyal <kovid@kovidgoyal.net>; 2011, Li Fanxi <lifanxi@freemindworld.com>"
__docformat__ = "restructuredtext en"

import random
import re
import time
import urllib.request

try:
    from queue import Empty, Queue
except ImportError:
    from Queue import Empty, Queue

from bs4 import BeautifulSoup

from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.sources.base import Option, Source


class Douban(Source):
    name = "Douban Books v2"
    author = "Li Fanxi, xcffl, jnozsc, else"
    version = (4, 2, 1)
    minimum_calibre_version = (5, 0, 0)

    description = (
        "Downloads metadata and covers from Douban.com. "
        "Useful only for Chinese language books."
    )

    capabilities = frozenset(["identify", "cover"])
    touched_fields = frozenset(
        [
            "title",
            "authors",
            "tags",
            "pubdate",
            "comments",
            "publisher",
            "identifier:isbn",
            "rating",
            "identifier:douban",
        ]
    )  # language currently disabled
    supports_gzip_transfer_encoding = True
    cached_cover_url_is_reliable = True

    ISBN_URL = "http://douban.com/isbn/"
    SUBJECT_URL = "http://book.douban.com/subject/"

    options = (
        Option(
            "include_subtitle_in_title",
            "bool",
            True,
            ("Include subtitle in book title:"),
            ("Whether to append subtitle in the book title."),
        ),
    )
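
    # include_subtitle_in_title is declared above but never consumed below.
    # A minimal sketch of how it could be honored, assuming a scraped
    # "subtitle" value (which __get_title does not currently extract);
    # nothing in this plugin calls it by default.
    def _apply_subtitle(self, title, subtitle):
        if subtitle and self.prefs["include_subtitle_in_title"]:
            return "%s: %s" % (title, subtitle)
        return title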

    def identify(
        self,
        log,
        result_queue,
        abort,
        title=None,
        authors=None,
        identifiers={},
        timeout=30,
    ):
        log.info("Start getting metadata from Douban...")
        log.info(str(identifiers))
        book = self.get_book(log, identifiers)
        if book is None:
            # The lookup failed or Douban has no such book
            return None

        # There is no point running these queries in threads as douban
        # throttles requests, returning 403 Forbidden errors
        self.get_all_details(log, book, abort, result_queue, timeout)

        return None

    def to_metadata(self, log, entry_, timeout):  # {{{
        from calibre.utils.date import parse_date, utcnow

        log.info("to_metadata")
        douban_id = entry_.get("id")
        title = entry_.get("title")
        description = entry_.get("summary")
        # subtitle = entry_.get('subtitle')  # TODO: std metadata doesn't have this field
        publisher = entry_.get("publisher")
        isbn = entry_.get("isbn13")  # ISBN-10 is obsolete, use ISBN-13
        pubdate = entry_.get("pubdate")
        authors = entry_.get("author")
        book_tags = entry_.get("tags")
        rating = entry_.get("rating")
        # get_book() stores the scraped cover under "cover"; fall back to the
        # old API shape ("images"/"large") for safety
        cover_url = entry_.get("cover") or entry_.get("images", {}).get("large")
        series = entry_.get("series")

        if not authors:
            authors = ["Unknown"]
        if not douban_id or not title:
            # Silently discard this entry
            return None

        mi = Metadata(title, authors)
        mi.identifiers = {"douban": douban_id}
        mi.publisher = publisher
        mi.comments = description
        # mi.subtitle = subtitle

        # ISBN: keep the longest valid one (prefers ISBN-13 over ISBN-10)
        isbns = []
        if isinstance(isbn, (str, bytes)):
            if check_isbn(isbn):
                isbns.append(isbn)
        elif isbn:
            for x in isbn:
                if check_isbn(x):
                    isbns.append(x)
        if isbns:
            mi.isbn = sorted(isbns, key=len)[-1]
        mi.all_isbns = isbns

        # Tags: the HTML scraper returns plain strings, while the old API
        # returned dicts with a "name" key, so accept both
        if book_tags:
            mi.tags = [t["name"] if isinstance(t, dict) else t for t in book_tags]

        # pubdate
        if pubdate:
            try:
                default = utcnow().replace(day=15)
                mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
            except Exception:
                log.error("Failed to parse pubdate %r" % pubdate)

        if rating:
            try:
                # __get_score() returns a float; the old API returned a dict
                # with an "average" key, so accept both shapes
                if isinstance(rating, dict):
                    rating = rating.get("average")
                my_rate = float(rating)
                mi.publisher += "#PrB.rating#" + str(my_rate)
                mi.rating = my_rate / 2.0
            except Exception:
                log.exception("Failed to parse rating")
                mi.rating = 0

        # Cover
        mi.has_douban_cover = None
        u = cover_url
        if u:
            # If the URL contains "book-default", the book doesn't have a cover
            if "book-default" not in u:
                mi.has_douban_cover = u

        # Series: the HTML scraper returns a plain string, while the old API
        # returned a dict with a "title" key
        if series:
            mi.series = series["title"] if isinstance(series, dict) else series

        return mi

    # }}}
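
    # The ISBN comment in to_metadata(): an ISBN-10 can be upgraded to ISBN-13
    # by prefixing "978" and recomputing the check digit. A minimal sketch for
    # illustration; nothing in this plugin calls it by default.
    @staticmethod
    def isbn10_to_isbn13(isbn10):
        # e.g. isbn10_to_isbn13("0306406152") -> "9780306406157"
        body = "978" + isbn10[:9]  # drop the old check digit, add the prefix
        total = sum((1 if i % 2 == 0 else 3) * int(d) for i, d in enumerate(body))
        return body + str((10 - total % 10) % 10)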

    def get_book_url(self, isbn):  # {{{
        if isbn is not None:
            return self.ISBN_URL + isbn
        else:
            return ""

    # }}}

    def get_book_isbn(self, identifiers):
        return check_isbn(identifiers.get("isbn", None))

    def download_cover(
        self,
        log,
        result_queue,
        abort,  # {{{
        title=None,
        authors=None,
        identifiers={},
        timeout=30,
        get_best_cover=False,
    ):
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info("No cached cover found, running identify")
            rq = Queue()
            self.identify(
                log, rq, abort, title=title, authors=authors, identifiers=identifiers
            )
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(
                key=self.identify_results_keygen(
                    title=title, authors=authors, identifiers=identifiers
                )
            )
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
            log.info("No cover found")
            return

        if abort.is_set():
            return
        br = self.browser
        log("Downloading cover from:", cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
            if cdata:
                result_queue.put((self, cdata))
        except Exception:
            log.exception("Failed to download cover from:", cached_url)

    # }}}

    def get_cached_cover_url(self, identifiers):  # {{{
        url = None
        db = identifiers.get("douban", None)
        if db is None:
            isbn = identifiers.get("isbn", None)
            if isbn is not None:
                db = self.cached_isbn_to_identifier(isbn)
        if db is not None:
            url = self.cached_identifier_to_cover_url(db)

        return url

    # }}}

    def get_all_details(self, log, book, abort, result_queue, timeout):  # {{{
        try:
            log.info("get_all_details")
            ans = self.to_metadata(log, book, timeout)
            if isinstance(ans, Metadata):
                ans.source_relevance = 0
                db = ans.identifiers["douban"]
                for isbn in getattr(ans, "all_isbns", []):
                    self.cache_isbn_to_identifier(isbn, db)
                if ans.has_douban_cover:
                    self.cache_identifier_to_cover_url(db, ans.has_douban_cover)
                self.clean_downloaded_metadata(ans)
                result_queue.put(ans)
        except Exception:
            log.exception("Failed to get metadata for identify entry:", book["id"])
        if abort.is_set():
            return

    # }}}

    def get_book(self, log, identifiers={}):
        isbn = self.get_book_isbn(identifiers)
        log.info(isbn)
        url = self.get_book_url(isbn)
        log.info(url)
        html = self.__get_html(url)
        if html is None:  # book not found or the request failed
            log.info("book not found: " + url)
            return None

        soup = self.__get_soup(html=html)
        book = {"isbn13": isbn}
        book["id"] = self.__getId(soup=soup)
        book["tags"] = self.__getTags(soup=soup)
        book["series"] = self.__getSeries(soup=soup)
        book["rating"] = self.__get_score(soup=soup)
        book["title"] = self.__get_title(soup=soup)
        book["author"] = self.__get_author(soup=soup)
        book["summary"] = self.__get_intro(soup=soup)
        book["publisher"] = self.__get_publisher(soup=soup)
        book["pubdate"] = self.__get_publish_date(soup=soup)
        book["cover"] = self.__getCover(soup=soup)
        log.info(book)
        return book

    def __get_html(self, url):
        headers_ = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
        }
        request = urllib.request.Request(url, headers=headers_)
        try:
            # A timeout keeps a stalled connection from hanging calibre
            response = urllib.request.urlopen(request, timeout=30)
        except Exception:
            return None
        return response.read().decode("utf-8")

    def __get_soup(self, html=""):
        # html is already a decoded str, so no encoding detection is needed
        return BeautifulSoup(html, "lxml")

    def __getSeries(self, soup):
        # The series, when present, is a link under #info pointing at
        # /series/<id>/
        for a in soup.select("#info > a"):
            if "/series/" in a.get("href", ""):
                return a.get_text(strip=True)
        return ""

    def __getId(self, soup):
        # The subject id appears in <meta> tags as a URL such as
        # https://m.douban.com/book/subject/26389608/
        for meta in soup.select("meta"):
            m = re.search(r"douban\.com/(?:book/)?subject/(\d+)", meta.get("content", ""))
            if m:
                return m.group(1)
        return 0

    def __getTags(self, soup):
        # Tag names are the link texts inside the tags section; an empty
        # list is returned when the section is missing
        return [a.get_text(strip=True) for a in soup.select("#db-tags-section a")]

    def __getCover(self, soup):
        img = soup.select_one("#mainpic > a > img")
        if img is None:
            return ""
        return img.get("src", "")

    def __get_score(self, soup):
        strong = soup.select_one(
            "#interest_sectl > div > div.rating_self.clearfix > strong"
        )
        if strong is None:
            return 0.0
        try:
            return float(strong.get_text(strip=True))
        except ValueError:
            return 0.0

    def __get_title(self, soup):
        span = soup.select_one("body>div>h1>span")
        if span is None:
            return ""
        return span.get_text(strip=True)

    def __get_author(self, soup):
        # Metadata() expects a list of author names, not a bare string
        links = soup.select("body>div>div>div>div>div>div>div>div>span>a")
        if not links:
            return []
        return [links[0].get_text(strip=True)]

    def __get_publisher(self, soup):
        # The publisher name sits between its label and the next <br/>
        r_publisher = r"出版社:</span>(.*?)<br/>"
        info = str(soup.select("body>div>div>div>div>div>div>div>div"))
        ans = re.findall(r_publisher, info)
        return ans[0].strip() if ans else ""

    def __get_publish_date(self, soup):
        # The publication date sits between its label and the next <br/>
        r_publish_date = r"出版年:</span>(.*?)<br/>"
        info = str(soup.select("body>div>div>div>div>div>div>div>div"))
        ans = re.findall(r_publish_date, info)
        return ans[0].strip() if ans else ""

    def __get_intro(self, soup):
        # Douban serves the summary in one of two layouts
        node = soup.select_one("#link-report > div:nth-child(1) > div")
        if node is None:
            node = soup.select_one("#link-report > span.all.hidden > div > div")
        if node is None:
            return ""
        return node.get_text().strip()


if __name__ == "__main__":  # tests {{{
    # To run these tests use: calibre-debug -e src/calibre/ebooks/metadata/sources/douban.py
    from calibre.ebooks.metadata.sources.test import (
        test_identify_plugin,
        title_test,
        authors_test,
    )

    test_identify_plugin(
        Douban.name,
        [
            (
                {
                    "identifiers": {"isbn": "9787536692930"},
                    "title": "三体",
                    "authors": ["刘慈欣"],
                },
                [title_test("三体", exact=True), authors_test(["刘慈欣"])],
            ),
        ],
    )
# }}}