# -*- coding: UTF-8 -*-
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2014, Roman Cupisz <roman.cupisz+calibre@gmail.com>'
__docformat__ = 'restructuredtext en'

import socket, re, datetime
from collections import OrderedDict
from threading import Thread

from lxml.html import fromstring, tostring

from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.metadata import MetaInformation, check_isbn
from calibre.utils.icu import capitalize, lower

from calibre.ebooks.metadata.sources.base import fixcase, fixauthors, cap_author_token

import calibre_plugins.lubimyczytac.config as cfg

class Worker(Thread): # Get details

    '''
    Get book details from LubimyCzytac.pl book page in a separate thread
    '''

    def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        Store the job parameters for one book page.

        The thread is marked as a daemon, and the object returned by
        ``browser.clone_browser()`` is kept as this worker's browser.
        '''
        Thread.__init__(self)
        self.daemon = True

        # Where to fetch from and where to deliver the result.
        self.url = url
        self.result_queue = result_queue

        self.log = log
        self.timeout = timeout
        self.relevance = relevance
        self.plugin = plugin
        self.browser = browser.clone_browser()

        # Filled in by parse_details() once the page has been parsed.
        self.cover_url = None
        self.lubimy_czytac_id = None
        self.isbn = None

    def run(self):
        '''
        Thread entry point: run get_details() for self.url.

        Ordinary errors are logged with the failing URL instead of being
        raised.  Only ``Exception`` subclasses are caught, so interpreter
        exit requests (``SystemExit``, ``KeyboardInterrupt``) are not
        swallowed.
        '''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def parse_first(self, root, xpath, loginfo, convert=lambda x: x[0].strip()):
        '''
        Evaluate ``xpath`` on ``root`` and return ``convert(nodes)``.

        ``loginfo`` is only used to label the log messages.  By default the
        first result is returned with surrounding whitespace stripped.
        ``None`` is returned when nothing matches or when the lookup or the
        conversion raises (the error is logged).
        '''
        try:
            matches = root.xpath(xpath)
            self.log.info('Found %s: %s' % (loginfo,matches))
            if not matches:
                return None
            return convert(matches)
        except Exception:
            self.log.exception('Error parsing for %s with xpath: %s' % (loginfo, xpath))
        return None

    def get_details(self):
        '''
        Download the page at self.url and pass the parsed tree to
        parse_details().

        Download failures are logged and end the method: a 404 response is
        reported as a malformed URL, a socket timeout gets its own message,
        and anything else is logged with a traceback.  The site's own
        "page not found" text and HTML that cannot be parsed are handled the
        same way (logged, then return).
        '''
        self.log.info('LubimyCzytac.pl   url: %r'%self.url)
        try:
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            # The timeout, when there is one, arrives as the first exception argument.
            attr = getattr(e, 'args', [None])
            attr = attr if attr else [None]
            if isinstance(attr[0], socket.timeout):
                msg = 'LubimyCzytac.pl timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        # NOTE: a debug dump of the page to a hard-coded local path used to
        # live here; it broke every lookup on machines without that path.
        raw = raw.decode('utf-8', errors='replace')

        if 'Przepraszamy, nie możemy znaleźć strony, której szukasz.' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw))
        except Exception:
            msg = 'Failed to parse LubimyCzytac.pl details page: %r'%self.url
            self.log.exception(msg)
            return

        self.parse_details(root)

    def parse_details(self, root):
        '''
        Build a Metadata object from the parsed page and put it on
        self.result_queue.

        The site id (taken from self.url), the title and the authors are
        required: when any of them is missing the reason is logged and
        nothing is queued.  Every other field (series, ISBN, rating,
        comments, tags, cover, publisher, publication date, languages) is
        best effort; a failure in one of them is logged and the remaining
        fields are still processed.
        '''
        try:
            lubimy_czytac_id = self.parse_lubimy_czytac_id(self.url)
        except Exception:
            self.log.exception('Error parsing LubimyCzytac.pl id for url: %r'%self.url)
            lubimy_czytac_id = None

        try:
            title = self.parse_title(root)
        except Exception:
            self.log.exception('Error parsing title for url: %r'%self.url)
            title = None

        try:
            authors = self.parse_authors(root)
        except Exception:
            self.log.exception('Error parsing authors for url: %r'%self.url)
            authors = []

        if not title or not authors or not lubimy_czytac_id:
            # Report every rejection, not only a missing title, so that a
            # dropped result can always be traced in the log.
            self.log.error('LubimyCzytac.pl_id: %r Title: %r Authors: %r'%(lubimy_czytac_id, title, authors))
            self.log.error('Could not find LubimyCzytac.pl_id or title or authors for %r'%self.url)
            return

        mi = Metadata(title, authors)
        mi.set_identifier('lubimyczytac', lubimy_czytac_id)
        self.lubimy_czytac_id = lubimy_czytac_id

        try:
            (series, series_index) = self.parse_series(root)
        except Exception:
            self.log.exception('Error parsing series for url: %r'%self.url)
            series = series_index = None
        if series:
            mi.series = series
        if series_index:
            mi.series_index = series_index

        try:
            isbn = self.parse_isbn(root)
            if isbn:
                self.isbn = mi.isbn = isbn
        except Exception:
            self.log.exception('Error parsing ISBN for url: %r'%self.url)

        try:
            mi.rating = self.parse_rating(root)
        except Exception:
            self.log.exception('Error parsing ratings for url: %r'%self.url)

        try:
            mi.comments = self.parse_comments(root)
        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            tags = self.parse_tags(root)
            if tags:
                mi.tags = tags
        except Exception:
            self.log.exception('Error parsing tags for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        try:
            mi.publisher = self.parse_publisher(root)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r'%self.url)

        try:
            mi.pubdate = self.parse_published_date(root)
        except Exception:
            self.log.exception('Error parsing published date for url: %r'%self.url)

        try:
            languages = self.parse_languages(root)
            if languages:
                mi.languages = languages
        except Exception:
            self.log.exception('Error parsing languages for url: %r'%self.url)

        mi.source_relevance = self.relevance

        # The id is guaranteed to be set here (checked above), so the
        # plugin caches only depend on whether an ISBN / cover was found.
        if self.isbn:
            self.plugin.cache_isbn_to_identifier(self.isbn, self.lubimy_czytac_id)
        if self.cover_url:
            self.plugin.cache_identifier_to_cover_url(self.lubimy_czytac_id, self.cover_url)

        self.clean_downloaded_metadata(mi)
        self.result_queue.put(mi)

    def parse_lubimy_czytac_id(self, url):
        '''
        Return the numeric book id (as a string) from a URL of the form
        ``...lubimyczytac.pl/ksiazka/<digits>/...``.

        Raises AttributeError when the URL does not have that form; the
        caller treats any exception as "id not found".
        '''
        # Raw string so that \d and \. reach the regex engine unchanged.
        return re.search(r'lubimyczytac\.pl/ksiazka/(\d+)/', url).group(1)

    def parse_title(self, root):
        '''
        Return the stripped text of the first ``<h1>`` in the title
        container, or None when there is no such heading.
        '''
        headings = root.xpath('//div[@class="grid_6 alpha omega"]/h1')
        if headings:
            title = headings[0].text.strip()
            self.log.info('Found title: %s'%title)
            return title
        return None

    def parse_authors(self, root):
        '''
        Return the stripped text of every author link in the title
        container, in page order, or None when there are no such links.
        '''
        links = root.xpath('//div[@class="grid_6 alpha omega"]/span/a')
        if not links:
            return None

        names = []
        for link in links:
            name = link.text.strip()
            self.log.info('Found author: %s'%name)
            names.append(name)
        return names


    def parse_series(self, root):
        '''
        Extract the series name and the volume number of the book.

        The series name is the text of the first link to a ``/cykl/`` page
        inside the title container.  The volume number comes from the first
        text node in that container containing ``(tom N`` ("tom" is Polish
        for "volume").

        Returns a ``(series, series_index)`` tuple:

        * ``(None, None)`` when there is no series link or parsing fails;
        * ``(series, 0)`` when the series is named but no volume number is
          present, so the series name is not lost;
        * ``(series, N)`` otherwise.
        '''
        container = '//div[@class="grid_6 alpha omega"]'
        try:
            series_names = root.xpath(container + '//a[contains(@href,"/cykl/")]/text()')
            if not series_names:
                self.log.info('Not found series')
                return (None, None)
            series = series_names[0]

            series_index = 0
            for text in root.xpath(container + '//text()'):
                # Match the volume marker itself, so unrelated text that
                # merely contains "tom " (e.g. "atom ") is skipped.
                volume = re.search(r'\(tom\s+(\d+)', text)
                if volume:
                    self.log.info('try parse series ser: %s'%text)
                    series_index = int(volume.group(1))
                    break

            self.log.info('Found series: (%s, %s)' % (series, series_index))
            return (series, series_index)
        except Exception:
            self.log.exception('Error parsing series')
            return (None, None)


    def parse_rating(self, root):
        '''
        Return the page's ``ratingValue`` halved and rounded, or None when
        the page has no rating.

        The value uses a decimal comma on the page, which is converted to a
        dot before parsing.
        '''
        values = root.xpath('//span[@class="verdana"]/strong[@itemprop="ratingValue"]/text()')
        if not values:
            return None

        site_score = float(values[0].replace(',','.'))
        # NOTE(review): halving presumably maps a 10-point site scale onto
        # calibre's 5 stars - confirm against the site.
        rating_value = round(site_score/2)
        self.log.info('Found rating: %s'%rating_value)
        return rating_value


    def parse_isbn(self, root):
        '''
        Return the ISBN shown on the page, or None when none is found.

        The ``itemprop="isbn"`` span is tried first; when it yields nothing
        the ``book:isbn`` <meta> tag is used instead.  parse_first() never
        raises, so the fallback is driven by an empty result rather than by
        an exception.
        '''
        isbn = self.parse_first(root,'//span[@itemprop="isbn"]/text()','isbn')
        if not isbn:
            # try <meta>
            isbn = self.parse_first(root,'//meta[@property="book:isbn"]/@content','isbn from meta')
        self.log.info('isbn: %s'%isbn)
        return isbn

    def parse_publisher(self, root):
        '''
        Return the stripped publisher name from the ``itemprop="publisher"``
        span, or None when it is absent.
        '''
        publisher_xpath = '//span[@itemprop="publisher"]//span/text()'
        publisher = self.parse_first(root, publisher_xpath, 'publisher')
        return publisher


    def parse_published_date(self, root):
        '''
        Return the publication date as a ``datetime.datetime``, or None when
        the page has no ``datePublished`` entry.

        A date flagged on the page as approximate ("data przybliżona") is
        reduced to its year and reported as 1 July of that year; any other
        text is handed to _convert_date_text().
        '''
        date_text = self.parse_first(root,'//dd[@itemprop="datePublished"]/text()','datePublished')
        if date_text is None:
            self.log.info('Not found date published')
            return None

        if "(data przybliżona)" not in date_text:
            pub_date = self._convert_date_text(date_text.strip())
            self.log.info('Found date published: %s'%pub_date)
            return pub_date

        # Approximate date: keep only the part before the remark.
        year_text = date_text.split('(')[0].strip()
        pub_date = datetime.datetime(int(year_text[-4:]), 7, 1)
        self.log.info('Found date published (data przybliżona): %s'%pub_date)
        return pub_date

    def _convert_date_text(self, date_text):
        '''
        Convert a Polish date string into a ``datetime.datetime``.

        Accepted shapes (whitespace separated):

        * ``"<day> <month name> <year>"``
        * ``"<month name> <year>"``
        * a single token ending in a four digit year

        Missing parts default to 1 July.  The last four characters of
        ``date_text`` must be digits, otherwise ValueError is raised.
        ``datetime.datetime`` may also raise ValueError for an impossible
        day/month combination.
        '''
        month_dict = {
            "styczeń":1, "stycznia":1, "luty":2,"lutego":2, "marzec":3, "marca":3, "kwiecień":4, "kwietnia":4,
            "maj":5, "maja":5, "czerwiec":6, "czerwca":6, "lipiec":7, "lipca":7, "sierpień":8, "sierpnia":8,
            "wrzesień":9,  "września":9, "październik":10, "października":10, "listopad":11, "listopada":11,
            "grudzień":12, "grudnia":12,
        }
        year = int(date_text[-4:])
        month = 7
        day = 1

        # Branch on the number of tokens so that a single token longer than
        # four characters (e.g. "ok.2011") no longer indexes past the list.
        text_parts = date_text.split()
        if len(text_parts) > 2:
            self.log.debug('Found date part[0]: %s'%text_parts[0])
            self.log.debug('Found date part[1]: %s'%text_parts[1])
            self.log.debug('Found date part[2]: %s'%text_parts[2])
            try:
                day = int(text_parts[0])
                # Once the day is known an unrecognised month name falls
                # back to January (behaviour kept from the original code).
                month = 1
                year = int(text_parts[2])
                self.log.debug('try date part[1] as month_name: %s'%text_parts[1])
                month = month_dict[text_parts[1]]
            except (KeyError, ValueError) as e:
                self.log.exception ('Key Error: %s'%e)
        elif len(text_parts) == 2:
            self.log.debug('Found date part[0]: %s'%text_parts[0])
            self.log.debug('Found date part[1]: %s'%text_parts[1])
            self.log.debug('try date part[0] as month name: %s'%text_parts[0])
            try:
                year = int(text_parts[1])
                month = month_dict[text_parts[0]]
            except (KeyError, ValueError) as e:
                self.log.exception ('Key Error: %s'%e)
        return datetime.datetime(year, month, day)

    def parse_comments(self, root):
        '''
        Return the book description as sanitized HTML, or an empty string
        when the page has none.

        The long description block is preferred; the ``og:description``
        <meta> content is the fallback.
        '''
        long_description = root.xpath('//div[@id="sBookDescriptionLong"]')
        if long_description:
            markup = tostring(long_description[0], method='html')
            return sanitize_comments_html(markup)

        #try <meta>
        meta_description = root.xpath('//meta[@property="og:description"]/@content')
        if meta_description:
            return sanitize_comments_html(meta_description[0])

        return ''

    def parse_tags(self, root):
        '''
        Return the page's genre as a list of tags.

        The text of the first ``itemprop="genre"`` link is split on commas;
        each tag is stripped of surrounding whitespace and empty pieces are
        dropped.  An empty list is returned when the page has no genre.
        '''
        tags_from_genre = self.parse_first(root,'//a[@itemprop="genre"]/text()','genre')
        self.log.info('tags(genre): %s'%tags_from_genre)
        if not tags_from_genre:
            return []
        stripped = (tag.strip() for tag in tags_from_genre.split(','))
        return [tag for tag in stripped if tag]

    def parse_languages(self, root):
        '''
        Return language codes for the page's ``inLanguage`` entry.

        "polski" maps to "pol" and "angielski" to "eng"; both may be
        present.  Any other language, or a missing entry, gives an empty
        list.
        '''
        lang = self.parse_first(root,'//dd[@itemprop="inLanguage"]/text()','inLanguage')
        self.log.info('lang: %s'%lang)
        if not lang:
            return []

        # (name as shown on the site, code), in the order they are reported.
        known_languages = (("polski", "pol"), ("angielski", "eng"))
        return [code for name, code in known_languages if name in lang]


    def parse_cover(self, root):
        '''
        Return the cover image URL, or None when no usable cover is found.

        The link inside the ``bookDetails`` block is preferred, with the
        ``og:image`` <meta> content as fallback.  The URL is opened and its
        ``Content-Length`` header checked: anything of 1000 bytes or less,
        or with a missing or non-numeric length, is logged as a broken
        image and rejected.
        '''
        candidates = root.xpath('//div[@id="bookDetails"]/div/a/@href')
        if not candidates:
            # Try <meta/>
            candidates = root.xpath('//meta[@property="og:image"]/@content')
        if not candidates:
            return None

        img_url = candidates[0]
        info = self.browser.open_novisit(img_url, timeout=self.timeout).info()
        # .get() is available on both the Python 2 and the Python 3 header
        # objects, whereas .getheader() only exists on Python 2.
        try:
            size = int(info.get('Content-Length'))
        except (TypeError, ValueError):
            size = 0

        if size > 1000:
            return img_url
        self.log.warning('Broken image for url: %s'%img_url)
        return None


    def clean_downloaded_metadata(self, mi):
        '''
        Normalise ``mi`` in place: authors go through fixauthors(), every
        tag through fixcase(), and the ISBN through check_isbn().

        Custom implementation - the title is deliberately left untouched
        (no capitalizing).
        '''
        mi.authors = fixauthors(mi.authors)
        mi.tags = [fixcase(tag) for tag in mi.tags]
        mi.isbn = check_isbn(mi.isbn)

