#!/usr/bin/env python3
#vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import (unicode_literals, division, absolute_import,
                       print_function)

__license__   = 'GPL v3'
__copyright__ = 'Christophe'
__docformat__ = 'restructuredtext en'

import time, re
from urllib.parse import quote

from lxml.html import fromstring, tostring
from calibre import browser

from calibre import as_unicode
from calibre.utils.icu import lower
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.localization import get_udc

class DownloadBabelioWorker(Source):
    """Look up a book on babelio.com and keep its rating and vote count.

    The lookup runs synchronously from the constructor (it performs network
    requests). Afterwards:

    * ``votes`` is the rating-count text of the matching notice that has the
      highest number of votes, or ``None`` when nothing usable was found;
    * ``notes`` is the rating-value text read from a notice that raised that
      maximum, or ``None``.

    The class derives from ``Source`` to reuse its ``get_title_tokens`` and
    ``get_author_tokens`` helpers; ``Source.__init__`` is not called here.
    """

    def __init__(self, title, authors, timeout=20):
        """Store the search criteria and immediately run the lookup.

        :param title: title of the book to search for.
        :param authors: authors of the book (passed to ``get_author_tokens``).
        :param timeout: timeout handed to every ``open_novisit`` call.
        """
        self.timeout = timeout
        self.notes = None
        self.votes = None
        self.title = title
        self.authors = authors
        self.run()

    def run(self):
        """Search Babelio and record the rating of the most-voted match.

        Always returns ``None``; results are stored in ``self.votes`` and
        ``self.notes``. Download and parse failures are swallowed so that a
        failed lookup simply leaves both attributes untouched.
        """
        matches = []
        br = browser()
        print(('self authors %s' %self.authors))
        print(('self title %s' %self.title))
        query = self.create_query(title=self.title, authors=self.authors)
        print(('query %s' %query))
        if not query:
            # No usable title tokens (or they could not be encoded).
            return None

        # The site sometimes answers with "HTTP error 403: Forbidden";
        # _fetch_root turns any such failure into None.
        root = self._fetch_root(br, query)
        if root is None:
            return None

        try:
            self._parse_search_results(root, matches)
        except Exception:
            print('erreur parse')
            return None
        print('avant notice')

        if not matches:
            print('liste vide')
            return None

        save_vote = 0
        for notice in matches:
            print(('notice %s' %notice))
            root = self._fetch_root(br, notice)
            if root is None:
                # One unreachable notice must not discard the other matches.
                continue

            vote = root.xpath('//span[@itemprop="aggregateRating"]//span[@itemprop="ratingCount"]')
            if not vote:
                print('votes non trouvés')
                continue

            votes_notice = vote[0].text_content().strip()
            print(('votes_notice %s' %votes_notice))
            try:
                votes_float = float(votes_notice)
            except ValueError:
                # Unexpected rating-count text: ignore this notice.
                continue

            # Keep only the notice with the highest number of votes.
            if votes_float > save_vote:
                self.votes = votes_notice
                save_vote = votes_float
                print(('self.votes %s' %self.votes))
                note = root.xpath('//span[@itemprop="aggregateRating"]/span[@itemprop="ratingValue"]')
                if note:
                    self.notes = note[0].text_content().strip()
                print(('self.notes %s' %self.notes))
        return None

    def _fetch_root(self, br, url):
        """Download *url* with *br* and return the parsed lxml root.

        The body is decoded as ISO-8859-1 (undecodable bytes replaced).
        Returns ``None`` when the download or the parsing fails.
        """
        try:
            response = br.open_novisit(url, timeout=self.timeout)
            raw = response.read().strip()
            raw = raw.decode('iso-8859-1', errors='replace')
            return fromstring(clean_ascii_chars(raw))
        except Exception:
            return None

    @staticmethod
    def _quote_latin1(tokens):
        """Return *tokens* URL-quoted after encoding text as ISO-8859-1.

        Raises ``UnicodeEncodeError`` for characters outside ISO-8859-1.
        """
        return [quote(t.encode('iso-8859-1') if isinstance(t, str) else t)
                for t in tokens]

    def create_query(self, title=None, authors=None):
        """Build the Babelio search URL for a title and its first author.

        :param title: title to search for; defaults to ``self.title``.
        :param authors: authors to search for; defaults to ``self.authors``.
        :return: the search URL, or ``None`` when no title token is available
            or when a token cannot be encoded as ISO-8859-1 (re-typing the
            title of the book usually fixes such an encoding problem).
        """
        BASE_URL = 'http://www.babelio.com/resrecherche.php?Recherche='
        BASE_URL_MID = '+'
        BASE_URL_LAST = '&page=1&item_recherche=livres&tri=auteur'

        # (searched text, replacement) pairs applied to the title in order.
        replacements = (
            ('\'é', '\'e'),
            ('\'è', '\'e'),
            ('\'ê', '\'e'),
            ('\'É', '\'e'),
            ('\'â', '\'a'),
            ('\'à', '\'a'),
            ('\'î', '\'i'),
            # The ligatures do not exist in ISO-8859-1 and would otherwise
            # make the encoding below fail.
            ('œ', 'oe'),
            ('Œ', 'Oe'),
        )

        if title is None:
            title = self.title
        if authors is None:
            authors = self.authors

        q = ''
        au = ''

        print(('title %s' %title))
        title_quoted = []
        if title:
            for old, new in replacements:
                title = title.replace(old, new)
            title_tokens = list(self.get_title_tokens(title,
                                strip_joiners=False, strip_subtitle=True))
            if title_tokens:
                try:
                    title_quoted = self._quote_latin1(title_tokens)
                except (UnicodeError, TypeError):
                    return None
                q = '+'.join(title_quoted)
        print(('tokens title: %s'%title_quoted))

        author_quoted = []
        if authors:
            author_tokens = list(self.get_author_tokens(authors,
                                 only_first_author=True))
            if author_tokens:
                try:
                    author_quoted = self._quote_latin1(author_tokens)
                except (UnicodeError, TypeError):
                    return None
                au = '+'.join(author_quoted)
        print(('tokens author: %s'%author_quoted))

        if not q:
            return None
        return '%s%s%s%s%s'%(BASE_URL,au,BASE_URL_MID,q,BASE_URL_LAST)

    def _parse_search_results(self, root, matches):
        """Append the URL of every notice found in a search page to *matches*.

        :param root: parsed lxml tree of the search result page.
        :param matches: list extended in place with absolute notice URLs.
        """
        BASE_URL0 = 'http://www.babelio.com'
        print('parse')
        # NOTE(review): this absolute path is tied to the page layout and
        # expects an explicit <tbody>; confirm against the live markup.
        results = root.xpath('//*[@id="page_corps"]/div/div[3]/div[2]/table/tbody/tr/td[1]')
        print(('results %s' %results))
        if not results:
            print('not results')
            return
        for result in results:
            print('in results')
            result_url = result.xpath('a/@href')
            print(('result_url %s' %result_url))
            if not result_url:
                # Cell without a link: skip it instead of failing the
                # whole page with an IndexError.
                continue
            matches.append('%s%s'%(BASE_URL0,result_url[0]))
            print(('matches : %r' %matches))
        print('fin parse')
