|
|
#1 | |
|
Connoisseur
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 79
Karma: 24492
Join Date: Jul 2013
Location: France
Device: Kindle 4, PBk Lux 2, PBk Lux 3, K Aura, K Libra H2O, K Libra2
|
Metadata plugin Babelio
Hello,
I try to convert the babelio plugin. I did conversion using the command Code:
python-modernize -w __init__.py Code:
python-modernize -w --future-unicode __init__.py Quote:
But, as I do not know anything in code I am not able to find the way to solve this issue, I did some tries but without any success ![]() Can someone help me to find a solution, please ? ![]() Here below the code of __init__.py Code:
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from six.moves import range
from six.moves import zip
__license__ = 'GPL v3'
__copyright__ = '2014, VdF>'
__docformat__ = 'restructuredtext'
import time, six.moves.http_cookiejar, unicodedata
from six.moves.urllib.parse import quote, unquote
from six.moves.queue import Queue, Empty
from difflib import SequenceMatcher
from lxml.html import fromstring, tostring
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import Source
from calibre.utils.cleantext import clean_ascii_chars
from calibre.utils.config import JSONConfig
class Babelio(Source):
name = 'Babelio'
description = 'Telecharge les metadonnees et couverture depuis Babelio.com'
author = 'VdF'
version = (0, 4, 0)
minimum_calibre_version = (0, 8, 0)
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title', 'authors', 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate', 'tags'])
has_html_comments = False
supports_gzip_transfer_encoding = True
BASE_URL = 'https://www.babelio.com'
def config_widget(self):
from calibre_plugins.babelio.config import ConfigWidget
return ConfigWidget(self)
def create_query(self, log, title=None, authors=None, identifiers={}):
q = ''
isbn = check_isbn(identifiers.get('isbn', None))
tokens = []
if title is not None:
tokens += title.replace('\u2019', ' ').replace("'", ' ').replace(' ', ' ').replace('\u2013', ' ').replace('\u0153', '\u006f\u0065')
if authors is not None and len(authors) >= 1:
for i in range(0, len(authors)):
tokens += ' '
if ',' in authors[i] :
auteur = authors[i].split(',')[0]
elif ' ' in authors[i] :
auteur = authors[i].rsplit(' ')[-1]
else :
auteur = authors[i]
tokens += auteur
tokens = [quote(t.encode('iso-8859-1')) for t in tokens]
q = ''.join(tokens)
q = '/resrecherche.php?Recherche=' + q + '&page=1&item_recherche=livres&tri=titre'
if not q:
return None
if isinstance(q, str):
q = q.encode('utf-8')
return Babelio.BASE_URL + q
def identify(self, log, result_queue, abort, title=None, authors=None,
identifiers={}, timeout=30):
matches = []
br = self.browser
cj = six.moves.http_cookiejar.LWPCookieJar()
br.set_cookiejar(cj)
query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
if query is None:
log.error(b'Metadonnees insuffisantes pour la requete'.encode('latin-1'))
return
log.info(b'Recherche de : %s' % unquote(query).encode('latin-1'))
response = br.open_novisit(query, timeout=timeout)
try:
raw = response.read().strip()
raw = raw.decode('latin-1', errors='replace')
#open('E:\\babelio.html', 'wb').write(raw)
if not raw:
log.error(b'Pas de resultat pour la requete : %r'.encode('latin-1') % unquote(query).encode('latin-1'))
return
root = fromstring(clean_ascii_chars(raw))
except:
msg = b'Impossible de parcourir la page babelio avec la requete : %r'.encode('latin-1') % unquote(query).encode('latin-1')
log.exception(msg)
return msg
self._parse_search_results(log, title, authors, root, matches, timeout)
if abort.is_set():
return
if not matches:
if title and authors and len(authors) > 1:
log.info(b'Pas de resultat avec les auteurs, on utilise uniquement le premier.'.encode('latin-1'))
return self.identify(log, result_queue, abort, title=title,
authors=[authors[0]], timeout=timeout)
elif authors and len(authors) == 1 :
log.info(b'Pas de resultat, on utilise uniquement le titre.'.encode('latin-1'))
return self.identify(log, result_queue, abort, title=title, timeout=timeout)
log.error(b'Pas de resultat pour la requete : %r'.encode('latin-1') % unquote(query.encode('latin-1')))
return
from calibre_plugins.babelio.worker import Worker
workers = [Worker(url, result_queue, br, log, i, self) for i, url in
enumerate(matches)]
for w in workers:
w.start()
# Don't send all requests at the same time
time.sleep(0.1)
while not abort.is_set():
a_worker_is_alive = False
for w in workers:
w.join(0.1)
if abort.is_set():
break
if w.is_alive():
a_worker_is_alive = True
if not a_worker_is_alive:
break
return None
def _parse_search_results(self, log, orig_title, orig_authors, root, matches, timeout):
orig_aut = None
if orig_authors is not None:
orig_aut = [author.split(',')[0] for author in orig_authors if (',' in author)] \
+ [author.split(' ')[1] for author in orig_authors if (' ' in author)]
# log.info([author.split(',')[0] for author in orig_authors if (',' in author)])
# log.info([author.split(' ')[1] for author in orig_authors if (' ' in author)])
non_trouve = root.xpath('//div[@class="module_t1"]/h2')
'''if non_trouve :
non_trouve_text = non_trouve[0].text_content()
if '(0)' in non_trouve_text :
return'''
def minussa(chaine):
chaine = str(chaine.lower())
chnorm = unicodedata.normalize('NFKD', chaine)
return "".join([car for car in chnorm if not unicodedata.combining(car)])
def simil(mot1, mot2, ratio):
mot1, mot2 = minussa(mot1), minussa(mot2)
return SequenceMatcher(None, mot1, mot2).ratio() >= ratio
def is_simil(orig_aut, dict_res, ratio):
for aut_compl in (v.text for v in dict_res.values()) :
for a in orig_aut :
if simil(aut_compl.split()[-1], a, ratio):
return True
return False
titre_res = root.xpath(".//*[@id='page_corps']/div/div[3]/div[2]/table/tbody/tr/td[2]/a[1]")
# log.info('t_res', titre_res)
if len(titre_res) == 0 :
return
else :
matches.append(Babelio.BASE_URL + titre_res[0].get('href'))
return
aut_res = root.xpath(".//*[@id='page_corps']/div/div[3]/div[3]/table/tbody/tr/td[3]/a")
dict_res = dict(list(zip(titre_res, aut_res)))
# log.info('dict', dict_res)
if orig_aut is not None :
ratio = 0.7
for k in dict_res.keys():
if is_simil(orig_aut, dict_res, ratio):
matches.append(Babelio.BASE_URL + k.get('href'))
else :
for i in range(0, len(titre_res)):
matches.append(Babelio.BASE_URL + titre_res[i].get('href'))
matches = matches[:5]
# log.info('mat', matches)
def get_cached_cover_url(self, identifiers):
if JSONConfig('plugins/Babelio').get('cover', False) == False:
return None
url = None
bab_id = identifiers.get('babelio', None)
if bab_id is None:
isbn = identifiers.get('isbn', None)
if isbn is not None:
bab_id = self.cached_isbn_to_identifier(isbn)
if bab_id is not None:
url = self.cached_identifier_to_cover_url(bab_id)
return url
def download_cover(self, log, result_queue, abort,
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
if JSONConfig('plugins/Babelio').get('cover', False) == False:
return
cached_url = self.get_cached_cover_url(identifiers)
log.info('cache :', cached_url)
if cached_url is None:
log.info('Pas de cache, on lance identify')
rq = Queue()
self.identify(log, rq, abort, title=title, authors=authors,
identifiers=identifiers)
if abort.is_set():
return
results = []
while True:
try:
results.append(rq.get_nowait())
except Empty:
break
# results.sort(key=self.identify_results_keygen(
# title=title, authors=authors, identifiers=identifiers))
for mi in results:
cached_url = self.get_cached_cover_url(mi.identifiers)
if cached_url is not None:
break
if cached_url is None:
log.info(b'Pas de couverture trouvee.'.encode('latin-1'))
return
if abort.is_set():
return
br = self.browser
log.info(b'On telecharge la couverture depuis :'.encode('latin-1'), cached_url)
try:
cdata = br.open_novisit(cached_url, timeout=timeout).read()
result_queue.put((self, cdata))
except:
log.exception(b'Impossible de telecharger la couverture depuis :'.encode('latin-1'), cached_url)
Last edited by druss67; 10-04-2020 at 11:32 AM. |
|
|
|
|
|
|
#2 |
|
creator of calibre
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 45,598
Karma: 28548962
Join Date: Oct 2006
Location: Mumbai, India
Device: Various
|
remove the two lines
Code:
if isinstance(q, str):
q = q.encode('utf-8')
|
|
|
|
|
|
#3 |
|
Connoisseur
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 79
Karma: 24492
Join Date: Jul 2013
Location: France
Device: Kindle 4, PBk Lux 2, PBk Lux 3, K Aura, K Libra H2O, K Libra2
|
Hi Kovid,
![]() You've got it ![]() I wish you a very nice evening. Druss |
|
|
|
|
|
#4 |
|
Connoisseur
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 79
Karma: 24492
Join Date: Jul 2013
Location: France
Device: Kindle 4, PBk Lux 2, PBk Lux 3, K Aura, K Libra H2O, K Libra2
|
Sorry I talked to fast...
I have this error about unicode, I will try to find and I will come if I need help ![]() Code:
Running identify query with parameters:
{'title': 'Victime 2117', 'authors': ['Jussi Adler-Olsen'], 'identifiers': {'isbn': '9782226396334', 'mobi-asin': 'B0814FYJJJ'}, 'timeout': 30}
Using plugins: Babelio (0, 4, 1)
The log from individual plugins is below
****************************** Babelio (0, 4, 1) ******************************
Found 1 results
Downloading from Babelio took 0.3860633373260498
---
Title : Victime 2117
Author(s) : Jussi Adler-Olsen
Tags : Serie , Roman , Roman Noir , Thriller , Romans Policiers Et Polars , Légendes , Réfugiés , Vengeance , Obsessions , Enquêtes , Immigration , Mystère , Département v , Terrorisme , Danois , Danemark , Allemagne , Littérature Nordique , Littérature Danoise , Policier Nordique
Languages : fr
Identifiers : babelio:1190636
Recherche de : https://www.babelio.com/resrecherche.php?Recherche=Victime 2117 Adler-Olsen&page=1&item_recherche=livres&tri=titre
Url Babelio: 'https://www.babelio.com/livres/Adler-Olsen-Victime-2117/1190636'
Erreur en cherchant ISBN, editeur et date de publication dans : 'https://www.babelio.com/livres/Adler-Olsen-Victime-2117/1190636'
Traceback (most recent call last):
File "calibre_plugins.babelio.worker", line 106, in parse_details
File "calibre_plugins.babelio.worker", line 232, in parse_meta
NameError: name 'unicode' is not defined
Erreur en cherchant la note dans : 'https://www.babelio.com/livres/Adler-Olsen-Victime-2117/1190636'
Traceback (most recent call last):
File "calibre_plugins.babelio.worker", line 117, in parse_details
File "calibre_plugins.babelio.worker", line 179, in parse_rating
NameError: name 'unicode' is not defined
Erreur en cherchant le resume : 'https://www.babelio.com/livres/Adler-Olsen-Victime-2117/1190636'
Traceback (most recent call last):
File "calibre_plugins.babelio.worker", line 122, in parse_details
File "calibre_plugins.babelio.worker", line 193, in parse_comments
AttributeError: module 'urllib' has no attribute 'urlencode'
Telechargement de la couverture desactive
********************************************************************************
The identify phase took 0.45 seconds
The longest time (0.386063) was taken by: Babelio
Merging results from different sources
We have 1 merged results, merging took: 0.00 seconds
|
|
|
|
|
|
#5 |
|
Connoisseur
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 79
Karma: 24492
Join Date: Jul 2013
Location: France
Device: Kindle 4, PBk Lux 2, PBk Lux 3, K Aura, K Libra H2O, K Libra2
|
I have got it, it works fine, now.
Thank you. Last edited by druss67; 10-04-2020 at 03:01 PM. |
|
|
|
|
|
#6 |
|
Junior Member
![]() Posts: 9
Karma: 10
Join Date: Jun 2017
Location: France
Device: Vivlio Touch HD+ [PB632]
|
Error plugin Babelio
Hello,
Thanks for your work, but I have an error with this plugin: Code:
Running identify query with parameters:
{'title': 'La symphonie pastorale', 'authors': ['André Gide'], 'identifiers': {}, 'timeout': 30}
Using plugins: Babelio (0, 4, 2), Google (1, 0, 3), Amazon.com (1, 2, 22)
The log from individual plugins is below
****************************** Babelio (0, 4, 2) ******************************
Found 0 results
Downloading from Babelio took 0.24242830276489258
Recherche de : https://www.babelio.com/resrecherche.php?Recherche=La symphonie pastorale Gide&page=1&item_recherche=livres&tri=titre
Plugin Babelio failed
Traceback (most recent call last):
File "calibre/ebooks/metadata/sources/identify.py", line 46, in run
File "calibre_plugins.babelio.__init__", line 101, in identify
log.info(b'Pas de resultat, on utilise uniquement le titre.'.encode('latin-1'))
AttributeError: 'bytes' object has no attribute 'encode'
|
|
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Software extension calibre note Babelio | kiki34420 | Forum Français | 1 | 02-27-2020 12:24 PM |
| [Metadata Source Plugin] Empty Plugin? (Fake Identifier) | mneimeyer | Plugins | 3 | 11-11-2019 08:07 PM |
| Regarding using metadata objects in identify method of metadata download plugin api | aprekates | Development | 1 | 07-06-2014 03:35 AM |
| [Metadata Download Plugin] Goodreads Metadata **Deprecated** | kiwidude | Plugins | 30 | 04-23-2011 02:10 PM |
| metadata plugin | redneck_momma | Plugins | 1 | 05-21-2010 08:41 PM |