MobileRead Forums - View Single Post

kovidgoyal · 08-04-2022, 12:00 PM

I think the Google server in Portugal is using a different URL encoding. Try applying the following patch:

Code:

diff --git a/src/calibre/ebooks/metadata/sources/search_engines.py b/src/calibre/ebooks/metadata/sources/search_engines.py
index 55e0053d50..6f6152d4b9 100644
--- a/src/calibre/ebooks/metadata/sources/search_engines.py
+++ b/src/calibre/ebooks/metadata/sources/search_engines.py
@@ -25,7 +25,7 @@
 from calibre.utils.lock import ExclusiveFile
 from calibre.utils.random_ua import accept_header_for_ua
 
-current_version = (1, 2, 0)
+current_version = (9, 9, 9)
 minimum_calibre_version = (2, 80, 0)
 webcache = {}
 webcache_lock = Lock()
@@ -297,6 +297,7 @@ def urepl(m):
     seen = set()
     ans = {}
     for m in pat.finditer(raw):
+        print(11111111111, repr(m.group(1)))
         cache_url = upat.sub(urepl, m.group(1))
         m = cache_pat.search(cache_url)
         cache_id, src_url = m.group(1), m.group(2)
@@ -326,6 +327,7 @@ def google_parse_results(root, raw, log=prints, ignore_uncached=True):
         else:
             try:
                 c = div.xpath('descendant::*[@role="menuitem"]//a[@class="fl"]')[0]
+                print(2222222, repr(c.get('href')))
             except IndexError:
                 if ignore_uncached:
                     log('Ignoring {!r} as it has no cached page'.format(title))
@@ -367,14 +369,14 @@ def google_search(terms, site=None, br=None, log=prints, safe_search=False, dump
     return google_parse_results(root, r[0], log=log), url
 
 
-def google_develop(search_terms='1423146786', raw_from=''):
+def google_develop(search_terms='B00EAQUPEQ', raw_from=''):
     if raw_from:
         with open(raw_from, 'rb') as f:
             raw = f.read()
         results = google_parse_results(parse_html(raw), raw)
     else:
         br = browser()
-        results = google_search(search_terms.split(), 'www.amazon.com', dump_raw='/t/raw.html', br=br)[0]
+        results = google_search(search_terms.split(), 'www.amazon.com.br', dump_raw='raw.html', br=br)[0]
     for result in results:
         if '/dp/' in result.url:
             print(result.title)

Then run

Code:

calibre-debug -c "from calibre.ebooks.metadata.sources.search_engines import *; google_develop()"

and post the output, along with the raw.html file that this command will create in the current directory.