﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import (unicode_literals, division, absolute_import, print_function)

import collections
import dateutil.parser
import dateutil.tz
import re
import time

from calibre.utils.date import parse_only_date
from calibre.utils.config_base import tweaks

from calibre_plugins.overdrive_link.numbers import (value_unit, word_number)
from calibre_plugins.overdrive_link.link import IDENT_AMAZON
from calibre_plugins.overdrive_link.formats import FORMAT_KINDLE_BOOK
from calibre_plugins.overdrive_link.book import (LibraryBook, InfoBook)
from calibre_plugins.overdrive_link.library import SearchableLibrary
from calibre_plugins.overdrive_link.author_prep import normalize_author
from calibre_plugins.overdrive_link.title_prep import normalize_title
from calibre_plugins.overdrive_link.net import (AMAZON_UAGENTS, open_url)
from calibre_plugins.overdrive_link.parseweb import (
        LibraryError, must_find, text_only, double_quote, class_contains, beautiful_soup_fix)
from calibre_plugins.overdrive_link.language import ALL_LANGUAGE_NAMES
from calibre_plugins.overdrive_link.tweak import (TWEAK_DISABLE_KU_CHECK, TWEAK_SAVE_RESPONSES_ON_ERROR)
from calibre_plugins.overdrive_link.cache import AmazonUserAgent

from .python_transition import (IS_PYTHON2)
if IS_PYTHON2:
    from .python_transition import (http, repr, urllib)
else:
    import http.cookiejar
    import urllib.parse

try:
    from calibre_plugins.overdrive_link_debug.config import DEBUG_MODE
    from calibre_plugins.overdrive_link_debug.amazon2 import examine_response
except ImportError:
    DEBUG_MODE = False
    examine_response = None


__license__ = 'GPL v3'
__copyright__ = '2012-2022, John Howell <jhowell@acm.org>'


# Amazon sends a captcha when requests are received too quickly. Take a long delay when this is detected to make Amazon happy.
# Pace requests to try to avoid this condition.

#OVERALL_MAX_QPS = 0.17      # maximum queries per second for Amazon across all jobs (< 1 per 5 sec)
OVERALL_MAX_QPS = None

THROTTLE_DELAY = 120.0      # seconds to delay when throttled by Amazon


'''
Browse node IDs are locale-specific and may change without warning. Arranged in a hierarchy.
http://docs.aws.amazon.com/AWSECommerceService/latest/DG/BrowseNodeIDs.html

Find the name of the browse node:
http://www.amazon.com/exec/obidos/tg/browse/-/[Browse Node ID]

See contents of node:
http://www.amazon.com/b/?node=[Browse Node ID]
'''

# search index types
SI_BOOKS = 'Books'
SI_KINDLE_STORE = 'KindleStore'

# Locale
LOCALE_USA = ''
LOCALE_AUSTRALIA = 'au'
LOCALE_BRAZIL = 'br'
LOCALE_CANADA = 'ca'    # not supported by calibre
LOCALE_CHINA = 'cn'     # not supported by calibre
LOCALE_GERMANY = 'de'
LOCALE_SPAIN = 'es'
LOCALE_FRANCE = 'fr'
LOCALE_INDIA = 'in'     # not supported by calibre
LOCALE_ITALY = 'it'
LOCALE_JAPAN = 'jp'
LOCALE_UK = 'uk'

WEB_HOSTS = {
    LOCALE_USA: 'www.amazon.com',
    LOCALE_UK: 'www.amazon.co.uk',
    LOCALE_CANADA: 'www.amazon.ca',
    LOCALE_AUSTRALIA: 'www.amazon.com.au',

    # Unable to detect prime eligible books at these sites
    #LOCALE_GERMANY: 'www.amazon.de',
    #LOCALE_FRANCE: 'www.amazon.fr',

    # Enable the following amazon sites if they support prime lending
    #LOCALE_BRAZIL: 'www.amazon.com.br',
    #LOCALE_SPAIN: 'www.amazon.es',
    #LOCALE_ITALY: 'www.amazon.it',
    #LOCALE_JAPAN: 'www.amazon.co.jp',
    }

LOCALE_MAIN_BROWSE_BIN = {              # must use "Kindle eBooks" browse bin for language to be applied
    LOCALE_USA: '154606011',            # Kindle Store (133140011)->Kindle eBooks (154606011)
    LOCALE_UK: '341689031',             # 341677031
    LOCALE_CANADA: '2980423011',        # 2972705011
    LOCALE_AUSTRALIA: '2496751051',     # 2490359051
    }

COLLECTION_UNLIMITED = 'unlimited'                  # kindle unlimited
COLLECTION_UNLIMITED_WITH_NARRATION = 'kuwn'        # kindle unlimited with Audible narration
COLLECTION_STORE = 'store'                          # kindle e-book store
COLLECTION_STORE_WITH_NARRATION = 'w4v.store'       # kindle e-book store with Audible narration
COLLECTION_PRIME_READING = 'reading'

COLLECTION_NAMES = {
    COLLECTION_UNLIMITED: ["Kindle Unlimited Eligible"],
    COLLECTION_UNLIMITED_WITH_NARRATION: ["Kindle Unlimited Eligible", "eBooks with Audible Narration"],
    COLLECTION_STORE: ["Kindle eBooks"],
    COLLECTION_STORE_WITH_NARRATION: ["eBooks with Audible Narration"],  # formerly "Whispersync for Voice"
    COLLECTION_PRIME_READING: ["Prime Reading Eligible"],
    }

COLLECTIONS = set(COLLECTION_NAMES.keys())

PURCHASABLE_COLLECTIONS = {COLLECTION_STORE, COLLECTION_STORE_WITH_NARRATION}

LOCAL_COLLECTION_RH_FEATURES = {       # entry must be present for valid locale/collection combinations
    (LOCALE_USA, COLLECTION_UNLIMITED): {'p_n_feature_nineteen_browse-bin': '9045887011'},
    (LOCALE_USA, COLLECTION_UNLIMITED_WITH_NARRATION): {
        'n': '9630682011', 'p_n_feature_nineteen_browse-bin': '9045887011', 'p_n_feature_three_browse-bin': '6577679011'},
    (LOCALE_USA, COLLECTION_STORE): {},
    (LOCALE_USA, COLLECTION_STORE_WITH_NARRATION): {'p_n_feature_three_browse-bin': '6577679011'},
    (LOCALE_USA, COLLECTION_PRIME_READING): {'p_n_special_merchandising_browse-bin': '14807080011'},

    (LOCALE_UK, COLLECTION_UNLIMITED): {'p_n_feature_nineteen_browse-bin': '4768069031'},
    (LOCALE_UK, COLLECTION_UNLIMITED_WITH_NARRATION): {
        'n': '5232194031', 'p_n_feature_nineteen_browse-bin': '4768069031', 'p_n_feature_sixteen_browse-bin': '4824710031'},
    (LOCALE_UK, COLLECTION_STORE): {},
    (LOCALE_UK, COLLECTION_STORE_WITH_NARRATION): {'p_n_feature_sixteen_browse-bin': '4824710031'},

    (LOCALE_CANADA, COLLECTION_UNLIMITED): {'p_n_feature_nineteen_browse-bin': '9209819011'},
    (LOCALE_CANADA, COLLECTION_STORE): {},

    (LOCALE_AUSTRALIA, COLLECTION_UNLIMITED): {'p_n_feature_nineteen_browse-bin': '3164900051'},
    (LOCALE_AUSTRALIA, COLLECTION_STORE): {},
    }

LIBRARY_ID_SEPERATOR = '-'

USA_SEARCH_LANGUAGES = {    # values for p_n_feature_nine_browse-bin
    # German=36, English=37, French=38, Spanish=39, italian=40, russian=41, Chinese (Simplified)=42, Japanese=43, Latin=44,
    # Portuguese=45, Dutch=46, Serbian=48, Swedish=49, Danish=51, Catalan=52, Norwegian=53, etc.
    'English': '3291437011',
    'French': '3291438011',
    'German': '3291436011',
    'Spanish': '3291439011',
    }

AU_SEARCH_LANGUAGES = {     # values for p_n_feature_seven_browse-bin
    'English': '6084466051',
    }

DESIRED_ROLES = {'Editor', 'Author', 'Collaborator', 'Contributor', 'Illustrator', 'Artist'}

UNDESIRED_ROLES = {'Preface', 'Introduction', 'Foreword', 'Afterword', 'CoverArt',
                   'Photographer', 'Translator', 'Narrator', 'Reader', 'Compiler',
                   '寄稿', 'イラスト', '翻訳', 'OtherContributor', 'Colorist', 'Penciller',
                   'Letterer', 'Inker'}

KNOWN_ROLES = DESIRED_ROLES | UNDESIRED_ROLES


def lib_locale(library_id):
    """Return the locale part of an Amazon library id.

    The locale is the text before the last LIBRARY_ID_SEPERATOR; an id that
    contains no separator yields the empty string.

    Raises ValueError when the locale is not a key of WEB_HOSTS.
    """
    prefix, _, _ = library_id.rpartition(LIBRARY_ID_SEPERATOR)

    if prefix in WEB_HOSTS:
        return prefix

    raise ValueError('Invalid Amazon library id (country): "%s"' % prefix)


def lib_collection(library_id):
    """Return the collection part of an Amazon library id.

    The collection is the text after the last LIBRARY_ID_SEPERATOR, or the
    whole id when it contains no separator.

    Raises ValueError when the collection is not a member of COLLECTIONS.
    """
    collection = library_id.rpartition(LIBRARY_ID_SEPERATOR)[2]

    if collection not in COLLECTIONS:
        # COLLECTIONS is a set, so sort it to keep the message wording stable between runs
        raise ValueError('Amazon library id (collection) may only be %s, found: "%s"' % (
            ' or '.join(sorted(COLLECTIONS)), collection))

    return collection


class ResultSet(object):
    """Bookkeeping for which pages of a paged result have been collected."""

    def __init__(self):
        self.pages_have = set()     # page numbers already collected
        self.total_pages = None     # must be set to an int before pages_needed() is called

    def pages_needed(self):
        """Return the page numbers from 1 through total_pages that are not in pages_have."""
        every_page = set(range(1, self.total_pages + 1))
        return every_page - self.pages_have


class Amazon(SearchableLibrary):
    # Amazon.com
    # SearchableLibrary subclass for Amazon; FORMAT_KINDLE_BOOK is the only format it declares.

    id = 'ak'
    name = 'Amazon'
    formats_supported = {FORMAT_KINDLE_BOOK}

    is_amazon = True

    # Extra request headers handed to open_url() by open_amazon_url(); modeled on a Chrome/Windows request.
    # The user-agent header is left out here because open_amazon_url() supplies it separately.
    ADD_HEADERS = [
        # Chrome Windows

        #:method: GET
        #:authority: www.amazon.com
        #:scheme: https
        #:path: /dp/B001CN8DRQ

        ('sec-ch-ua', '"Chromium";v="88", "Google Chrome";v="88", ";Not A Brand";v="99"'),
        ('sec-ch-ua-mobile', '?0'),
        ('upgrade-insecure-requests', '1'),
        #('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'),
        ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,'
            '*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'),
        ('sec-fetch-site', 'none'),
        ('sec-fetch-mode', 'navigate'),
        ('sec-fetch-user', '?1'),
        ('sec-fetch-dest', 'document'),
        ('accept-encoding', 'gzip, deflate, br'),
        ('accept-language', 'en-US,en;q=0.9'),
        ]

    # Earlier header set. NOTE(review): nothing in the visible part of this file references it;
    # presumably kept for reference only -- confirm before removing.
    OLD_ADD_HEADERS = [
        # Chrome Windows
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
        ('Upgrade-Insecure-Requests', '1'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Pragma', 'no-cache'),
        ('Cache-Control', 'no-cache'),
        ('Connection', 'keep-alive'),
        ]

    @staticmethod
    def validate_library_id(library_id, migrate=True, config=None):
        """Validate an Amazon library id and return it in canonical lower-case form.

        library_id -- "<locale>-<collection>", or just "<collection>" for the US site
        migrate, config -- accepted but not used by this implementation

        Raises ValueError for an unknown locale, an unknown collection, or a
        locale/collection pair missing from LOCAL_COLLECTION_RH_FEATURES.
        """
        normalized_id = library_id.lower()

        # each helper raises ValueError on its own when its part of the id is unknown
        country = lib_locale(normalized_id)
        kind = lib_collection(normalized_id)

        if (country, kind) not in LOCAL_COLLECTION_RH_FEATURES:
            raise ValueError('Invalid Amazon locale-collection combination: "%s"' % normalized_id)

        # the US locale is the empty string, so its ids carry no locale prefix
        return '%s-%s' % (country, kind) if country else kind

    @staticmethod
    def validate_book_id(book_id, library_id):
        # book id for Amazon is 10-character alphanumeric ASIN
        if not re.match(r'^([0-9A-Za-z]{10})$', book_id):
            raise ValueError('Amazon book id must be 10 alphanumberic characters: "%s"' % book_id)

        return book_id

    @staticmethod
    def book_url(library_id, book_id):
        """Return the https "/dp/" page URL for book_id on the web host of the library's locale."""
        host = WEB_HOSTS[lib_locale(library_id)]
        return 'https://%s/dp/%s' % (host, book_id)

    @staticmethod
    def book_key_library_id(library_id):
        """Return the locale of library_id, used as the library part of a book key."""
        # book ids differ between countries for Amazon, so only the locale matters here
        country = lib_locale(library_id)
        return country

    @staticmethod
    def amazon_ident(library_id):
        """Return the identifier name for the library's locale: IDENT_AMAZON, plus "_<locale>" outside the US."""
        country = lib_locale(library_id)
        if not country:
            return IDENT_AMAZON         # us - no locale

        return '_'.join((IDENT_AMAZON, country))

    @staticmethod
    def supports_purchase(library_id):
        """Return True when the collection named by library_id is one of PURCHASABLE_COLLECTIONS."""
        collection = lib_collection(library_id)
        return collection in PURCHASABLE_COLLECTIONS

    def __init__(self):
        self.use_cookies = False    # having cookies enabled causes Amazon to give more consistent results, but increases chance of captcha
        self.cookiejar = http.cookiejar.CookieJar()

        self.signed_in = False
        self.item_cache = {}

    def sign_in(self, use_credentials):
        """Derive locale, collection and web host from self.library_id.

        No request is made here and use_credentials is not used; the method
        only stores the values that the search methods read later.
        """
        library_id = self.library_id
        self.locale = lib_locale(library_id)
        self.collection = lib_collection(library_id)

        self.web_host = WEB_HOSTS[self.locale]

    def open_amazon_url(self, url=None, book_id=None, expect_errors=None, qps=OVERALL_MAX_QPS):
        """Fetch a page from Amazon, retrying while Amazon throttles the request.

        url -- address to fetch; replaced by the book page address when book_id is given
        book_id -- optional ASIN; when set, the book's page for this library is fetched
        expect_errors -- HTTP error codes passed to open_url as expected (503 is always added);
            None means no additional codes
        qps -- pacing value passed through to open_url

        Returns the open_url response once it is either an HTTP error other
        than 503 or a page without the captcha prompt.

        A 503 or captcha response advances the stored user agent sequence and
        sleeps THROTTLE_DELAY * attempt seconds before retrying. There is no
        upper bound on the number of attempts.
        """
        # a fresh list per call avoids sharing one mutable default object between calls
        if expect_errors is None:
            expect_errors = []

        retries = 1
        amazon_user_agent = AmazonUserAgent(self.log)

        if book_id:
            url = self.book_url(self.library_id, book_id)   # same on every attempt, so computed once

        while True:
            if not self.use_cookies:
                self.cookiejar.clear()  # flush cookies

            response = open_url(self.log, url, qps=qps, cookiejar=self.cookiejar,
                                addheaders=self.ADD_HEADERS, expect_errors=list(expect_errors) + [503],
                                uagent=AMAZON_UAGENTS[amazon_user_agent.read() % len(AMAZON_UAGENTS)])

            if response.is_httperror_exception:
                if response.code == 503:
                    # treated like throttling: fall through to the delay below
                    self.log.info('Amazon service unavailable')
                else:
                    break   # any other (expected) HTTP error is returned to the caller
            elif ('<h4>Enter the characters you see' not in response.data_string and
                    '<h4>Type the characters you see' not in response.data_string):
                break   # normal page, no captcha prompt
            elif examine_response is not None:
                # optional debug hook; a true result means retry at once with cookies kept
                if examine_response(self, url, expect_errors, response):
                    self.use_cookies = True
                    continue

            # throttled: move on to the next user agent and back off before retrying
            user_agent_sequence = amazon_user_agent.read() + 1
            amazon_user_agent.write(user_agent_sequence)

            if examine_response is not None:
                self.log.warn('Switching Amazon user agent to %d' % user_agent_sequence)

            delay_sec = THROTTLE_DELAY * retries    # delay grows linearly with each attempt
            self.log.info('Delaying %d seconds due to throttling' % int(delay_sec))
            time.sleep(delay_sec)

            self.use_cookies = False
            retries += 1

        return response

    def find_books(self, books, search_author, search_title, keyword_search):
        """Search the Amazon Kindle store and add each result found to ``books``.

        Result pages are requested one at a time through open_amazon_url and
        parsed from the returned HTML. Every result element that is not marked
        "[Sponsored]" becomes a LibraryBook that is added to ``books``.

        books -- collection with an ``add`` method that receives the LibraryBook results
        search_author -- author text, sent as the ``p_27`` refinement when non-empty
        search_title -- title text; sent as the quoted keyword ``k`` when
            keyword_search is true, otherwise as the ``p_28`` refinement
        keyword_search -- true to search by keyword instead of by title field

        Returns True when the reported total exceeds MAX_RESULTS_ALLOWED (the
        search stops before that page's results are parsed), otherwise False.

        Raises LibraryError when a page starts at an unexpected result number
        or when a result element lacks its index or its ASIN.
        """
        page_num = 1
        RESULTS_PER_PAGE = 16
        MAX_RESULTS_ALLOWED = 500
        search_language = ''

        while True:
            data = collections.OrderedDict()
            rh = collections.OrderedDict()

            data["i"] = "digital-text"  # Kindle e-book

            #data["bbn"] = LOCALE_MAIN_BROWSE_BIN[self.locale]      # redundant, but does not seem to hurt
            rh["n"] = LOCALE_MAIN_BROWSE_BIN[self.locale]          # browse node (multiple allowed as separate "n:###" entries)

            if keyword_search:
                data['k'] = double_quote(search_title)    # double quote grouping only works for keywords, not author

            if search_author:
                rh['p_27'] = search_author

            if search_title and not keyword_search:
                rh['p_28'] = search_title

            # p_20 is language, p_30 is publisher

            # collection-specific refinements; an 'n' entry here replaces the browse node set above
            for k, v in LOCAL_COLLECTION_RH_FEATURES[(self.locale, self.collection)].items():
                rh[k] = v

            if self.locale == LOCALE_USA and self.config.search_language in USA_SEARCH_LANGUAGES:
                #rh['p_20'] = self.config.search_language
                rh['p_n_feature_nine_browse-bin'] = USA_SEARCH_LANGUAGES[self.config.search_language]   # not working?
                search_language = self.config.search_language

            if len(rh) > 0:
                data['rh'] = ','.join('%s:%s' % i for i in rh.items())

            data['sort'] = 'price-asc-rank'     # low to high
            data['unfiltered'] = '1'            # don't filter out less relevant results
            data['fap'] = '1'                   # allow adult content

            if page_num > 1:
                data['page'] = "%d" % page_num

            response = self.open_amazon_url(url='https://%s/s?%s' % (self.web_host, urllib.parse.urlencode(data)))

            # Parse the html results for analysis
            soup = beautiful_soup_fix(response.data_string)
            soup_text = text_only(soup)

            if 'did not match any products' in soup_text:
                break

            result_count_h = soup.find('span', attrs={"data-component-type": "s-result-info-bar"})
            if result_count_h:
                # 1 result for Prime Eligible : Books : Kindle Edition : English : "john brunner"
                # 1–12 of 23 results for Prime Eligible : Books : Kindle Edition : English : "kurt vonnegut"
                # Showing results for Kindle Store : Kindle Unlimited : "stephen baxter" : English
                result_count_text = text_only(result_count_h)
                result_count_text = result_count_text.partition("Sort by:")[0].strip()

                if not result_count_text:
                    total_results = None    # probably zero results
                else:
                    # 7/2020 - sometimes Amazon does not return the category that was searched for, assume OK for now
                    for name in COLLECTION_NAMES.get(self.collection, []):
                        if name not in result_count_text:
                            self.log.info("result_count_text: %s" % result_count_text)
                            break

                    result_count = re.sub('[-–‒—―]', ' - ', result_count_text.lower()).split()   # various dashes to simple dash

                    # Each length guard below covers the highest index its branch reads, so an
                    # unexpectedly short banner reaches the final "else" instead of raising IndexError.
                    if re.search("over [0-9],000 results", result_count_text):
                        total_results = 9999
                        first_result = 1

                    elif len(result_count) >= 1 and result_count[0] == "showing":
                        total_results = None

                    elif len(result_count) >= 2 and result_count[0] == "results" and result_count[1] == "for":
                        total_results = None

                    elif len(result_count) >= 5 and result_count[1] == '-' and result_count[3] == "results" and result_count[4] == "for":
                        total_results = None

                    elif len(result_count) >= 2 and (result_count[1] == 'results' or result_count[1] == 'result'):
                        first_result = 1
                        if result_count[0] == 'one':
                            total_results = 1
                        else:
                            total_results = int(result_count[0].replace(',', ''))

                    elif len(result_count) >= 6 and result_count[1] == '-' and result_count[3] == 'of' and \
                            (result_count[5] == 'results' or result_count[5] == 'result'):

                        first_result = int(result_count[0].replace(',', ''))
                        total_results = int(result_count[4].replace(',', ''))

                    else:
                        self.log_warn_debug('Unexpected result count: %s' % result_count_text)
                        total_results = None
            else:
                self.log.error("Failed to locate result count")
                #print(html.escape(soup.prettify(), quote=False))
                total_results = None

            if total_results is not None:
                total_pages = ((total_results - 1) // RESULTS_PER_PAGE) + 1  # floor division
                self.log.info('Response: page %d of %d. %d total results' % (page_num, total_pages, total_results))

                if total_results > MAX_RESULTS_ALLOWED:
                    return True

                expected_first_result = ((page_num - 1) * RESULTS_PER_PAGE) + 1
                if first_result != expected_first_result:
                    raise LibraryError('Unexpected first result %d instead of %d' % (first_result, expected_first_result))
            else:
                break   # without a usable count the page is not parsed

            book_elems = []

            # current layout: one container span holding a div per result
            s_search_results = soup.find('span', attrs={'data-component-type': 's-search-results'})
            if s_search_results:
                book_elems.extend(s_search_results.findAll('div', attrs={
                        'data-component-type': 's-search-result', 'data-asin': re.compile('^B[0-9A-Z]{9}$'),
                        'data-index': re.compile('^[0-9]+$')}))
            else:
                # fallback layout: results spread over up to three divs, as div or li elements
                for result_type in ['atfResults', 'btfPreResults', 'btfResults']:
                    xtf_results = soup.find('div', attrs={'id': result_type})
                    if xtf_results:
                        book_elems.extend(xtf_results.findAll('div', attrs={'id': re.compile('^result_[0-9]+')}))
                        book_elems.extend(xtf_results.findAll('li', attrs={'id': re.compile('^result_[0-9]+')}))
                    elif result_type == 'atfResults':
                        self.log.error('Missing %s' % result_type)

            hero_results = len(soup.findAll('div', attrs=class_contains('sx-hero-container'), recursive=True))
            page_results = 0

            for book_elem in book_elems:
                kindle_edition = False
                kindle_unlimited = False
                amazon_prime = False
                voice = False

                available = False
                purchasable = False
                title = ''
                authors = []
                series = ''
                series_index = 0
                language = search_language

                if book_elem.get("data-index"):     # has_key in bs3, has_attr in bs4, get works in both
                    result_num = int(book_elem['data-index'])
                    expected_result_num = page_results
                elif book_elem.get("id"):
                    result_num = int(book_elem['id'][7:])     # "result_123"
                    expected_result_num = first_result - 1 + page_results
                else:
                    self.log.info("book_elem: %s" % book_elem)
                    raise LibraryError('Missing data-index/id')

                if result_num != expected_result_num:
                    if result_num == 0:
                        break   # restart of result counting indicates a new result table, possibly suggested books

                    # missing results may occur due to adult content filtering
                    # results occasionally do not start at 0 for some authors, eg. C. S. Lewis
                    #self.log_warn_debug('Expected result %d, found %d' % (expected_result_num, result_num))

                if book_elem.get("data-asin"):
                    book_id = book_elem["data-asin"]
                elif book_elem.get("name"):
                    book_id = book_elem["name"]
                else:
                    raise LibraryError('Missing data-asin/name')

                book_text = text_only(book_elem)
                new_aps = book_elem.find('h3', attrs={'class': "newaps"})

                # the title is looked for in three alternative element layouts
                title_elem = book_elem.find('a', attrs={'class': "title"})

                if (not title_elem) and new_aps:
                    title_elem = new_aps.find('span', attrs={'class': "lrg bold"})

                if not title_elem:
                    title_elem = book_elem.find('h2', attrs=class_contains('s-access-title'))

                if title_elem:
                    title_plus = text_only(title_elem).strip()

                    if title_plus.startswith("[Sponsored]"):
                        page_results += 1
                        continue    # not a real result

                    title_words = title_plus.split()    # look for "(German Edition)" suffix
                    if len(title_words) > 2 and title_words[-1] == 'Edition)' and title_words[-2][0] == '(':
                        lang = title_words[-2][1:]
                        if lang in ALL_LANGUAGE_NAMES:
                            language = lang
                            title_plus = ' '.join(title_words[:-2])

                    if '...' not in title_plus:
                        title = normalize_title(title_plus)  # not truncated

                # the author is looked for in three alternative element layouts
                author_elem = book_elem.find('span', attrs={'class': "ptBrand"})

                if (not author_elem) and new_aps:
                    author_elem = new_aps.find('span', attrs={'class': "med reg"})

                if not author_elem:
                    author_elem = book_elem.find('div', attrs={'class': "a-row a-spacing-none"})

                if author_elem:
                    author = text_only(author_elem)
                    if ' (' in author:
                        author = author.partition(' (')[0]   # " (Feb 1, 2011) - Kindle eBook"

                    if (re.match(r'^by [a-zàáâãäåāăąèéêëēĕėęěìíîïĩīĭįıòóôõöōŏőơùúûüũūŭůűųñńņňýÿƴçćĉċč .-]+$', author.lower()) and
                            (' and ' not in author)):
                        authors = [normalize_author(author[3:])]  # simple case of single author
                    else:
                        # Cannot parse: "by Isaac Asimov, Philip K. Dick, John Gregory Betancourt and Milton Lesser"
                        pass

                series_li = book_elem.find('li', attrs={'class': "seriesInfo"})
                if series_li:
                    # Book 3 of 4 in the Space Odyssey Series
                    series_text = text_only(series_li)
                    split_text = series_text.split()
                    if len(split_text) > 6 and split_text[0] == 'Book' and split_text[2] == 'of' and \
                            split_text[4] == 'in' and split_text[5] == 'the':

                        series = ' '.join(split_text[6:-1 if split_text[-1] == 'Series' else None])
                        series_index = int(split_text[1])

                # book type flags change over time and are often not present in search results
                # (the flags below only appear in the log line; they do not filter results)

                if not kindle_edition:
                    for a in book_elem.findAll('a'):
                        if a.get("title", "") == "Kindle Edition" or "s=digital-text" in a.get("href", ""):
                            kindle_edition = True
                            break

                if not kindle_edition:
                    for type_td in book_elem.findAll('td', attrs={'class': re.compile("tpType")}):
                        if 'Kindle Edition' in text_only(type_td):
                            kindle_edition = True
                            break

                if not kindle_edition:
                    for kindle_tag in [
                            'Auto-delivered wirelessly', 'Whispersync for Voice-ready', 'Print Price',
                            'Subscribers read for free.', 'Kindle Purchase', 'Kindle Edition', 'Kindle eBook']:
                        if kindle_tag in book_text:
                            kindle_edition = True
                            break

                if book_elem.find('span', attrs=class_contains('sprPrime')):
                    amazon_prime = True     # seems only present if signed in

                if book_elem.find('span', attrs=class_contains('sprKindleUnlimited')) or \
                        book_elem.find('span', attrs=class_contains('s-icon-kindle-unlimited')) or \
                        "Subscribers read for free." in book_text:
                    kindle_unlimited = True

                if 'Whispersync for Voice-ready' in book_text:
                    voice = True

                # assume results will match what we are looking for
                purchasable = (self.collection in PURCHASABLE_COLLECTIONS)
                available = (not purchasable)

                lbook = LibraryBook(
                        authors=authors, title=title, formats=set(),
                        language=language, series=series, series_index=series_index,
                        available=available, purchasable=purchasable, lib=self, book_id=book_id,
                        search_author=search_author)

                flags = ('k' if kindle_edition else '') + ('p' if amazon_prime else '') + \
                        ('u' if kindle_unlimited else '') + ('v' if voice else '')

                self.log.info('Found (%s) %s' % (flags, repr(lbook)))
                books.add(lbook)

                page_results += 1

            # a full page is expected everywhere except on the last page
            if page_num < total_pages:
                expected_results = RESULTS_PER_PAGE
            else:
                expected_results = total_results - ((page_num - 1) * RESULTS_PER_PAGE)

            if (page_results + hero_results) != expected_results and page_results != expected_results:
                self.log_warn_debug('Expected %s but found %d actual and %d hero' % (
                            value_unit(expected_results, 'page result'), page_results, hero_results))

            page_num += 1

            if page_num > total_pages:
                break

        return False

    def log_warn_debug(self, msg):
        """Log msg as a warning when DEBUG_MODE is set, otherwise at info level."""
        emit = self.log.warn if DEBUG_MODE else self.log.info
        emit(msg)

    def get_book_info(self, book_id, cache):
        MAX_RETRIES = 4
        retries = 0

        while (True):

            response = self.open_amazon_url(book_id=book_id)

            # Parse page

            authors = []
            title = ''
            language = ''
            publisher = ''
            pubdate = None
            isbn = ''
            series = ''
            series_index = 0.0
            formats = {FORMAT_KINDLE_BOOK}

            soup = beautiful_soup_fix(response.data_string)

            alert_div = soup.find('div', attrs={'class': 'a-alert-content'})
            if alert_div:
                alert_text = text_only(alert_div)
                # "Please retry" is the default message when there is no real alert -- ignore

                if (alert_text and alert_text != '"Please retry"' and ('Thank you for your feedback' not in alert_text) and
                        alert_text != 'This item has a maximum order quantity limit.' and
                        alert_text != 'Sorry, there was a problem saving your cookie preferences. Try again.' and
                        (not alert_text.startswith("Added to")) and
                        (not alert_text.startswith("You've subscribed to"))):
                    self.log.info('Alert: %s' % alert_text)

                    if 'Please reload this page' in alert_text:
                        # We're sorry, an error has occurred. Please reload this page and try again.

                        retries += 1
                        if retries > MAX_RETRIES:
                            raise LibraryError('Repeating alert: %s' % alert_text)

                        time.sleep(30)  # delay before retry to give temporary problems time to clear
                        continue

            break

        if (
                soup.find('div', attrs=class_contains('series-header')) or
                soup.find('div', attrs={"id": "series-common-atf"}) or
                soup.find('h1', attrs=class_contains('aos-collection-title'))):     # Amazon original stories collection (B07VST6B9S)
            # page representing a series, rather than an individual book
            self.log.info('%s at %s is a series, not a book' % (book_id, self.name))
            return None

        title_div = soup.find('div', attrs={'id': 'titleblock_feature_div'})
        if title_div is None:
            title_div = soup.find('div', attrs={'id': 'titleSection'})

        if title_div:
            # new Kindle book web page format starting 04/2020
            title_span = must_find(title_div, 'span', attrs={'id': re.compile('productTitle|ebooksProductTitle')})
            title = normalize_title(text_only(title_span))

            byline_div = must_find(soup, 'div', attrs={'id': 'bylineInfo_feature_div'})
            bylines = byline_div.findAll('span', attrs={'class': re.compile('contribution')})
        else:
            title_div = soup.find('div', attrs={'id': 'booksTitle'})
            if title_div:
                # new Kindle book web page format starting 05/2015
                title_span = must_find(title_div, 'span', attrs={'id': re.compile('productTitle|ebooksProductTitle')})
                title = normalize_title(text_only(title_span))

                bylines = title_div.findAll('span', attrs={'class': re.compile('contribution')})

            else:
                title_h1 = soup.find('h1', attrs={'class': re.compile('parseasinTitle')})
                if title_h1:
                    title_span = must_find(title_h1, 'span', attrs={'id': 'btAsinTitle'})
                    title = normalize_title(text_only(title_span))

                    bylines = soup.findAll('span', attrs={'class': re.compile("byLinePipe")})

                else:
                    raise LibraryError('Unable to determine Amazon title format')

        for byline in bylines:
            if byline.get('id') == "pubdatelabel":
                continue    # not an author

            role_m = re.search(r'\((.+)\)', text_only(byline).replace(' ', ''))
            if not role_m:
                self.log.warn('Failed to parse creator roles: %s' % text_only(byline))
                continue

            roles = set(role_m.group(1).split(','))

            author_tag = byline.previousSibling   # author precedes byline

            while author_tag and (getattr(author_tag, 'name', '') not in ['a', 'b', 'div', 'span']):
                author_tag = author_tag.previousSibling     # skip backward to tag containing the author name

            if author_tag:
                author = normalize_author(text_only(author_tag).strip())
            else:
                author = None

            if author and (author not in KNOWN_ROLES):
                for role in list(roles):
                    if role not in KNOWN_ROLES:
                        self.log_warn_debug('Unexpected role %s for creator %s' % (role, author))

                    if role in UNDESIRED_ROLES:
                        roles.remove(role)

                if len(roles) > 0:
                    authors.append(author)
            #else:
            #    self.log.warn('Failed to parse %s name %s' % (','.join(roles), author))

        content_div = soup.find('div', attrs={'id': 'ps-content'})
        if content_div:
            metadata_div = content_div.find('div', attrs={'id': 'bookmetadata'})
            if metadata_div:
                pub_date_input = metadata_div.find('input', attrs={'id': 'pubdate'})
                if pub_date_input:
                    #self.log.info('pubdate from div id-pubdate: %s' % pub_date_input['value'])
                    pubdate = dateutil.parser.parse(pub_date_input['value']).replace(tzinfo=dateutil.tz.tzutc())  # ISO 8601 format
                    #self.log.info('pubdate = %s' % pubdate.isoformat())

                else:
                    pub_date_span = metadata_div.find('span', attrs={'id': 'pubdatevalue'})
                    if pub_date_span:
                        #self.log.info('pubdate from span id-pubdatevalue: %s' % text_only(pub_date_span))
                        pubdate = parse_only_date(text_only(pub_date_span), assume_utc=True)
                        #self.log.info('pubdate = %s' % pubdate.isoformat())

        details = []
        product_details_table = soup.find('table', attrs={'id': 'productDetailsTable'})
        if product_details_table:
            details.extend(product_details_table.findAll('li', recursive=True))

        etextbook_bullet_list = soup.find('ul', attrs={'id': 'eTextbookBulletList'})
        if etextbook_bullet_list:
            details.extend(etextbook_bullet_list.findAll('li', recursive=True))

        for details_bullet_list in soup.findAll('ul', attrs=class_contains('detail-bullet-list'), recursive=True):
            details.extend(details_bullet_list.findAll('li', recursive=True))

        for buying_div in soup.findAll('div', attrs={'class': 'buying'}, recursive=True):
            details.extend(buying_div.findAll('li', attrs={'class': 'listItem'}, recursive=True))

        about_ebooks = soup.find('div', attrs={'id': 'aboutEbooksSection'})
        if about_ebooks:
            details.extend(about_ebooks.findAll('td', recursive=True))
            details.extend(about_ebooks.findAll('li', recursive=True))

        review_feature = soup.find('div', attrs={'id': 'reviewFeatureGroup'})
        if review_feature:
            details.extend(review_feature.findAll('span', attrs=class_contains('a-size-small'), recursive=True))

        rich_product_info = soup.find('div', attrs={'id': 'rich_product_information'})
        if rich_product_info:
            for rpi in rich_product_info.findAll('li', attrs={'role': 'listitem'}, recursive=True):
                key = rpi.find('div', attrs=class_contains('rpi-attribute-label'))
                if key:
                    key.append(' : ')

                details.append(rpi)

        series_bullet_widget_feature = soup.find('div', attrs={'id': 'seriesBulletWidget_feature_div'})
        if series_bullet_widget_feature:
            details.append(series_bullet_widget_feature)

        for detail in details:
            t = text_only(detail)
            t = t.replace('\u200e', '').replace('\u200f', '')   # drop extra chars 11/2022

            if t:
                while True:
                    key, sep, val = t.partition(':')
                    key = key.strip().lower()       # case changes 10/2020
                    val = val.strip()

                    while val.startswith(":"):
                        val = val[1:].strip()       # extra colon in language 10/2020

                    if key == 'publisher' or (key == 'sold by' and not publisher):
                        publisher = val
                        if publisher and publisher[-1] == ')':
                            publisher, sep, date = publisher.rpartition('(')
                            date = date.replace(')', '').strip()
                            if date and (not pubdate) and ('19' in date or '20' in date):
                                #self.log.info('pubdate from publisher: %s' % date)
                                pubdate = parse_only_date(date, assume_utc=True)
                                #self.log.info('pubdate = %s' % pubdate.isoformat())

                            publisher = publisher.strip()

                            if ';' in publisher:
                                publisher = publisher.partition(';')[0].strip()    # drop edition
                        break

                    if key == 'publication date':
                        if not pubdate:
                            pubdate = parse_only_date(val, assume_utc=True)
                        break

                    if key == 'language':
                        language = val
                        break

                    if key == 'isbn-10' or key == 'isbn-13' or (key == 'page numbers source isbn' and not isbn):
                        isbn = val.replace('-', '')
                        break

                    match = re.match(r'^(Series|Collection): (.+) \(Book ([0-9]+)\)$', t, flags=re.IGNORECASE)
                    if match:
                        series = match.group(2)
                        series_index = float(match.group(3))
                        break

                    match = re.match(r'^(Book|Chapter|Volume) ([0-9]+) of ([0-9]+) *(in|:) (.+)$', t, flags=re.IGNORECASE)
                    if match:
                        series = match.group(5)
                        series_index = float(match.group(2))
                        break

                    detail_a = detail.find('a')
                    if detail_a:
                        href = detail_a.get('href', '')
                        if ('/s/ref=series' in href) or ('?ref=series' in href):
                            detail_a_text = text_only(detail_a)
                            if detail_a_text != 'Similar books':
                                series = re.sub(r' \([0-9]+ Book Series\)', '', detail_a_text)
                                series_index = 0.0
                            break

                    if tweaks.get(TWEAK_SAVE_RESPONSES_ON_ERROR, False):
                        for msg in [
                                    r'Length$', r"Don't have a Kindle", r'Whispersync for Voice$', r'Word Wise$',
                                    r'Available only on these devices$', r'Due to its large file size$',
                                    r'Prime members can borrow$', r'Available on these devices$',
                                    r'File Size$', r'Print Length$', r'See all details',
                                    r'Sold by$', r'ASIN$', r'Text-to-Speech$', r'X-Ray$', r'Lending$',
                                    r'Amazon Best Sellers Rank$', r'#[0-9]+ in Books$', r'Format$',
                                    r'Average Customer Review$', r'#[0-9]+ in Kindle Store$',
                                    r'Amazon Bestsellers Rank$', r'Simultaneous Device Usage$',
                                    r'Optimised for larger screens$', r'ISBN-10$', r'ISBN-13$',
                                    r'Optimized for larger screens$', r'Due to large size of this book,',
                                    r'Due to large file size,', r'Enhanced Typesetting$', r'Lexile measure$',
                                    r'Matchbook Price$', r'Age Level$', r'Grade Level$', r'Reading age$',
                                    r'Audible Narration$', r'X-Ray for Textbooks$', r'Similar books$',
                                    r'Page Flip$', r'Highlight, take notes, and search in the book',
                                    r'Thousands of books are eligible,', r'Look for the Kindle MatchBook icon',
                                    r'Read the Kindle edition on any Kindle device', r'Print edition must be purchased new',
                                    r'Gifting of the Kindle edition at the Kindle MatchBook price is not available.',
                                    r'Page numbers are just like the physical edition', r'Create digital flashcards instantly',
                                    r'Kindle', r'Fire', r'Free Kindle Reading Apps',
                                    r'Use X-Ray to get the most important concepts', r'Screen Reader$',
                                    r'Audible book$', r'#[0-9]+ in', r'Edition$', r'Sticky notes$',
                                    r'In this edition, page numbers are just like the physical edition',
                                    r'.*\([0-9]+ Book Series\)', r'Part of', r'Customer Reviews$', r'Best-sellers rank$',
                                    r'#?[0-9,]+ in ', r'Best sellers rank$', r'Date First Available$',
                                    ]:
                            if re.match(msg, key, flags=re.IGNORECASE):
                                break
                        else:
                            self.log.warn('unexpected detail: %s = %s from %s' % (repr(key), repr(val), repr(t)))

                    break

        if re.search(r'book [0-9]+', series, flags=re.IGNORECASE) or series == 'With Active Table of Contents':
            self.log.info('Ignoring incorrect series name: %s' % series)
            series = ''
            series_index = 0.0

        if series:
            series = re.sub(r' \([0-9]+ Book Series\)$', '', series)    # drop optional series count
            #self.log.info('found series "%s" [%02d]' %(series, int(series_index)))

            # Try variations of how the series may have been combined with the title to remove it

            index_pat = '(%d|%s)' % (int(series_index), word_number(int(series_index)))

            for pat1 in [
                        '(The |)%s( Series| Trilogy)?(,|:|) (Book |Volume |Vol |Issue |)%s' % (re.escape(series), index_pat),
                        '(Book|Volume|Vol|Issue) %s of( The)? %s' % (index_pat, re.escape(series)),
                        '(The |)%s' % re.escape(series),
                        'Book %s' % index_pat,
                        ]:

                for pat in [
                        ': %s$' % pat1,
                        r' \(%s\)' % pat1,
                        ]:

                    #self.log.info('Try pattern: %s' % pat)
                    new_title = re.sub(pat, '', title, flags=re.IGNORECASE).strip()
                    if new_title != title:
                        break

                if new_title != title:
                    #self.log.info('shortened title from "%s" to "%s" for series "%s"' % (title, new_title, series))
                    title = new_title
                    break

            #else:
                #self.log.warn('title unchanged "%s" for series "%s" [%02d]' % (title, series, int(series_index)))

            if series.endswith(' series'):
                series = series[:-len(' series')]   # e.g.: "The Dagger and the Coin series"
                #self.log.info('shortened series to "%s"' % series)

        # drop any extraneous content that could prevent matching title (makes prior code mostly obsolete)
        while True:
            m = re.search(r' \([^()]+\)$', title)
            if (not m) or ("Edition" in m.group(0)):
                break

            title = re.sub(r' \([^()]+\)$', '', title)

        warning_msg = soup.find('div', attrs={'class': 'message warning'})
        if warning_msg:
            msg_text = text_only(warning_msg)
            self.log.info('Book warning message: ' + msg_text)

            if "book is currently unavailable" in msg_text:
                formats = set()     # not available

        no_offer_msg = soup.find('span', attrs=class_contains('no-kindle-offer-message'))
        if no_offer_msg:
            self.log.info('Book no-offer message: ' + text_only(no_offer_msg))
            formats = set()     # not available

        if self.is_expected_in_ku_but_not(soup):
            formats = set()     # not available

        return InfoBook(
            authors=authors, title=title, publisher=publisher, pubdate=pubdate,
            language=language, isbn=isbn, series=series, series_index=series_index,
            formats=formats, lib=self, book_id=book_id)

    def get_current_book_availability(self, book_id):
        """
        Determine the current availability of the Amazon book with the given book_id.

        The book page is fetched with open_amazon_url (404 is passed as an expected error).
        Returns False when the response is flagged as an HTTP error, when the page contains
        a 'no-kindle-offer-message' span, or when is_expected_in_ku_but_not() reports True.
        Otherwise returns the result of calculate_wait_weeks(), called with
        library_copies=True and the auto-delivery date found in the purchase sections of
        the page (None when no such date is present).
        """
        page = self.open_amazon_url(book_id=book_id, expect_errors=[404])
        if page.is_httperror_exception:
            self.log.info('Book does not exist')
            return False

        soup = beautiful_soup_fix(page.data_string)

        unavailable_span = soup.find('span', attrs=class_contains('no-kindle-offer-message'))
        if unavailable_span:
            # not available
            self.log.info('Book no-offer message: ' + text_only(unavailable_span))
            return False

        if self.is_expected_in_ku_but_not(soup):
            return False

        # Gather every page section that may carry the auto-delivery notice
        purchase_sections = []
        for tag_name, tag_attrs in (
                ('div', {'class': 'buying'}),
                ('div', {'id': 'deliverTo'}),
                ('form', {'id': 'buyOneClick'}),    # 05/2015
                ):
            purchase_sections.extend(soup.findAll(tag_name, attrs=tag_attrs))

        # The first section mentioning an auto-delivery date supplies the release date
        release_date = None
        for section in purchase_sections:
            delivery = re.search(
                r'This title will be auto-delivered to your Kindle( and you will be charged)? on ([a-z0-9, ]+)\.',
                text_only(section), flags=re.IGNORECASE)
            if delivery:
                release_date = parse_only_date(delivery.group(2), assume_utc=True)
                break

        return self.calculate_wait_weeks(library_copies=True, release_date=release_date)

    def is_expected_in_ku_but_not(self, soup):
        """
        Return True when this library's collection is one of the Kindle Unlimited
        collections but the parsed book page (soup) does not show the book as being in
        Kindle Unlimited. Always returns False when the TWEAK_DISABLE_KU_CHECK tweak is set.
        """
        check_disabled = tweaks.get(TWEAK_DISABLE_KU_CHECK, False)
        if check_disabled:
            return False

        # Only the Kindle Unlimited collections require the book to be in KU
        if self.collection not in {COLLECTION_UNLIMITED, COLLECTION_UNLIMITED_WITH_NARRATION}:
            return False

        if self.is_kindle_unlimited(soup):
            return False

        self.log.info('Book is not in Kindle Unlimited')
        return True

    def is_kindle_unlimited(self, soup):
        """
        Return True if the parsed book page (soup) contains any of the known Kindle
        Unlimited markers, otherwise False. The markers are checked in this order:

        1. an element labelled "kindle unlimited logo" inside a 'tmmSwatches' div
        2. a span whose text begins with a Kindle Unlimited availability phrase
        3. a form whose text begins with "Read for Free with Kindle Unlimited"
        4. a 'kindleunlimitedOfferAccordionRow' div or a 'ku-price' table row
        """
        # 1. logo identified by its aria-label among the descendants of the swatches div
        for swatches in soup.findAll('div', attrs={'id': 'tmmSwatches'}):
            if any(node.get("aria-label") == "kindle unlimited logo"
                    for node in swatches.find_all(recursive=True)):
                return True

        # 2. span text; re.match anchors each pattern at the start of the text
        span_patterns = (
            r"This title.*available with Kindle Unlimited",
            r"^Read( and Listen)? for Free$",
            )
        for span in soup.findAll('span'):
            span_text = text_only(span)
            if any(re.match(pattern, span_text) for pattern in span_patterns):
                return True

        # 3. form text
        for form in soup.findAll('form'):
            form_text = text_only(form)
            if re.match(r"Read for Free with Kindle Unlimited", form_text):
                return True

        # 4. dedicated page elements
        if soup.find('div', id='kindleunlimitedOfferAccordionRow'):
            return True

        if soup.find('tr', attrs=class_contains('ku-price')):
            return True

        return False


def asins_with_names(asins, names):
    """
    Map each ASIN to its known name, using an empty string when no name is known.

    asins: iterable of ASINs (hashable values that can be sorted)
    names: mapping from ASIN to name; looked up with .get()

    Returns a new dict keyed by ASIN, built in sorted ASIN order.
    """
    # sorted() accepts any iterable, so no intermediate list copy is needed
    return {asin: names.get(asin, "") for asin in sorted(asins)}
