﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import (unicode_literals, division, absolute_import, print_function)

import json
import re

from calibre.utils.date import parse_only_date

from calibre_plugins.overdrive_link.numbers import value_unit
from calibre_plugins.overdrive_link.formats import FORMAT_AUDIBLE_AUDIOBOOK
from calibre_plugins.overdrive_link.book import (LibraryBook, InfoBook)
from calibre_plugins.overdrive_link.library import SearchableLibrary
from calibre_plugins.overdrive_link.author_prep import normalize_author
from calibre_plugins.overdrive_link.title_prep import normalize_title
from calibre_plugins.overdrive_link.net import open_url
from calibre_plugins.overdrive_link.parseweb import (LibraryError, must_find, must_findAll, text_only, class_contains, beautiful_soup)
#from calibre_plugins.overdrive_link.json import js_value

from .python_transition import (IS_PYTHON2)
if IS_PYTHON2:
    from .python_transition import (http, repr, urllib)
else:
    import http.cookiejar
    import urllib.parse


__license__ = 'GPL v3'
__copyright__ = '2012-2025, John Howell <jhowell@acm.org>'


# Locale identifiers. A locale is the part of a library id that precedes the
# final separator; a library id with no separator yields the empty string,
# which is why the USA locale is ''.
LOCALE_USA = ''
LOCALE_UK = 'uk'

# Per-locale mapping from language name to the code that Audible.find_books
# sends as the 'feature_six_browse-bin' search parameter. The language name is
# looked up using the configured search language; a language missing from the
# table means the search is performed without a language restriction.
LANGUAGE_CODES = {
    LOCALE_USA: {
        'Danish': '9178182011',
        'Dutch': '9178185011',
        'English': '9178177011',
        'French': '9178184011',
        'German': '9178183011',
        'Greek': '9178188011',
        'Italian': '9178180011',
        'Japanese': '9178187011',
        'Portuguese': '9178186011',
        'Russian': '9178179011',
        'Spanish': '9178178011',
        'Swedish': '9178181011',
        },
    LOCALE_UK: {
        'English': '5177374031',
        'Spanish': '5177375031',
        'Russian': '5177376031',
        'Italian': '5177377031',
        'Swedish': '5177378031',
        'Danish': '5177379031',
        'German': '5177380031',
        'French': '5177381031',
        'Dutch': '5177382031',
        'Portuguese': '5177383031',
        'Japanese': '5177384031',
        'Greek': '5177385031',
        },
    }


# Web site host name for each supported locale. Membership in this table is
# also what lib_locale uses to decide whether a locale is valid.
WEB_HOSTS = {
    LOCALE_USA: 'www.audible.com',
    LOCALE_UK: 'www.audible.co.uk',
    }

# The only collection name currently accepted in a library id.
COLLECTION_STORE = 'store'

# Set of all valid collection names (checked by lib_collection).
COLLECTIONS = {COLLECTION_STORE}

# Separates the locale from the collection within a library id, e.g. 'uk-store'.
# NOTE: the identifier is misspelled ("SEPERATOR") but is referenced by name
# elsewhere in this module, so it is left as is.
LIBRARY_ID_SEPERATOR = '-'


def lib_locale(library_id):
    """Return the locale (country) portion of an Audible library id.

    The locale is everything before the last separator in ``library_id``; an
    id containing no separator has the empty locale.

    Raises ValueError if the locale is not a key of WEB_HOSTS.
    """
    country, _, _ = library_id.rpartition(LIBRARY_ID_SEPERATOR)

    if country in WEB_HOSTS:
        return country

    raise ValueError('Invalid Audible library id (country): "%s"' % country)


def lib_collection(library_id):
    """Return the collection portion of an Audible library id.

    The collection is everything after the last separator in ``library_id``,
    or the whole id when it contains no separator.

    Raises ValueError if the collection is not a member of COLLECTIONS.
    """
    _, _, name = library_id.rpartition(LIBRARY_ID_SEPERATOR)

    if name in COLLECTIONS:
        return name

    allowed = ' or '.join(COLLECTIONS)
    raise ValueError('Audible library id (collection) may only be %s, found: "%s"' % (allowed, name))


class Audible(SearchableLibrary):
    """Audible web store, searched and queried by scraping its web pages.

    A library id has the form ``<collection>`` (USA) or ``<locale>-<collection>``,
    as parsed by ``lib_locale`` and ``lib_collection``. Book ids are
    10-character alphanumeric identifiers taken from product page URLs.
    """

    id = 'au'
    name = 'Audible'
    formats_supported = {FORMAT_AUDIBLE_AUDIOBOOK}

    # Exactly 10 ASCII letters/digits. Used with .match(), so the start is
    # anchored implicitly; \Z (rather than $) also rejects a trailing newline.
    _BOOK_ID_RE = re.compile(r'[0-9A-Za-z]{10}\Z')

    @staticmethod
    def validate_library_id(library_id, migrate=True, config=None):
        """Return the canonical (lower-case) form of ``library_id``.

        ``migrate`` and ``config`` are accepted but not used here.

        Raises ValueError if the locale or the collection is not recognized.
        """
        library_id = library_id.lower()

        locale = lib_locale(library_id)             # check country
        collection = lib_collection(library_id)     # check collection

        if locale:
            return '%s-%s' % (locale, collection)

        return collection

    @staticmethod
    def book_key_library_id(library_id):
        """Return the part of the library id that distinguishes book ids (the locale)."""
        return lib_locale(library_id)     # book ids differ between countries

    @staticmethod
    def validate_book_id(book_id, library_id):
        """Return ``book_id`` unchanged if it is exactly 10 alphanumeric characters.

        ``library_id`` is accepted but not used here.

        Raises ValueError for any other value.
        """
        if not Audible._BOOK_ID_RE.match(book_id):
            raise ValueError('Audible book id must be 10 alphanumeric characters: "%s"' % book_id)

        return book_id

    @staticmethod
    def book_url(library_id, book_id):
        """Return the product page URL for ``book_id`` on the site of the library's locale."""
        return 'https://%s/pd/%s' % (WEB_HOSTS[lib_locale(library_id)], book_id)

    @staticmethod
    def supports_purchase(library_id):
        """Return True: every Audible library id supports purchase."""
        return True

    def __init__(self):
        self.cookiejar = http.cookiejar.CookieJar()  # having cookies enabled causes Amazon to give more consistent results

    def find_books(self, books, search_author, search_title, keyword_search):
        """Search the Audible site and add a LibraryBook to ``books`` for each result.

        Result pages are requested one at a time until every page reported by
        the site has been read. The search is restricted to the configured
        search language when LANGUAGE_CODES has a code for it in this locale.

        books -- collection that receives results through ``books.add``
        search_author -- author to search for; left out of the query when empty
        search_title -- title to search for, or the keywords when ``keyword_search`` is set
        keyword_search -- when true, ``search_title`` is sent as the 'keywords' parameter

        Returns True when the site reports more than MAX_RESULTS_ALLOWED results
        (no results from that response are added), otherwise False.

        Raises LibraryError when the result summary, a product link, or the
        number of results on a page is not as expected.
        """
        page_num = 1
        total_pages = 1
        RESULTS_PER_PAGE = 20
        MAX_RESULTS_ALLOWED = 500

        locale = lib_locale(self.library_id)
        language_code = LANGUAGE_CODES.get(locale, {}).get(self.config.search_language)
        # Only label results with a language when the search was actually restricted to it.
        search_language = self.config.search_language if language_code else ''

        while page_num <= total_pages:
            data = {}

            if search_author:
                data['author_author'] = search_author

            if keyword_search:
                data['keywords'] = search_title
            elif search_title:
                data['title'] = search_title

            if language_code:
                data['feature_six_browse-bin'] = language_code

            if page_num > 1:
                data['page'] = "%d" % page_num

            data['ipRedirectOverride'] = 'true'     # do not redirect if wrong country

            response = open_url(self.log, 'https://%s/search?%s' % (WEB_HOSTS[locale], urllib.parse.urlencode(data)),
                                cookiejar=self.cookiejar)

            # Parse the html results for analysis
            soup = beautiful_soup(response.data_string)
            soup_text = text_only(soup)

            if 'No results for the keyword.' in soup_text or 'No results by author' in soup_text:
                break

            results_summary = must_find(soup, 'span', attrs=class_contains('resultsSummarySubheading'))
            # 4,158 results

            results_summary_l = text_only(results_summary).lower().replace(",", "").split()

            if len(results_summary_l) == 2 and results_summary_l[1] in ("result", "results"):
                total_results = int(results_summary_l[0])
            elif len(results_summary_l) == 6 and results_summary_l[5] in ("result", "results"):
                # six-word form with the total in the fifth position
                # (presumably something like "1 - 20 of 4,158 results" -- TODO confirm)
                total_results = int(results_summary_l[4])
            else:
                raise LibraryError('Unexpected resultsSummarySubheading: %s' % results_summary)

            total_pages = ((total_results - 1) // RESULTS_PER_PAGE) + 1  # floor division
            self.log.info('Response: page %d of %d. %d total results' % (page_num, total_pages, total_results))

            if total_results > MAX_RESULTS_ALLOWED:
                return True

            page_results = 0

            for book_elem in must_findAll(soup, 'li', attrs=class_contains('productListItem')):
                authors = []

                title_elem = must_find(book_elem, 'h3', attrs=class_contains('bc-heading'))
                title = normalize_title(text_only(title_elem))

                # The book id is the last path component of the product link, without any query string.
                href = must_find(title_elem, 'a')['href']
                book_id = href.rpartition("/")[2].partition("?")[0]

                if not self._BOOK_ID_RE.match(book_id):
                    raise LibraryError('Failed to parse ASIN from: %s' % href)

                author_label = book_elem.find('li', attrs=class_contains('authorLabel'))
                if author_label:
                    for a in author_label.findAll('a'):
                        authors.append(normalize_author(text_only(a)))

                lbook = LibraryBook(
                        authors=authors, title=title, formats={FORMAT_AUDIBLE_AUDIOBOOK},
                        language=search_language, purchasable=True, lib=self, book_id=book_id,
                        search_author=search_author)

                self.log.info('Found: %s' % repr(lbook))
                books.add(lbook)

                page_results += 1

            # Every page except the last must be full; the last holds the remainder.
            if page_num < total_pages:
                expected_results = RESULTS_PER_PAGE
            else:
                expected_results = total_results - ((page_num - 1) * RESULTS_PER_PAGE)

            if page_results != expected_results:
                raise LibraryError('Expected %s but found %d' % (value_unit(expected_results, 'result'), page_results))

            page_num += 1

        return False

    def get_book_info(self, book_id, cache):
        """Fetch the product page for ``book_id`` and return its metadata as an InfoBook.

        Authors, title, publisher, publication date and language are read from
        the page's ``application/ld+json`` entry of type 'Audiobook'; series
        name and index are parsed from the page's series label, if any.
        ``cache`` is accepted but not used here.

        Returns None when the page describes a podcast series.

        Raises Exception when a JSON script cannot be parsed or when no
        'Audiobook' entry is present.
        """
        response = open_url(self.log, self.book_url(self.library_id, book_id), cookiejar=self.cookiejar)

        if 'Based on your location you have been directed' in response.data_string:
            self.log.warning('Audible redirected query to a different country')

        authors = []
        title = ''
        publisher = ''
        pubdate = None
        language = ''
        series = ''
        series_index = 0.0

        soup = beautiful_soup(response.data_string)

        # Find the first JSON-LD entry of type 'Audiobook'. On success ``data``
        # is left bound to that entry and both loops are exited.
        for script in soup.findAll('script', attrs={'type': "application/ld+json"}):
            script = str(script).replace('<script type="application/ld+json">', '').replace('</script>', '')

            try:
                script_data_list = json.loads(script)
            except Exception as e:
                self.log.info("Script: %s" % script)
                raise Exception('Failed to parse json script: %s' % repr(e))

            # A script may hold either a single object or a list of objects.
            if not isinstance(script_data_list, list):
                script_data_list = [script_data_list]

            for data in script_data_list:
                data_type = data.get('@type', '')
                if data_type == 'Audiobook':
                    break

                if data_type == 'PodcastSeries':
                    self.log.info('This is a podcast, not an Audiobook')
                    return None
            else:
                continue    # no Audiobook in this script, try the next one

            break
        else:
            raise Exception('Missing Audiobook json script')

        for author in data.get('author', []):
            authors.append(normalize_author(author["name"]))

        if 'name' in data:
            title = normalize_title(data['name'])

        if 'publisher' in data:
            publisher = data['publisher']

        if 'datePublished' in data:
            pubdate = parse_only_date(data['datePublished'], assume_utc=True)

        if 'inLanguage' in data:
            language = data['inLanguage'].capitalize()

        for li in soup.findAll('li', attrs=class_contains('seriesLabel')):
            t = text_only(li)
            # The series name group is non-greedy so that an optional trailing
            # " Series" is captured by its own group instead of becoming part
            # of the name.
            match = re.match(
                r'^Series: (.+?)( Series)?, Book ([0-9.]+)(, Dramatized Adaptation)?(, Part [0-9]+)?( of [0-9]+)?( Dramatized)?( Adaptation)?$',
                t, flags=re.IGNORECASE)
            if match:
                index_text = match.group(3)
                if match.group(5) is not None:
                    index_text += '.' + match.group(5)[7:]    # ", Part N" becomes ".N"

                try:
                    series_index = float(index_text)
                except ValueError:
                    # e.g. "1.5" combined with a part number; report it below
                    # as an unexpected format instead of failing the lookup
                    pass
                else:
                    series = match.group(1)
                    break

            self.log.info('Unexpected series format: %s' % repr(t))

        return InfoBook(
            authors=authors, title=title, publisher=publisher, pubdate=pubdate, language=language,
            series=series, series_index=series_index,
            formats={FORMAT_AUDIBLE_AUDIOBOOK}, lib=self, book_id=book_id)
