﻿#!/usr/bin/env python
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import (unicode_literals, division, absolute_import, print_function)

import base64
import lxml.html
import re

from calibre.utils.date import parse_only_date

from calibre_plugins.overdrive_link.numbers import value_unit
from calibre_plugins.overdrive_link.book import (LibraryBook, InfoBook)
from calibre_plugins.overdrive_link.formats import (FORMAT_ADOBE_EPUB, FORMAT_ADOBE_PDF)
from calibre_plugins.overdrive_link.library import SearchableLibrary
from calibre_plugins.overdrive_link.net import (open_url, hostname_from_url)
from calibre_plugins.overdrive_link.author_prep import normalize_author
from calibre_plugins.overdrive_link.title_prep import normalize_title
from calibre_plugins.overdrive_link.parseweb import (
    LibraryError, must_find, must_findall_xpath, must_find_xpath, find_xpath, text_only,
    lxml_text_only, strip_scripts, beautiful_soup)

from .python_transition import (IS_PYTHON2)
if IS_PYTHON2:
    from .python_transition import (http, repr, urllib)
else:
    import http.cookiejar
    import urllib.parse


__license__ = 'GPL v3'
__copyright__ = '2012-2022, John Howell <jhowell@acm.org>'


def find_between(s, first, last):
    '''
    Return the part of s that lies between two delimiters.

    The text returned starts right after the first occurrence of `first` and stops
    just before the next occurrence of `last` that follows it. An empty string is
    returned when either delimiter cannot be found.
    '''
    marker_pos = s.find(first)
    if marker_pos < 0:
        return ""

    begin = marker_pos + len(first)

    # only look for the closing delimiter after the opening one
    stop = s.find(last, begin)
    if stop < 0:
        return ""

    return s[begin:stop]


def fix_title(title):
    '''
    Separate publisher and publication year annotations from a Freading title.

    Returns a tuple of (title, publisher, pubdate) where title is the remaining text
    passed through normalize_title, publisher is 'Barnes & Noble' or an empty string
    and pubdate is the result of parse_only_date for a trailing "(YYYY)" or None.
    '''
    # publisher in title, e.g. "(Barnes & Noble Classics)"
    title, publisher_hits = re.subn(r'\(Barnes & Noble[0-9a-zA-Z ]*\)', '', title)
    if publisher_hits:
        title = title.strip()
        publisher = 'Barnes & Noble'
    else:
        publisher = ''

    # pubdate in title: a four digit year in parentheses at the very end
    year_match = re.search(r'\(([0-9]{4})\)$', title)
    if year_match:
        pubdate = parse_only_date(year_match.group(1), assume_utc=True)
        title = re.sub(r'\([0-9]{4}\)$', '', title).strip()
    else:
        pubdate = None

    # B&N Classic Starts series precedes actual title
    title = title.replace('Classic Starts: ', '')

    return (normalize_title(title), publisher, pubdate)


def library_host(library_id):
    '''
    Return the host name of the web site for a library id.

    An id that already contains a dot is used unchanged as the host name, any other
    id is treated as a sub-domain of freading.com.
    '''
    if '.' in library_id:
        return library_id

    return '%s.freading.com' % library_id


class Freading(SearchableLibrary):
    '''
    Freading lending library, accessed by scraping the HTML pages of a library's
    Freading web site (advanced search results and book details pages).
    '''

    id = 'fr'
    name = 'Freading'
    formats_supported = {FORMAT_ADOBE_EPUB, FORMAT_ADOBE_PDF}
    allow_format_merge = True   # formats are discovered as different instances of the same book key
    sign_in_affects_get_current_availability = True     # sign in needed to detect pre-release books

    @staticmethod
    def validate_library_id(library_id, migrate=True, config=None):
        '''
        Normalize a library id entered by the user.

        A value containing ':' or '/' is treated as a URL and reduced using
        hostname_from_url. A trailing '.freading.com' is removed. The remaining id
        must be purely alphanumeric and is returned in lower case.

        The migrate and config parameters are accepted but not used here.

        Raises ValueError if the resulting id is not alphanumeric.
        '''
        if (':' in library_id) or ('/' in library_id):
            library_id = hostname_from_url(library_id)

        if library_id.lower().endswith('.freading.com'):
            library_id = library_id[:-len('.freading.com')]    # strip suffix

        if not re.match(r'^([0-9a-zA-Z]+)$', library_id):
            raise ValueError('Freading library id must be alphanumeric: "%s"' % library_id)

        return library_id.lower()

    @staticmethod
    def validate_book_id(book_id, library_id):
        '''
        Check that a book id has the form of a base64 string and return it unchanged.

        Raises ValueError if the id contains characters outside of the base64 alphabet.
        '''
        # book id for freading is base64 encoded ebookId
        if not re.match(r'^([0-9a-zA-Z\+/]+={0,2})$', book_id):
            raise ValueError('Freading book id must be base64 encoded: "%s"' % book_id)

        return book_id

    @staticmethod
    def book_url(library_id, book_id):
        '''
        Return the URL of the details page for a book at a library.
        '''
        # may need to url encode if the book_id contains '/'?
        return 'https://%s/ebooks/details/r:download/%s' % (library_host(library_id), book_id)

    def __init__(self):
        # cookies are shared by all requests so that a sign in applies to later searches
        self.cookiejar = http.cookiejar.CookieJar()

    def sign_in(self, use_credentials, only_test_form=False):
        '''
        Prepare for access to the library site and sign in if credentials are available.

        The host name of the library (self.lib_host) is always established. Sign in is
        only attempted when use_credentials is true and a card number (the email
        address of the user) has been configured. The only_test_form parameter is
        accepted but not used here.

        Raises LibraryError if the site reports bad credentials or does not redirect
        to its index page after the sign in form is submitted.
        '''
        # September 2021 sign in using email instead of library card

        self.lib_host = library_host(self.library_id)

        if not (self.card_number and use_credentials):
            return      # cannot sign in

        self.log.info('Signing in to %s' % self.name)

        self.signin_required = True
        self.cookiejar.clear()
        signin_url = 'https://%s/users/signinup' % self.lib_host

        # initial request of the sign in page, made before the form is posted
        open_url(self.log, signin_url, cookiejar=self.cookiejar)

        data = {}
        data['_method'] = 'POST'
        data['data[Signin][email]'] = self.card_number
        data['data[Signin][password]'] = self.card_pin
        data['singin'] = 'Sign In'          # spelling error in original

        response = open_url(self.log, signin_url, urllib.parse.urlencode(data), cookiejar=self.cookiejar)

        if 'Incorrect username or password.' in response.data_string or 'This is not a valid email.' in response.data_string:
            raise LibraryError('Sign in failed - Incorrect Email or Password')

        # a successful sign in ends at the index page of the site
        final_url = "https://%s/index" % self.lib_host
        redirect_url = response.geturl()

        if redirect_url != final_url:
            raise LibraryError('Sign in failed - Redirected to %s instead of %s' % (redirect_url, final_url))

        self.log.info('Sign in successful')
        self.signed_in = True

    def find_books(self, books, search_author, search_title, keyword_search):
        '''
        Search the library for books, once for each supported format that is enabled
        in the configuration, adding the books found to the books collection.

        For a keyword search the search_title value is used as the keyword. Otherwise
        each format is additionally searched using the author as keyword.

        Returns True as soon as any search by format returns True (the result set was
        too large to be processed completely), otherwise False.
        '''
        if keyword_search:
            search_keyword = search_title
            search_title = ''
        else:
            search_keyword = ''

        for search_format in self.formats_supported:
            if search_format in self.config.search_formats:
                if self.find_books_by_format(books, search_author, search_title, search_keyword, search_format):
                    return True

                # sometimes author shows up in keyword search when missed using author search at Freading
                if not keyword_search:
                    if self.find_books_by_format(books, '', search_title, search_author, search_format):
                        return True

        return False

    @staticmethod
    def _book_id_from_href(href):
        '''
        Return the still URL-quoted book id that follows '/r:download/' in a link.

        The id ends at the next '/', '?' or '#', or at the end of the link when no
        further path segment is present.

        Raises LibraryError if no id follows the marker.
        '''
        quoted_id = re.match(r'[^/?#]*', href.partition('/r:download/')[2]).group(0)

        if not quoted_id:
            raise LibraryError('missing book id in %s' % href)

        return quoted_id

    def find_books_by_format(self, books, search_author, search_title, search_keyword, search_format):
        '''
        Perform an advanced search restricted to a single format and add a LibraryBook
        to books for each result, following the "next page" links of the result pages.

        Returns True if the result set exceeds MAX_RESULTS_ALLOWED, except for a search
        with neither author nor keyword for which excessive results are ignored and
        False is returned. Returns False after all results have been processed or if
        there were none.

        Raises LibraryError for an unexpected format, for a result link without a book
        id, or if the number of results processed differs from the total reported.
        '''
        page_num = 1
        total_results = 0
        results_processed = 0
        MAX_RESULTS_ALLOWED = 500

        # https://ocls.freading.com/search?keyword=&title=&author=aaronovitch&publisher=&category=&type=advanced
        # {!field f=language}English;{!field f=format}epub;{!field f=category}/0POLITICAL SCIENCE

        data = {}
        data['type'] = 'advanced'
        data['keyword'] = search_keyword
        data['title'] = search_title
        data['author'] = search_author
        data['publisher'] = ''

        filters = []

        if search_format == FORMAT_ADOBE_EPUB:
            filters.append('{!field f=format}epub')
        elif search_format == FORMAT_ADOBE_PDF:
            filters.append('{!field f=format}pdf')
        else:
            raise LibraryError('Unexpected search format ' + search_format)

        # filters are joined using ';' and passed base64 encoded
        data['fq'] = base64.b64encode((';'.join(filters)).encode('ascii')).decode('ascii')

        next_url = 'https://%s/search?%s' % (self.lib_host, urllib.parse.urlencode(data))

        while next_url:
            # NOTE(review): certificate verification is disabled for this request - confirm it is still required
            response = open_url(
                    self.log, next_url,
                    cookiejar=self.cookiejar, referer='https://%s/search/advanced' % self.lib_host,
                    addheaders=[('Origin', 'https://%s' % self.lib_host)], disable_ssl_verify=True)

            # Parse the html results for analysis
            soup = beautiful_soup(strip_scripts(response.data_string))

            if page_num == 1:
                # the result count is only read from the first page
                if soup.find('p', attrs={'id': 'no-results'}):
                    self.log.info('No results')
                    return False

                all_results = must_find(soup, 'div', attrs={'id': 'all-results'})
                result_stats_container = must_find(all_results, 'div', attrs={'id': 'result-stats-container'})
                result_stats = must_find(result_stats_container, 'span', attrs={'class': 'result-stats'})
                total_results = int(text_only(result_stats).replace(',', ''))
                self.log.info(value_unit(total_results, 'total result'))

                if total_results > MAX_RESULTS_ALLOWED:
                    if (not search_author) and (not search_keyword):
                        self.log.info('Ignoring excessive results for title-only search')
                        return False

                    return True

            maskar = must_find(soup, 'div', attrs={'class': 'maskar'})
            result_lists = maskar.findAll('ul', attrs={'class': 'result-list'}, recursive=True)

            if not result_lists:
                break

            for result_list in result_lists:
                list_items = result_list.findAll('li', recursive=False)
                for item in list_items:
                    a = must_find(item, 'a')

                    href = a['href']
                    if '/r:download/' not in href:
                        raise LibraryError('missing r:download')

                    book_id = urllib.parse.unquote(self._book_id_from_href(href))

                    title_link_a = must_find(item, 'a', attrs={'class': 'title-link'})
                    title_text = text_only(title_link_a)

                    if not title_text.endswith('...'):
                        title, publisher, pubdate = fix_title(title_text)
                    else:
                        # title ending in '...' is not used; the book is recorded without title details
                        title = ''
                        publisher = ''
                        pubdate = None

                    lbook = LibraryBook(
                        title=title, publisher=publisher, pubdate=pubdate, available=True,
                        lib=self, book_id=book_id, search_author=search_author,
                        formats={search_format})

                    self.log.info('Found %s' % repr(lbook))
                    books.add(lbook)

                    results_processed += 1

            last_url = next_url
            next_url = None

            if results_processed != total_results:
                auto_scroll_nav = maskar.find('div', attrs={'class': 'autoscrollnav'})
                if auto_scroll_nav:
                    link_more = auto_scroll_nav.find('li', attrs={'class': 'link-more'})    # auto scrolling "next page" tag
                    if link_more:
                        next_a = link_more.find('a', attrs={'class': 'next'})
                        if next_a:
                            next_url = urllib.parse.urljoin(last_url, next_a['href'])
                            page_num += 1

            if next_url and (results_processed > MAX_RESULTS_ALLOWED):
                return True

        if results_processed != total_results:
            raise LibraryError('Expected %s but found %d' % (value_unit(total_results, 'result'), results_processed))

        return False

    def _is_book_unavailable(self, page_text):
        '''
        Return True, after logging the reason, if the text of a book details page
        states that the book cannot be obtained from this library.
        '''
        if "Sorry, this title is no longer available." in page_text:
            self.log.info('This title is no longer available')
            return True

        if "This book is not available in your library." in page_text:
            self.log.info('This book is not available in your library')
            return True

        return False

    def get_book_info(self, book_id, cache):
        '''
        Retrieve authors, title, isbn, publisher and publication date of a book from
        its details page.

        Returns an InfoBook, or None if the page states that the book is not
        available. If the page lists no author then the authors are looked up
        using get_freading_authors_using_worldcat. The cache parameter is not used here.
        '''
        response = open_url(self.log, self.book_url(self.library_id, book_id), cookiejar=self.cookiejar, disable_ssl_verify=True)

        if self._is_book_unavailable(response.data_string):
            return None

        # Parse page for: authors, isbn, publisher

        authors = []
        isbn = ''

        # parse using lxml since BeautifulSoup has trouble parsing attributes containing < > characters
        html = lxml.html.fromstring(response.data_string)

        book_info = must_find_xpath(html, ".//div[@class='book-info']")
        link_download = must_find_xpath(book_info, ".//div[@class='link-download']")

        # following only present if signed in and has unused tokens
        for input_elem in link_download.xpath(".//input"):
            if input_elem.get('name', '') == 'isbn':
                isbn = input_elem.get('value', '')

        text_info = must_find_xpath(book_info, ".//div[@class='text-info']")

        h2 = must_find_xpath(text_info, './/h2')
        title, publisher, pubdate = fix_title(lxml_text_only(h2))

        # an imprint shown on the page takes precedence over a publisher found in the title
        imprint_name = find_xpath(text_info, ".//a[@class='imprintName']")
        if imprint_name is not None:
            publisher = lxml_text_only(imprint_name)

        # some books don't show an author in Freading
        author_info = find_xpath(text_info, ".//ul[@class='author-info']")
        if author_info is not None:
            for author_name in must_findall_xpath(author_info, ".//a[@class='authorName']"):
                author = normalize_author(lxml_text_only(author_name), unreverse=True)
                if author:
                    authors.append(author)

        if not authors:
            authors = self.get_freading_authors_using_worldcat(book_id)

        return InfoBook(
                authors=authors, title=title, isbn=isbn, publisher=publisher, pubdate=pubdate,
                lib=self, book_id=book_id)

    def get_freading_authors_using_worldcat(self, book_id):
        '''
        Most freading books are in the catalog of worldcat.org.

        https://ocls.freading.com/ebooks/details/r:download/NTU4OTI1Mzk=
        https://www.worldcat.org/search?q=kw%3Ar%3Adownload%2FNTU4OTI1Mzk%3D

        https://www.freading.com/ebooks/details/r:download/MDAxMDE5LTk3OTI4MTU=
        https://www.worldcat.org/search?q=kw%3AMDAxMDE5LTk3OTI4MTU%3D

        Returns the list of distinct normalized author names found in the search
        results, which is empty if there are no results.
        '''

        # percent-encode the characters of the base64 alphabet that are not safe within a query value
        url = 'https://www.worldcat.org/search?q=kw%3Ar%3Adownload%2F' + book_id.replace('=', '%3D').replace('+', '%2B').replace('/', '%2F')
        response = open_url(self.log, url)

        # Parse page for authors

        authors = []

        soup = beautiful_soup(response.data_string)

        results_table = soup.find('table', attrs={'id': 'br-table-results'})
        if results_table:
            for result_row in results_table.findAll('tr', attrs={'class': 'menuElem'}, recursive=True):
                author_div = result_row.find('div', attrs={'class': 'author'})
                if author_div:
                    # multiple authors of a result are separated by ';'
                    for a in text_only(author_div).split(';'):
                        author = normalize_author(a, unreverse=True)

                        if author and (author not in authors):
                            authors.append(author)

        return authors

    def get_current_book_availability(self, book_id):
        '''
        Determine the current availability of a book from its details page.

        Returns False if the page states that the book is not available. Otherwise
        returns the result of calculate_wait_weeks, given a release date when the page
        announces one ("Coming on"/"Coming soon") or an estimated wait of 7 days when
        the weekly borrowing limit has been reached and no release date was found.
        The release date and limit are only examined while signed in.
        '''
        release_date = None
        estimated_wait_days = None

        response = open_url(self.log, self.book_url(self.library_id, book_id), cookiejar=self.cookiejar, disable_ssl_verify=True)

        if self._is_book_unavailable(response.data_string):
            return False

        # "Coming soon" info only shown if signed in and has not borrowed the full weekly limit of books
        if self.signed_in:
            # parse using lxml since BeautifulSoup has trouble parsing attributes containing < > characters
            html = lxml.html.fromstring(response.data_string)

            flash_message = find_xpath(html, ".//div[@id='flashMessage']")
            if flash_message is not None:
                flash_message_text = lxml_text_only(flash_message)
                self.log.info('flash message: %s' % flash_message_text)
                if 'book is not available' in flash_message_text:
                    return False

            weekly_limit_reached = False
            header = must_find_xpath(html, ".//div[@id='header']")
            for li in header.iter("li"):
                m = re.search(r'(?:Tokens Used|Weekly Limit):\s*([0-9]+)/([0-9]+)', lxml_text_only(li))      # Weekly Limit: 1/3
                if m:
                    # limit is reached when the used count equals the allowed count
                    if m.group(1) == m.group(2):
                        weekly_limit_reached = True
                    break

            book_info = must_find_xpath(html, ".//div[@class='book-info']")
            link_download = must_find_xpath(book_info, ".//div[@class='link-download']")

            # old
            popup = find_xpath(link_download, ".//div[@class='popup']")
            if popup is not None:
                popup_text = lxml_text_only(popup)
                if popup_text.startswith('Coming on'):
                    release_date = parse_only_date(popup_text.partition('Coming on')[2], assume_utc=True)

            # 10/2016  <span style="font-weight: normal;">Coming Soon - Oct 18, 2016</span>
            for span in link_download.xpath(".//span"):
                span_text = lxml_text_only(span)
                m = re.match(r"^Coming (?:on|soon -) ?(.+)$", span_text, flags=re.IGNORECASE)
                if m:
                    release_date = parse_only_date(m.group(1), assume_utc=True)

            if release_date is None and weekly_limit_reached:
                self.log.info('Cannot detect pre-release status because weekly limit has been reached.')
                estimated_wait_days = 7     # cannot borrow until next week

        # estimate availability
        return self.calculate_wait_weeks(library_copies=True, release_date=release_date, estimated_wait_days=estimated_wait_days)
