#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import (unicode_literals, division, absolute_import, print_function)

import re

from calibre.utils.date import parse_only_date

from calibre_plugins.overdrive_link.numbers import value_unit
from calibre_plugins.overdrive_link.book import (LibraryBook, InfoBook)
from calibre_plugins.overdrive_link.formats import (FORMAT_ADOBE_EPUB, FORMAT_ADOBE_PDF)
from calibre_plugins.overdrive_link.library import (SearchableLibrary)
from calibre_plugins.overdrive_link.net import (hostname_from_url, open_url)
from calibre_plugins.overdrive_link.author_prep import normalize_author
from calibre_plugins.overdrive_link.title_prep import normalize_title
from calibre_plugins.overdrive_link.parseweb import (LibraryError, text_only, valid_isbn, must_find, beautiful_soup)

from .python_transition import (IS_PYTHON2)
if IS_PYTHON2:
    from .python_transition import (http, repr, urllib)
else:
    import http.cookiejar
    import urllib.parse


__license__ = 'GPL v3'
__copyright__ = '2012-2025, John Howell <jhowell@acm.org>'


'''
Putting double quotes around a set of words matches those exact words, in that exact order, without any change.

Advanced search supports filtering by format, but the site lists an odd combination of format values:
Adobe PDF 101
EPUB 15208
epub 953
epub. 11
pdf 1204
PDF 1139
pdf. 2
'''


def drop_dot(author):
    """Remove the trailing period from an author name unless it follows a single initial.

    The final character is dropped only when the name ends with two ASCII
    letters followed by a literal period (e.g. "Smith, John." -> "Smith, John").
    A name ending in a single initial ("Smith, J.") and a name with no
    trailing period are returned unchanged.
    """
    # authors seem to always end with a period. Drop unless following a single initial.
    # The period must be escaped: a bare "." matches any character and would chop the
    # last letter off a name that has no trailing period.
    if re.search(r'[a-zA-Z][a-zA-Z]\.\Z', author):
        return author[:-1]

    return author


class Enki(SearchableLibrary):
    """Library provider for Enki sites hosted at <library_id>.enkilibrary.org.

    Searches are performed by fetching the site's HTML search result pages and
    book details are read from the HTML record page of each book.
    """
    id = 'en'
    name = 'Enki'
    formats_supported = {FORMAT_ADOBE_EPUB, FORMAT_ADOBE_PDF}

    @staticmethod
    def validate_library_id(library_id, migrate=True, config=None):
        """Reduce a library id, host name or URL to the bare lower-case site name.

        A value containing ':' or '/' is treated as a URL and reduced to its
        host name using hostname_from_url. A trailing '.enkilibrary.org' is
        then stripped. The migrate and config arguments are accepted but not
        used by this implementation.

        Raises ValueError if the remaining id is not purely alphanumeric.
        """
        if (':' in library_id) or ('/' in library_id):
            library_id = hostname_from_url(library_id)

        suffix = '.enkilibrary.org'
        if library_id.lower().endswith(suffix):
            library_id = library_id[:-len(suffix)]  # strip suffix

        # Anchor with \Z rather than $: "$" also matches just before a trailing newline,
        # which would let the newline through and into the URLs built from this id.
        if not re.match(r'^([0-9a-zA-Z]+)\Z', library_id):
            raise ValueError('Enki library id must be alphanumeric: "%s"' % library_id)

        return library_id.lower()

    @staticmethod
    def validate_book_id(book_id, library_id):
        """Return book_id unchanged if it consists only of digits.

        The library_id argument is not used by this implementation.

        Raises ValueError for any other book id.
        """
        # \Z instead of $ so that an id with a trailing newline is rejected
        if not re.match(r'^([0-9]+)\Z', book_id):
            raise ValueError('Enki book id must be numeric: "%s"' % book_id)

        return book_id

    @staticmethod
    def book_url(library_id, book_id):
        """Return the URL of the record page for a book at the given library."""
        return 'http://%s.enkilibrary.org/EcontentRecord/%s' % (library_id, book_id)

    def __init__(self):
        """Create the cookie jar that is passed to every request made by this instance."""
        self.cookiejar = http.cookiejar.CookieJar()

    def find_books(self, books, search_author, search_title, keyword_search):
        """Search the library catalog and add a LibraryBook to books for every result.

        Result pages are fetched one at a time until the number of pages
        implied by the results header has been read. If the site redirects
        straight to a book record page (a single result), only the book id
        taken from that URL is recorded.

        books: collection receiving each result through its add() method
        search_author: author text to search for; omitted from the query if empty
        search_title: title text to search for; omitted from the query if empty
        keyword_search: if true the title text is searched as 'Keyword' instead of 'Title'

        Raises LibraryError if the results header is not in the expected form,
        if the first result number or the total changes unexpectedly between
        pages, or if the number of results parsed differs from the reported total.

        Returns False in all cases.
        """
        RESULTS_PER_PAGE = 20

        # Only the optional 'page' parameter differs between requests, so the rest of the
        # query is built once. It is kept as a head and a tail so that 'page' lands in the
        # same position in the query string as before (between 'sort' and 'view').
        query_head = [('join', 'AND'), ('bool0[]', 'AND')]

        if search_author:
            # "John Howe" matches but not "john howe". Lower case matches without double quotes.
            query_head.append(('lookfor0[]', search_author))
            query_head.append(('type0[]', 'Author'))

        if search_title:
            query_head.append(('lookfor0[]', search_title))
            query_head.append(('type0[]', 'Keyword' if keyword_search else 'Title'))

        # advanced search supports search by formats, but not usable since variations exist such as EPUB, epub, epub.

        if self.config.search_language in ['English', 'Spanish']:
            query_head.append(('filter', 'language:"%s"' % self.config.search_language))

        query_head.append(('sort', 'relevance'))

        query_tail = [('view', 'list'), ('searchSource', 'local'), ('submit', 'Find')]

        # compiled once here instead of once per result
        author_href = re.compile('/Author/Home')

        page_num = 1
        total_pages = 1
        total_results = 0
        results_processed = 0

        while page_num <= total_pages:
            page_param = [('page', "%d" % page_num)] if page_num > 1 else []
            data = query_head + page_param + query_tail

            url = 'http://%s.enkilibrary.org/Search/Results?%s' % (self.library_id, urllib.parse.urlencode(data))

            response = open_url(self.log, url, cookiejar=self.cookiejar)

            # a single result will go directly to book page
            final_url = response.geturl()
            if '/EcontentRecord/' in final_url:
                # could improve by parsing the book page here, but just save id for now

                # http://xxx.enkilibrary.org/EcontentRecord/14575/Home
                book_id = final_url.partition('/EcontentRecord/')[2].partition('/')[0]
                lbook = LibraryBook(available=True, lib=self, book_id=book_id, search_author=search_author)
                self.log.info('Found: %s' % repr(lbook))
                books.add(lbook)
                return False

            # Parse the html results for analysis
            soup = beautiful_soup(response.data_string)

            page_content = must_find(soup, 'div', attrs={'id': 'page-content'})
            main_content = must_find(page_content, 'div', attrs={'id': 'main-content'})
            result_head = must_find(main_content, 'div', attrs={'class': 'resulthead'})

            head_text = text_only(result_head)

            if "No Results Found" in head_text:
                break

            # Results summary, EG: Showing 1 - 20 of 666 query time: 0.07s
            # Showing 1 - 8 of 8 for search: 'jay bell',	 query time: 0.04s

            count_list = head_text.split()

            if (len(count_list) < 6 or count_list[0] != 'Showing' or count_list[2] != '-' or
                    count_list[4] != 'of'):
                raise LibraryError('Unexpected results header: %s' % head_text)

            # each page must start immediately after the last result already handled
            first_result = int(count_list[1])
            if first_result != results_processed + 1:
                raise LibraryError('Unexpected first result %d instead of %d' % (first_result, results_processed + 1))

            # the reported total must stay the same from page to page
            new_total_results = int(count_list[5])
            if total_results and (new_total_results != total_results):
                raise LibraryError('Total results changed from %d to %d' % (total_results, new_total_results))

            total_results = new_total_results
            total_pages = ((total_results - 1) // RESULTS_PER_PAGE) + 1  # floor division

            self.log.info('Response: page %d of %d. %d total results' % (page_num, total_pages, total_results))

            for result in main_content.findAll('div', attrs={'class': 'resultsList'}, recursive=True):
                # the element id is the book id prefixed with "record"
                book_id = result.get('id', '').replace('record', '')

                authors = []
                title = normalize_title(text_only(must_find(result, 'a', attrs={'class': 'title'})))

                author_a = result.find('a', attrs={'href': author_href})
                if author_a:
                    authors = [normalize_author(drop_dot(text_only(author_a)), unreverse=True)]

                # cannot parse "other authors"

                lbook = LibraryBook(
                    authors=authors, title=title, available=True, lib=self,
                    book_id=book_id, search_author=search_author)

                self.log.info('Found: %s' % repr(lbook))
                books.add(lbook)

                results_processed += 1

            page_num += 1

        if results_processed != total_results:
            raise LibraryError('Expected %s but found %d' % (value_unit(total_results, 'result'), results_processed))

        return False

    def get_book_info(self, book_id, cache):
        """Fetch the record page for book_id and return its details as an InfoBook.

        Details are read from the label/value pairs in the page's
        'titleDetailsSidegroup' section and the title from its 'recordTitle'
        element. Labels that are not recognized are ignored. The cache
        argument is not used by this implementation.
        """
        response = open_url(self.log, self.book_url(self.library_id, book_id), cookiejar=self.cookiejar)

        authors = []
        publisher = ''
        pubdate = None
        language = ''
        isbn = ''
        formats = set()

        soup = beautiful_soup(response.data_string)

        page_content = must_find(soup, 'div', attrs={'id': 'page-content'})
        side_group = must_find(page_content, 'div', attrs={'id': 'titleDetailsSidegroup'})

        # labels and values are separate elements; they are paired up by position
        label_divs = side_group.findAll('div', attrs={'class': 'sidebarLabel'}, recursive=True)
        value_divs = side_group.findAll('div', attrs={'class': 'sidebarValue'}, recursive=True)

        for label_div, value_div in zip(label_divs, value_divs):
            label = text_only(label_div)
            value = text_only(value_div)

            if label == 'Main Author:':
                authors.append(normalize_author(drop_dot(value), unreverse=True))

            elif label == 'Additional Authors:':
                pass    # cannot easily parse

            elif label == 'Published:':
                # University of California Press,  2012
                if re.search(r', +[12][0-9][0-9][0-9]$', value):
                    # split at the last comma: publisher name before it, year after it
                    publisher, _sep, pubdate_s = value.rpartition(',')
                    pubdate = parse_only_date(pubdate_s.strip(), assume_utc=True)
                else:
                    publisher = value

            elif label == 'Format:':
                # case-insensitive match since the site uses variations such as EPUB, epub, epub.
                lvalue = value.lower()

                if 'epub' in lvalue:
                    formats.add(FORMAT_ADOBE_EPUB)
                elif 'pdf' in lvalue:
                    formats.add(FORMAT_ADOBE_PDF)

            elif label == 'Language:':
                language = value

            elif label == 'ISBN:':
                isbn = valid_isbn(value)

        title = normalize_title(text_only(must_find(page_content, 'div', {'id': 'recordTitle'})))

        return InfoBook(
                authors=authors, title=title, isbn=isbn, language=language,
                publisher=publisher, pubdate=pubdate, formats=formats,
                lib=self, book_id=book_id)
