﻿#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__   = 'GPL v3'
__copyright__ = '2016, John Howell <jhowell@acm.org>'
__docformat__ = 'restructuredtext en'

import re
import mechanize
import urllib
import cookielib

from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.utils.date import parse_only_date

from calibre_plugins.overdrive_link.book import (LibraryBook, InfoBook)
from calibre_plugins.overdrive_link.formats import (FORMAT_HOOPLA_AUDIOBOOK,
        FORMAT_HOOPLA_BOOK_READER, FORMAT_HOOPLA_COMIC_READER)
from calibre_plugins.overdrive_link.language import LANGUAGES
from calibre_plugins.overdrive_link.library import SearchableLibrary
from calibre_plugins.overdrive_link.net import (browse_url, open_url)
from calibre_plugins.overdrive_link.match import (normalize_author, normalize_title)
from calibre_plugins.overdrive_link.parseweb import (LibraryError, must_find, text_only, class_contains)
    
    
# Host name of the Hoopla web site; every request URL in this module is built from it.
HOOPLA_HOST = 'www.hoopladigital.com'


# Maps the singular, lower-case collection name taken from a search result's
# "found in <collection>" text to the plugin format constant for that collection.
COLLECTION_FORMAT_NAMES = {
    'audiobook': FORMAT_HOOPLA_AUDIOBOOK,
    'comic': FORMAT_HOOPLA_COMIC_READER,
    'ebook': FORMAT_HOOPLA_BOOK_READER,
}


class Hoopla(SearchableLibrary):
    '''
    Search and book-information support for the Hoopla web site.

    Pages are fetched over HTTPS from HOOPLA_HOST and scraped with
    BeautifulSoup.  A single cookie jar is shared by sign in, searching and
    book lookups.

    NOTE(review): self.log, self.config, self.card_number, self.card_pin and
    self.library_id are read here but never assigned in this class; they are
    presumably provided by SearchableLibrary or its caller -- confirm.
    '''

    id = 'ho'
    name = 'Hoopla'
    formats_supported = {FORMAT_HOOPLA_AUDIOBOOK, FORMAT_HOOPLA_BOOK_READER, FORMAT_HOOPLA_COMIC_READER}
    title_used_in_search = False    # find_books queries on the author alone whenever one is given

    @staticmethod
    def validate_library_id(library_id, migrate=True, config=None):
        '''
        Return library_id unchanged if it is all digits or the empty string.

        An empty id is allowed so the full site can be used without signing in.
        Raises ValueError for anything else.  migrate and config are unused here.
        '''
        # \Z (not $) so that an id followed by a newline is rejected too
        if not (re.match(r'^([0-9]+)\Z', library_id) or library_id == ''):
            raise ValueError('Hoopla library id must be numeric: "%s"' % library_id)

        return library_id

    @staticmethod
    def validate_book_id(book_id, library_id):
        '''
        Return book_id unchanged if it is all digits, else raise ValueError.

        library_id is unused here.
        '''
        # \Z (not $) so that an id followed by a newline is rejected too
        if not re.match(r'^([0-9]+)\Z', book_id):
            raise ValueError('Hoopla book id must be numeric: "%s"' % book_id)

        return book_id

    @staticmethod
    def book_url(library_id, book_id):
        '''
        Return the title page URL for book_id.  library_id is not part of the URL.
        '''
        return 'https://%s/title/%s' % (HOOPLA_HOST, book_id)

    def __init__(self):
        # cookies are shared between sign in, search and book information requests
        self.cookiejar = cookielib.CookieJar()

    def sign_in(self, use_credentials):
        '''
        Sign in to Hoopla with the configured email address (card number) and
        password (PIN).

        Nothing is done when no card number is configured or use_credentials
        is false, except that a configured library id without usable
        credentials is treated as an error.

        Raises LibraryError if the site rejects the sign in, and Exception if
        a library id is configured without credentials or if sign in succeeds
        but no library id is configured.
        '''
        if not (self.card_number and use_credentials):
            if self.library_id:
                self.signin_required = True
                raise Exception('email address (card number) must be configured for non-generic Hoopla library.')

            return

        # sign in to library to produce selective results
        self.signin_required = True

        self.log.info('Signing in to %s' % self.name)

        br = mechanize.Browser()
        br.set_cookiejar(self.cookiejar)
        browse_url(self.log, br, mechanize.Request('https://%s/login' % HOOPLA_HOST))

        # Select the Sign in form
        br.select_form(predicate=lambda f: f.attrs.get('action','').startswith('/login')) # Exception if not found

        # User credentials
        br.form['email'] = self.card_number
        br.form['password'] = self.card_pin

        # Login (a request of None submits the selected form, presumably -- see browse_url)
        page = browse_url(self.log, br, None)

        if 'Invalid email address or password' in page:
            raise LibraryError('Sign in failed. Check email address (card number) and password (PIN).')

        if 'Sign Out' not in page:
            raise LibraryError('Sign in unsuccessful.')

        if not self.library_id:
            self._raise_library_id_required()

        self.log.info('Sign in to %s successful' % self.name)
        self.signed_in = True

    def _raise_library_id_required(self):
        '''
        Report which library id should be configured; always raises Exception.

        The profile page of the signed in user is fetched.  If one library
        option is marked as selected its id is named in the exception message,
        otherwise every library name and id offered by the page is logged.
        '''
        url = 'https://%s/profile' % HOOPLA_HOST
        response = open_url(self.log, url, cookiejar=self.cookiejar)
        soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

        library_select = must_find(soup, 'select', attrs={'name': 'patron.libraryId'})
        libraries = {}

        for opt in library_select.findAll('option'):
            libraries[text_only(opt)] = opt['value']
            if opt.has_key('selected'):
                # found proper library id
                raise Exception('Library ID required - Configure %s with library ID "%s" for %s' % (
                    self.name, opt['value'], text_only(opt)))

        self.log.warn('Configure %s with the appropriate library ID:' % self.name)

        for library_name in sorted(libraries.keys()):
            self.log.info('%s = %s'%(library_name, libraries[library_name]))

        raise Exception('Library ID required - View log for a list of possible IDs')

    def find_books(self, books, search_author, search_title, keyword_search, find_recommendable):
        '''
        Search Hoopla for books (e-books, comics and audiobooks, depending on
        the configured search formats) matching an author or a title.

        The query is search_author when given, otherwise search_title.
        keyword_search and find_recommendable are not used by this library.

        Matching books are added to the set books.  Returns True if the
        search produced more than MAX_RESULTS_ALLOWED results, in which case
        books is left untouched; otherwise returns False.
        '''
        PAGE_SIZE = 80
        MAX_RESULTS_ALLOWED = 500

        search_formats = self.config.search_formats

        # The requested kind does not change between pages, so pick it once.
        if (FORMAT_HOOPLA_BOOK_READER in search_formats or
                FORMAT_HOOPLA_COMIC_READER in search_formats):
            # send an empty 'kind' in order to default to all since need both 'EBOOK' and 'COMIC'
            kind = ''

        elif FORMAT_HOOPLA_AUDIOBOOK in search_formats:
            kind = 'AUDIOBOOK'      # 'MOVIE', 'MUSIC', 'EBOOK', 'COMIC', 'TELEVISION'

        else:
            return False    # no formats needed

        query = search_author or search_title   # including title with author causes missed books!
        if isinstance(query, unicode):
            # urllib.urlencode applies str() to values, which fails for non-ASCII unicode text
            query = query.encode('utf-8')

        new_books = set()
        offset = 0

        while True:
            # Examples of search URLs used by the site:
            #   https://www.hoopladigital.com/search?results=&q=john&kind=AUDIOBOOK
            #   https://www.hoopladigital.com/search?results=&sort=DEFAULT_RANK&q=john&kind=AUDIOBOOK
            #   https://www.hoopladigital.com/search?more=&q=john&offset=97&kind=AUDIOBOOK
            #
            #   https://www.hoopladigital.com/search?q=kurt+vonnegut&offset=0
            #   https://www.hoopladigital.com/search?q=kurt+vonnegut&offset=0&kind=EBOOK
            #   https://www.hoopladigital.com/search?q=kurt+vonnegut&offset=0&kind=AUDIOBOOK
            #
            #   https://www.hoopladigital.com/search?results=&q=kurt+vonnegut&offset=40&kind=EBOOK
            #   https://www.hoopladigital.com/search?results=&q=kurt+vonnegut&offset=80&kind=EBOOK

            data = {}
            data['q'] = query
            data['pageSize'] = unicode(PAGE_SIZE)
            data['offset'] = unicode(offset)
            data['kind'] = kind

            #data['sort'] = 'DEFAULT_RANK'   # 'NEW_RELEASES', 'TITLE'

            url = 'https://%s/search?%s' % (HOOPLA_HOST, urllib.urlencode(data))

            response = open_url(self.log, url, cookiejar=self.cookiejar)

            # Parse the html results for analysis
            soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

            items = soup.findAll('div', attrs=class_contains('s-results'), recursive=True)
            for item in items:
                lbook, fmt_name = self._parse_search_result(item, search_author)

                if search_formats.isdisjoint(lbook_formats(lbook) if False else self._last_formats):
                    self.log.info('Ignoring (wrong format %s): %s' % (fmt_name, repr(lbook)))
                else:
                    self.log.info('Found: %s' % repr(lbook))
                    new_books.add(lbook)

            if len(items) == 0:
                break               # reached end of results

            next_a = soup.find('a', attrs=class_contains('more'))
            if not next_a:
                break               # no more results

            offset += len(items)
            if offset > MAX_RESULTS_ALLOWED:
                return True

        # only add found books on success since an excessive number of results is only detected after the fact
        books.update(new_books)

        return False

    def _parse_search_result(self, item, search_author):
        '''
        Build a LibraryBook from one search result element.

        Returns (lbook, fmt_name) where fmt_name is the collection name found
        in the result ('unknown' if none was found); it is used for logging.
        The set of formats recognized for the result is left in
        self._last_formats for the caller.
        '''
        title = ''
        authors = []

        item_a = must_find(item, 'a')

        # href="https://www.hoopladigital.com/title/11073039"
        # href="/title/10051962;jsessionid=300E7271F933589446269AA6B91BD31B"
        book_id = item_a['href'].rsplit('/', 1)[1].split(';')[0]

        # The cover image title may hold "title/author".  Look the attribute up
        # with get(): the "in" operator on a tag tests its children, not its
        # attributes.
        book_img = item_a.find('img')
        img_title = book_img.get('title') if book_img is not None else None
        if img_title:
            title, sep, author = img_title.rpartition('/')
            if sep:
                title = normalize_title(title)
                authors = [normalize_author(author, unreverse=False)]

        if not title:
            # fall back to the heading of the result
            title_div = must_find(item, 'h2')
            title = normalize_title(text_only(title_div))

        # <span>found in Audiobooks</span> Ebooks, Music, comics etc.
        formats = set()
        fmt_name = 'unknown'
        for span in item.findAll('span'):
            words = text_only(span).lower().split()
            if len(words) == 3 and words[0] == 'found' and words[1] == 'in':
                fmt_name = words[2]
                if fmt_name.endswith('s'):
                    fmt_name = fmt_name[:-1]    # plural to singular

                if fmt_name in COLLECTION_FORMAT_NAMES:
                    formats.add(COLLECTION_FORMAT_NAMES[fmt_name])

        self._last_formats = formats

        lbook = LibraryBook(
            title=title, authors=authors, formats=formats,
            available=True, lib=self, book_id=book_id, search_author=search_author)

        return lbook, fmt_name

    def get_book_info(self, book_id):
        '''
        Fetch the title page for book_id and return an InfoBook holding the
        title (with subtitle), authors, publisher, publication date and
        language found on it.  Fields that cannot be found are left empty.
        '''
        response = open_url(self.log, self.book_url(self.library_id, book_id), cookiejar=self.cookiejar)

        soup = BeautifulSoup(response.data, convertEntities=BeautifulSoup.HTML_ENTITIES)

        title = ''
        authors = []
        publisher = ''
        pubdate = None
        language = ''

        main = must_find(soup, 'section', attrs={'class':'main-section'})

        h2 = main.find('h2')
        if h2:
            title = text_only(h2)

        h5 = main.find('h5', attrs={'class':'subheader'})
        if h5:
            subtitle = text_only(h5)
            if subtitle:
                title += ': ' + subtitle

        title = normalize_title(title)

        # Publisher and author links are told apart by the start of their href.
        for a in main.findAll('a'):
            href = a.get('href', '')
            if href.startswith('/publisher/'):
                publisher = text_only(a)

                # language follows publisher
                next_s = a.parent.nextSibling
                while next_s and getattr(next_s, 'name', '') != 'span':
                    next_s = next_s.nextSibling     # skip forward to tag containing the language

                if next_s and next_s.name == 'span':
                    language_plus = text_only(next_s).lower()     # ENGLISH (UNABRIDGED)
                    #self.log.info('language plus: %s' % language_plus)
                    for lang in LANGUAGES:
                        if lang and (lang.lower() in language_plus):
                            language = lang
                            break

            elif href.startswith('/artist/'):
                authors.append(normalize_author(text_only(a), unreverse=False))

        meta1 = main.find('div', attrs={'class':'title-meta text-center'})
        if meta1:
            for span in meta1.findAll('span'):
                t = text_only(span)
                #self.log.info('check span for date: %s' % t)
                if re.match(r'^((19)|(20))[0-9][0-9]$', t):
                    pubdate = parse_only_date(t, assume_utc=True)   # looks like a year

        return InfoBook(
            authors=authors, title=title, publisher=publisher, pubdate=pubdate,
            language=language, lib=self, book_id=book_id)

    def check_book_obtainable(self, book_id):
        '''
        Return 0 for every book.
        '''
        return 0    # always available, assuming no pre-release titles
