#!/usr/bin/env python3

import datetime
import os
import re
from urllib.parse import urlencode

from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars
from lxml import etree
from lxml.html import fromstring, tostring


def html_lines_to_list(html, ignore=''):
    """Collect the text nodes below *html*, leaving out those equal to *ignore*.

    :param html: object offering ``xpath()`` (an lxml element in this module).
    :param ignore: text to leave out; each text node is compared with it as-is (no stripping).
                   With the default, empty text nodes are dropped.
    :return: list of the remaining text nodes, in the order ``.//text()`` delivers them.
    """
    kept = []
    for text_node in html.xpath(".//text()"):
        if text_node != ignore:
            kept.append(text_node)
    return kept


# load_translations() is not imported anywhere in this file; calibre is expected to provide it in the plugin's
# namespace. NOTE(review): it presumably installs the _() function that the translatable log messages below
# rely on - confirm against the calibre plugin documentation.
load_translations()


class SfLeihbuchObject(object):
    """Common base of the sf-leihbuch.de page classes; provides page download and parsing."""

    @classmethod
    def root_from_url(cls, browser, url, timeout, log):
        """Download *url* and return the page as a parsed lxml HTML tree.

        :param browser: object offering ``open_novisit(url, timeout=...)`` whose result has ``read()``.
        :param url: address of the page to fetch.
        :param timeout: passed through to ``open_novisit``.
        :param log: not used by this method.
        :return: result of ``lxml.html.fromstring`` for the cleaned page text.
        """
        # The response bytes are decoded as UTF-8; undecodable bytes are silently dropped.
        page_text = browser.open_novisit(url, timeout=timeout).read().decode('utf-8', 'ignore')

        # Keep only lines with visible content. The separator inserted by join() is removed again by the
        # substitution (together with tabs), so the whole page ends up on one single line.
        content_lines = (line for line in page_text.splitlines() if line.strip())
        single_line = re.sub(r'[\n\r\t]', "", os.linesep.join(content_lines))

        return fromstring(clean_ascii_chars(single_line))


class QueryBuilder(SfLeihbuchObject):
    """Builds and recognises query URLs of the index.cfm search form."""

    # Prefix every generated query URL starts with; the encoded parameters are appended.
    URL = 'http://www.sf-leihbuch.de/index.cfm?'
    TYPE = None

    @classmethod
    def url_from_params(cls, params, log):
        """Return ``cls.URL`` followed by the URL-encoded *params*.

        :param params: mapping (or sequence of pairs) accepted by ``urllib.parse.urlencode``.
        :param log: logger; its ``error()`` is called when UTF-8 encoding fails.
        :return: the complete query URL as a string.
        """
        try:
            query = urlencode(params, encoding='utf-8')
        except UnicodeEncodeError as e:
            log.error(_('Error while encoding {0}: {1}.').format(params, e))
            # Fallback: encode as iso-8859-1 and let unencodable characters become '?' (quoted as %3F) ...
            lossy_query = urlencode(params, encoding='iso-8859-1', errors='replace')
            # ... then cut the query at the first replacement character and drop the character before it too.
            query = lossy_query.partition('%3F')[0][:-1]
        return cls.URL + query

    @classmethod
    def is_type_of(cls, url, log):
        """Tell whether *url* carries ``buchid=`` or ``?bid=`` directly followed by the digit 0.

        :param url: URL string to test.
        :param log: not used by this method.
        :return: True or False.
        """
        return re.match(r'.*(?:buchid|\?bid)=[0].*', url) is not None


class BookList(QueryBuilder):
    """Search result list of sf-leihbuch.de.

    In sf-leihbuch.de there is no distinction between title and publication.
    The web form offers searches for: Suchbegriff (in title / in blurb), Verlag, Autor, Serie, Grafiker,
    Übersetzer, Jahr, Originaljahr.
    """

    TYPE = "BookList"

    # Absolute path of the result rows in the raw response. Don't trust the browser! The full path copied
    # from Chrome,
    # /html/body/div[2]/table/tbody/tr/td/table/tbody/tr[2]/td[1]/table/tbody/tr/td[2]/form/table/tbody/tr[2]/td/table/tbody/tr
    # contains tbody tags, but in the raw response there is only one tbody tag.
    _ROWS_XPATH = ('/html/body/div[2]/table/tr/td/table/tr/td/table/tr/td[2]/form/'
                   'table/tr[2]/td/table/tbody/tr')

    @classmethod
    def url_from_title(cls, prefs, title, log):
        """Build the URL of a keyword search.

        :param prefs: mapping read for 'fuzzy_search_target'; the value 'search_in_blurb' switches the
                      "sucheimklappentext" parameter to "1", anything else leaves it at "0".
        :param title: search term, sent as "stichwort".
        :param log: logger, handed on to ``url_from_params``.
        :return: the query URL produced by ``url_from_params``.
        """
        search_in_blurb = prefs['fuzzy_search_target'] == 'search_in_blurb'

        # NOTE(review): "lverlagid" and "loriginaljahr" differ from the "verlagid" / "originaljahr" parameters
        # used in the detail URL built by BookDetail.stub_from_search - possibly typos. Left unchanged because
        # the server-side parameter names cannot be verified from this file.
        params = {
            "page": "db/index",
            "loginok": "",
            "formsent": "",
            "bildhoehe": "500",
            "lastchoice": "",
            "lverlagid": "",
            "stichwort": title,
            "autorid": "",
            "sucheimklappentext": "1" if search_in_blurb else "0",
            "serienid": "",
            "anzeigemodus": "-1",
            "grafikerid": "",
            "uebersetzerid": "",
            "jahr": "0",
            "loriginaljahr": "0",
            "buchid": "0",
        }

        # ToDo: Is it possible to grab other metadata fields than title and author for identification?
        # Asked by 89jonese in the calibre forum (01-30-2021) with regard to series and series_index.
        # Kovid's answer: IIRC only title, author and identifiers are passed into metadata plugins for
        # identification, not series and series_index.

        return cls.url_from_params(params, log)

    @classmethod
    def from_url(cls, browser, url, timeout, log):
        """Fetch a search result page and turn every result row into a book stub.

        :param browser: browser object, handed on to ``root_from_url``.
        :param url: search URL, e.g. from ``url_from_title``.
        :param timeout: handed on to ``root_from_url``.
        :param log: logger used for error and info messages.
        :return: list with one ``BookDetail.stub_from_search`` result per row. An empty list is returned when
                 the page has no content, reports "Keine passenden Einträge gefunden", contains no result
                 rows, or an lxml error occurs while evaluating the XPath expressions.

        Errors raised by ``root_from_url`` or ``BookDetail.stub_from_search`` are not handled here.
        """
        root = cls.root_from_url(browser, url, timeout, log)

        # Spelled out instead of "if not root": the truth value of an lxml element means "has children" and
        # its use is deprecated by lxml.
        if root is None or len(root) == 0:
            return []

        # Check response content: a website is delivered, but perhaps with the response
        # "Keine passenden Einträge gefunden ...".
        # Caveat: The generated web page has a syntax error in line 18, position 211: semicolon expected
        # (slideshow script in head).
        try:
            error_page = root.xpath('//*[text()[contains(.,"Keine passenden Einträge gefunden")]]')
        except etree.LxmlError as e:  # base class of lxml's parse and XPath errors
            log.error(_("Parsing error: {0}").format(e))
            return []
        if error_page:
            log.error(_('No entrys found for query {0}').format(url))
            return []

        try:
            rows = root.xpath(cls._ROWS_XPATH)
        except etree.LxmlError as e:
            log.error(_("Parsing error: {0}").format(e))
            return []
        if not rows:
            log.error('No rows object from xpath! xpath=' + cls._ROWS_XPATH)
            return []

        book_stubs = [BookDetail.stub_from_search(row, log) for row in rows]

        log.info("Parsed books from url %r. Found %d titles." % (url, len(book_stubs)))

        return book_stubs


class Record(SfLeihbuchObject):
    """Base class for pages that show one single database record."""

    URL = None

    @classmethod
    def is_type_of(cls, url, log):
        """Tell whether *url* carries a book id greater than zero.

        The URL must contain ``buchid=`` or ``?bid=`` directly followed by a digit from 1 to 9,
        e.g. http://www.sf-leihbuch.de/index.cfm?bid=728

        :param url: URL string to test.
        :param log: not used by this method.
        :return: True or False.
        """
        return re.match(r'.*(?:buchid|\?bid)=[1-9].*', url) is not None


class BookDetail(Record):
    # Detail page of a single book.
    # In sf-leihbuch.de there is no distinction between title and publication.

    # Permalink prefix; url_from_id() appends the book id to it.
    URL = 'http://www.sf-leihbuch.de/index.cfm?bid='  # http://www.sf-leihbuch.de/index.cfm?bid=728

    # External catalogues, keyed by a display label; every value is a three-item list.
    # NOTE(review): the items look like [identifier key, full catalogue name, base URL the catalogue id is
    # appended to] - confirm against the code that reads this table.
    EXTERNAL_IDS = {
        'DNB': ["dnb", "Deutsche Nationalbibliothek", "http://d-nb.info/"],
        'OCLC/WorldCat': ["oclc-worldcat", "Online Computer Library Center", "http://www.worldcat.org/oclc/"],
    }

    @classmethod
    def url_from_id(cls, sfldb_id):
        # log.info('*** Enter BookDetail.url_from_id().')
        return cls.URL + sfldb_id

    @classmethod
    def id_from_url(cls, url):
        if 'buchid=' in url:
            return re.search('buchid=(\d+)$', url).group(1)
        elif 'bid=' in url:
            return re.search('bid=(\d+)$', url).group(1)
        else:
            return ''

    @classmethod
    def stub_from_search(cls, row, log):

        # log.info('*** Enter BookDetail.stub_from_search().')

        # Parse the title list and fill stubs for book detail fetching

        properties = {}

        if row is None:
            log.error('BookDetail.stub_from_search(): row ist None.')
            return properties

        properties["authors"] = row.xpath('td[2]//a')[0].text_content()
        regexp = re.compile(r'(.*), (.*)')
        match = regexp.match(properties["authors"][0])
        if match:
            properties["authors"][0] = match.group(2) + ' ' + match.group(1)
        # log.info('authors={0}'.format(properties["authors"]))

        # properties["title"] = row.xpath('//*[@id="table0"]/tbody/tr/td[3]/a/b')[0].text_content()
        properties["title"] = row.xpath('td[3]/a')[0].text_content()
        # log.info('title={0}'.format(properties["title"]))

        # <a href="#" onclick="document.suchform.buchid.value=728;
        # document.suchform.submit()"><b>Türme strahlen den Tod</b></a>
        properties["book_id"] = row.xpath('td[3]/a/@onclick')[0]
        regexp = re.compile(r'document.suchform.buchid.value=([0-9.-]+)')
        match = regexp.match(properties["book_id"])
        if match:
            properties["book_id"] = match.group(1)
        else:
            properties["book_id"] = ''
        # log.info('book_id={0}'.format(properties["book_id"]))
        # There is a short link for a known book id: http://www.sf-leihbuch.de/index.cfm?bid=728
        properties["short_url"] = 'http://www.sf-leihbuch.de/index.cfm?bid=' + properties["book_id"]
        # Use full parametrized url. Cause: author pairs are not correct displayed with bid (e. g. 806)
        properties["url"] = \
            'http://www.sf-leihbuch.de/index.cfm?page=db%2Findex&loginok=&formsent=&bildhoehe=500&lastchoice=' \
            '&verlagid=&stichwort=universum&autorid=&sucheimklappentext=0&serienid=&anzeigemodus=-1&grafikerid=' \
            '&uebersetzerid=&jahr=0&originaljahr=0&buchid=' + \
            properties["book_id"]
        # log.info('url={0}'.format(properties["url"]))

        # There's no reasonm to extract the other fields in book overview list

        return properties

    @classmethod
    def from_url(cls, browser, url, timeout, log):

        # Parse a book detail page

        # log.info('*** Enter BookDetail.from_url().')
        # log.info('url={0}'.format(url))

        properties = {}
        series_titles = []
        # permalink: http://www.sf-leihbuch.de/index.cfm?bid=728
        properties["sfldb_id"] = cls.id_from_url(url)  # Get the book id from the actual url
        properties["short_url"] = 'http://www.sf-leihbuch.de/index.cfm?bid=' + properties["sfldb_id"]

        root = cls.root_from_url(browser, url, timeout, log)
        # log.info('root={0}'.format(list(root.iter())))
        # detail_cols = root.xpath('/html/body/div[2]/table/tr/td/table/tr[2]/td/table/tr/td[2]/form/table/tr[2]/td/
        # table/tr/td/table/tr[2]/td')
        # /html/body/div[2]/table/tr/td/table/tr[2]/td/table/tr/td[2]/form/table/tr[2]/td/table
        book_table_columns = root.xpath(
            '/html/body/div[2]/table/tr/td/table/tr[2]/td/table/tr/td[2]/form/table/tr[2]/td/table/tr/td')
        if not book_table_columns:
            log.error(_('No book detail table columns found!'))

        column_no = 0
        for book_table_column in book_table_columns:

            column_no = column_no + 1
            # log.info('column_no={0}'.format(column_no))
            # log.info('book_table_column={0}'.format(etree.tostring(book_table_column, encoding='iso-8859-1',
            # method='html', with_tail=False)))

            # Get all descending rows (caption and content). Note: Ignore table tag before row tag
            book_detail_fields = book_table_column.xpath('.//tr')
            if not book_detail_fields:
                # if there is only one cover image, there are no rows in column 3
                # log.info('No book_detail_fields found in column {0} with .//tr. Trying .//a'.format(column_no))
                book_detail_fields = book_table_column.xpath('.//a')
                if not book_detail_fields:
                    log.error(_('No book_detail_fields found!'))

            for book_detail_field in book_detail_fields:

                section_list = []
                section_list = book_detail_field.xpath('.//text()')
                # log.info('section_list={0}'.format(section_list))
                if len(section_list) > 1:
                    section_caption = section_list[0].strip()
                    if section_caption in ['Autor', 'Titel', 'Serie', 'Verlag', 'VerlagsNr.', 'Jahr', 'Originaltitel',
                                           'Originaljahr', 'Übersetzer', 'Titelbild', 'Titelbild 2', 'Nachdrucke', ]:
                        section_list = section_list[1:]
                    else:
                        section_caption = ''
                else:
                    section_caption = ''

                section_text = ' '.join(section_list).strip()
                # section_html = etree.tostring(book_detail_field, encoding='iso-8859-1', method='html', with_tail=False)
                section_html = etree.tostring(book_detail_field, encoding='utf-8', method='html', with_tail=False)
                # log.info('section_html={0}'.format(section_html))  # ToDo: convert bytes to string?
                # log.info('section_caption={0}'.format(section_caption))
                # log.info('section_text={0}'.format(section_text))

                try:

                    if column_no == 1:

                        if section_caption == 'Autor':
                            # <td valign="top" class="buchtitel">
                            # <a href="javascript:document.suchform.autorid.options[
                            # document.suchform.autorid.selectedIndex].value=39;document.suchform.verlagid.options[
                            # document.suchform.verlagid.selectedIndex].value='';document.suchform.buchid.options[
                            # document.suchform.buchid.selectedIndex].value=0;document.suchform.submit();"
                            # class="abuchtitel">William Brown</a>
                            # <br><span class="standard">( Ernst Hermann Richter<br>bzw.<br>Winfried Scholz<br>bzw.<br>
                            # Hans Peschke)</span>/td>
                            properties["authors"] = []
                            for a in book_detail_field.xpath('.//a'):
                                # log.info('a={0}'.format(a.text_content().strip()))
                                # Check for authors couple
                                # <td valign="top" class="buchtitel">
                                # <a href="javascript:document.suchform.autorid.options[
                                # document.suchform.autorid.selectedIndex].value=67;
                                # document.suchform.verlagid.options[
                                # document.suchform.verlagid.selectedIndex].value='';
                                # document.suchform.buchid.options[
                                # document.suchform.buchid.selectedIndex].value=0;
                                # document.suchform.submit();" class="abuchtitel">Clark Darlton</a>
                                # &amp;
                                # <a href="javascript:document.suchform.autorid.options[
                                # document.suchform.autorid.selectedIndex].value=68;
                                # document.suchform.verlagid.options[
                                # document.suchform.verlagid.selectedIndex].value='';
                                # document.suchform.buchid.options[
                                # document.suchform.buchid.selectedIndex].value=0;
                                # document.suchform.submit();" class="abuchtitel">Jesco von Puttkamer</a>
                                # <br><span class="standard">(Walter Ernsting)</span>
                                # </td>
                                # The html above comes from search form. If html is send via bid, author couples are
                                # not correct, e. g. for:
                                # buchid	autorid	        autorid2	titel
                                # 806	    Darlton Clark	68	        Das unsterbliche Universum
                                if '&' in a.text_content() or '&amp;' in a.text_content():
                                    multiple_authors = a.text_content().strip().split('&')
                                    # log.info('multiple_authors={0}'.format(multiple_authors))
                                    for single_author in multiple_autors:
                                        properties["authors"].append(single_author.strip())
                                else:
                                    properties["authors"].append(a.text_content().strip())
                            if len(properties["authors"]) == 0:
                                # log.info('len(properties["authors"])==0. Filling with section_text={0}'.format(section_text))
                                properties["authors"] = list(section_text)
                            properties["author_string"] = str(properties["authors"][0])

                            # Extract authors_alias
                            properties["authors_alias"] = []
                            for span in book_detail_field.xpath('.//span'):
                                text_line = span.text_content().strip('()').strip('=').strip()
                                # log.info('text_line={0}'.format(text_line))
                                if text_line.startswith('auch:'):
                                    text_line = text_line[5:]
                                text_line = text_line.replace('Verlagspseudonym, in vielen Fällen:', '').strip()
                                text_line = text_line.replace('Verlagspseudonym · Autor hier:', '').strip()
                                text_line = text_line.replace('Verlagspseudonym:', '').strip()
                                text_line = text_line.replace('Nachdruckpseudonym:', '').strip()
                                # log.info('text_line={0}'.format(text_line))
                                properties["authors_alias"] = [x.strip() for x in re.split(
                                    'Pseudonym von|· alias|\- alias|\- auch\:|auch\:|=|\,|bzw\.| und ', text_line)]
                                # Remove eventual duplicates from "Autor" field in table no 1
                                # log.info('properties["authors_alias"]={0}'.format(properties["authors_alias"]))
                                properties["authors_alias"] = list(dict.fromkeys(properties["authors_alias"]))
                                # log.info('properties["authors_alias"]={0}'.format(properties["authors_alias"]))
                            # Get rid of empty elements
                            properties["authors_alias"] = list(filter(None, properties["authors_alias"]))
                            if len(properties["authors_alias"]) == 0:
                                del properties["authors_alias"]
                            else:
                                properties["authors_alias"].sort()

                            # Extract author_id
                            # ToDo: Multiple authors
                            # <a href="javascript:document.suchform.autorid.options[
                            # document.suchform.autorid.selectedIndex].value=217;
                            # document.suchform.verlagid.options[document.suchform.verlagid.selectedIndex].value='';
                            # document.suchform.buchid.options[document.suchform.buchid.selectedIndex].value=0;
                            # document.suchform.submit();" class="abuchtitel">Bert Andrew</a>
                            properties["author_id"] = 0
                            regexp = re.compile(r'.*autorid\.selectedIndex%5D\.value=([0-9]+).*')
                            # match = regexp.match(section_html.decode(encoding='iso-8859-1'))
                            match = regexp.match(section_html.decode(encoding='utf-8'))
                            if match:
                                properties["author_id"] = int(match.group(1).strip())
                            # log.info('author_id={0}'.format(properties["author_id"]))

                        elif section_caption == 'Titel':
                            properties["title"] = section_list[0].strip()
                            try:
                                properties["subtitle"] = section_list[2].strip()
                            except IndexError:
                                pass

                        elif section_caption == 'Serie':
                            properties["series"] = section_text
                            # log.info('series={0}'.format(properties["series"]))
                            properties["series_index"] = 0.0
                            if properties["series"]:
                                regexp = re.compile(r'(.*)#([0-9]+).*')
                                match = regexp.match(properties["series"])
                                if match:
                                    properties["series"] = match.group(1).strip()
                                    properties["series_index"] = float(match.group(2).strip())
                            # log.info('series {0}, #{1}'.format(properties["series"], properties["series_index"]))
                            # Extract series_id:
                            properties["series_id"] = 0
                            # b'<tr>
                            # <td valign="top" align="right" bgcolor="F9EBA9">Serie</td>
                            # <td valign="top"><b><a href="javascript:
                            # document.suchform.serienid.value=53;
                            # document.suchform.autorid.options%5Bdocument.suchform.autorid.selectedIndex%5D.value=\'\';
                            # document.suchform.verlagid.options%5Bdocument.suchform.verlagid.selectedIndex%5D.value=\'\';
                            # document.suchform.buchid.options%5Bdocument.suchform.buchid.selectedIndex%5D.value=0;
                            # document.suchform.submit();">Dr. Solden (William Brown)</a>#1</b></td>
                            # </tr>'
                            # Direct link: http://www.sf-leihbuch.de/index.cfm?sid=34
                            regexp = re.compile(r'.*serienid.value=([0-9.-]+).*')
                            match = regexp.match(
                                # section_html.decode(encoding='iso-8859-1'))  # section_html.decode('iso-8859-1')
                                section_html.decode(encoding='utf-8'))  # section_html.decode('iso-8859-1')
                            if match:
                                properties["series_id"] = int(match.group(1))
                            # log.info('series_id {0}={1}'.format(properties["series_id"], properties["series"]))

                            # Get a list of all books in this serie
                            series_titles = Series.from_url(browser, 'http://sf-leihbuch.de/index.cfm?sid='
                                                            + str(properties["series_id"]), timeout, log)
                            # log.info('series_titles={0}'.format(series_titles))

                        elif section_caption == 'Verlag':
                            # <a href="javascript:document.suchform.autorid.options[
                            # document.suchform.autorid.selectedIndex].value='';
                            # document.suchform.verlagid.options[document.suchform.verlagid.selectedIndex].value=9;
                            # document.suchform.buchid.options[document.suchform.buchid.selectedIndex].value=0;
                            # document.suchform.submit();" class="abuchtitel"><b>Hönne Verlag<br>Balve<br>
                            # (Gebr. Zimmermann)</b></a>
                            regexp = re.compile(r'.*verlagid\.selectedIndex%5D\.value=([0-9.-]+).*')
                            match = regexp.match(
                                # section_html.decode(encoding='iso-8859-1'))  # section_html.decode('iso-8859-1')
                                section_html.decode(encoding='utf-8'))  # section_html.decode('iso-8859-1')
                            if match:
                                properties["publisher_id"] = int(match.group(1))
                            else:
                                properties["publisher_id"] = 0
                            text_list = html_lines_to_list(book_detail_field, 'Verlag')
                            # log.info('text_list={0}'.format(text_list))
                            properties["publisher"] = ''
                            if len(text_list) > 0:
                                properties["publisher"] = ' '.join(text_list).strip()
                                # ToDo: Extract publisher_name and publisher_location
                                # text_list=['Balowa', 'Gebrüder Zimmermann Verlag', 'Balve']

                        elif section_caption == 'VerlagsNr.':
                            properties["publishers_no"] = section_text

                        elif section_caption == 'Jahr':
                            # # We use this instead of strptime to handle dummy days and months
                            # # E.g. 1965-00-00
                            # year, month, day = [int(p) for p in section_text.split("-")]
                            # month = month or 1
                            # day = day or 1
                            # # Correct datetime result for day = 0: Set hour to 2 UTC
                            # # (if not, datetime goes back to the last month and, in january, even to december last year)
                            properties["pubdate"] = datetime.datetime(int(section_text), 1, 1, 2, 0, 0)

                        elif section_caption == 'Originaltitel':
                            text_list = html_lines_to_list(book_detail_field, 'Originaltitel')
                            # log.info('text_list={0}'.format(text_list))
                            if len(text_list) > 0:
                                properties["original_title"] = text_list[0]
                            if len(text_list) > 1:
                                properties["original_title"] = properties["original_title"] + ' ('
                            for i in range(1, len(text_list)):
                                properties["original_title"] = properties["original_title"] + text_list[i] + ' | '
                            if len(text_list) > 1:
                                properties["original_title"] = properties["original_title"][:-3] + ')'
                            # log.info('properties["original_title"]={0}'.format(properties["original_title"]))

                        elif section_caption == 'Originaljahr':
                            properties["original_year"] = section_text

                        elif section_caption == 'Übersetzer':
                            properties["translator"] = section_text

                        elif section_caption == 'Titelbild 2':
                            properties["cover_2_artist"] = section_text

                        elif section_caption == 'Nachdrucke':
                            # Put original content in comments with html formatting (has_html_comments=true)
                            properties["reprints"] = section_text + ':' \
                                                     + sanitize_comments_html(section_html[67:])

                        elif section_caption == 'Titelbild' \
                                and '\xa0Titelbild\xa0' not in section_html.decode(encoding='utf-8'):  # encoding='iso-8859-1'
                            # Last condition: Ignore caption text in resize buttons
                            properties["cover_artist"] = section_text
                            # log.info('properties["cover_artist"]={0}'.format(properties["cover_artist"]))

                        else:
                            if '\xa0Titelbild\xa0' not in section_html.decode(encoding='utf-8'): ## encoding='iso-8859-1'
                                if "notes" in properties:
                                    properties["notes"] = properties["notes"] + '\n' + sanitize_comments_html(
                                        section_text)
                                else:
                                    properties["notes"] = sanitize_comments_html(section_text)

                    if column_no == 2:
                        # Put original content in comments with html formatting (has_html_comments=true)
                        properties["comments"] = sanitize_comments_html(section_html)
                        # [ Winfried Scholz (1925-1981) · alias W. Brown (VP) · alias William Brown (VP) ·
                        # alias Winston Brown · alias Munro R. Upton (SP) ]
                        # [ Wilhelm Wolfgang Bröll (1913-1989) ]
                        # [ Verlagspseudonym · Autor hier: vermutlich Erwin Banzhaf ]
                        # [ Alexander Calhoun = Kurt C. Metz (1921-1985) ]
                        # [ Winfried Scholz (1925-1981) · alias W. Brown (VP) · alias William Brown (VP) ·
                        # alias Winston Brown · alias Munro R. Upton (SP) ]
                        if "authors_alias" not in properties:
                            properties["authors_alias"] = []
                        # log.info('text_line={0}'.format(text_line))
                        regexp = re.compile(r'.*?\[ (.*?) \].*')  # ? makes non-greedy
                        # regexp = re.compile(r'.{3,50}\[ (.*?) \].*')  # ? makes non-greedy
                        match = regexp.match(section_text)
                        if match:
                            text_line = match.group(1).strip()
                            # log.info('text_line={0}'.format(text_line))
                            text_line = text_line.replace('(Verlags) Pseudonym · Autor hier:', '').strip()
                            text_line = text_line.replace('Verlagspseudonym · Autor hier:', '').strip()
                            text_line = text_line.replace('Verlagspseudonym. Autor hier:', '').strip()
                            # log.info('text_line={0}'.format(text_line))
                            # log.info('properties["authors_alias"]={0}'.format(properties["authors_alias"]))
                            # Check for unwanted text_line contents
                            if not text_line[0].isdigit() and len(text_line) < 140 \
                                    and not text_line.startswith('Reihe') \
                                    and not text_line.startswith('Cover:') \
                                    and 'OA ' not in text_line and 'Supronyl' not in text_line:
                                properties["authors_alias"] = properties["authors_alias"] + \
                                                              [x.strip() for x in
                                                               re.split('· alias|· alias|\- alias| alias |\- auch\:'
                                                                        '|auch\:|=|\,', text_line)]
                                # Remove possible duplicates from "Autor" field in table no 1
                                # log.info('properties["authors_alias"]={0}'.format(properties["authors_alias"]))
                                properties["authors_alias"] = list(dict.fromkeys(properties["authors_alias"]))
                                # log.info('properties["authors_alias"]={0}'.format(properties["authors_alias"]))
                        # Get rid of empty tags
                        properties["authors_alias"] = list(filter(None, properties["authors_alias"]))
                        if len(properties["authors_alias"]) == 0:
                            del properties["authors_alias"]
                        else:
                            properties["authors_alias"].sort()

                    if column_no == 3:
                        if "cover" not in properties:  # Ignore caption text in resize buttons
                            properties["cover"] = []
                        if "cover_url" not in properties:
                            properties["cover_url"] = []
                        # section_html=b'<tr>
                        # <td><a href="javascript:coverpopup(\'covers/GreyCharles_TuermeStrahlenDenTod_SU.jpg\',
                        # \'Charles%20Grey%3A%20T%C3%BCrme%20strahlen%20den%20Tod\');" title="Cover zoomen"
                        # alt="Cover zoomen">
                        # <img src="covers/GreyCharles_TuermeStrahlenDenTod_SU.jpg" height="500" name="coverimage"
                        # border="1"></a></td>
                        # </tr>'
                        # section_html=b'<tr>
                        # <td><br><a href="javascript:coverpopup(\'covers/GreyCharles_TuermeStrahlenDenTod_4x.jpg\',
                        # \'Charles%20Grey%3A%20T%C3%BCrme%20strahlen%20den%20Tod\');" title="Cover zoomen"
                        # alt="Cover zoomen">
                        # <img src="covers/GreyCharles_TuermeStrahlenDenTod_4x.jpg" height="500" name="coverimage"
                        # border="1"></a></td>
                        # </tr>'
                        try:
                            for img_src in book_detail_field.xpath('.//img/@src'):
                                # log.info('src={0}'.format(img_src))
                                properties["cover"].append('http://www.sf-leihbuch.de/' + img_src)
                                properties["cover_url"].append('http://www.sf-leihbuch.de/' + img_src)
                            # log.info('properties["cover_url"]={0}'.format(properties["cover_url"]))
                        except Exception as e:
                            log.exception(_('Error parsing cover for url: %r. Error: %r') % (url, e))

                except Exception as e:
                    log.exception(_('Error parsing section %r for url: %r. Error: %r') % (section_html, url, e))

        # log.info('All book detail tables processed.')
        additional_comments_from_source_specific_fields = ''  # Put user defined fields in comment
        for k in sorted(properties):
            if k in ['original_title', 'subtitle', 'translator', 'original_title', 'original_year', 'cover_artist',
                     'cover_2_artist', 'publishers_no', ] and properties[k] is not None:
                additional_comments_from_source_specific_fields = \
                    additional_comments_from_source_specific_fields + k + ': ' + \
                    properties[k] + '<br />'
            if k == 'authors_alias' and len(properties[k]) > 0:
                additional_comments_from_source_specific_fields = \
                    additional_comments_from_source_specific_fields + k + ': ' + \
                    " | ".join(properties[k]).strip() + '<br />'
        additional_comments_from_source_specific_fields = additional_comments_from_source_specific_fields.replace(
            '<br /><br />', '<br />')
        # log.info('additional_comments_from_source_specific_fields={0}'.format(additional_comments_from_source_specific_fields))
        # ToDo: Convert to Kovid's recipe:
        # kovidgoyal 08-21-2020, 01:18 PM	  #2
        # Metadata download plugins can only store data in builtin columns, not custom one.
        # Q:
        # Well, are there any workaround to do that? maybe i could just save the value into a txt file, then write a
        # script to load them into my column?
        # or any other method? i just want to save the rating value into my column.
        # Kovid:
        # Save it as a tag and use a custom column template to display it in another column or use search and replace
        # to copy it into another column.
        # Q:
        # that seems OK, but i'm not sure how to do that:
        # "use a custom column template to display it in another column"
        # "use search and replace to copy it into another column"
        # could you give me a demo?
        # A likekindle:
        # @likekindle - for the Skoob bookstore (Brazilian) there are two plugins, a Metadata Download plugin, and a GUI
        # plugin called Skoob Synch, you might want to have a look at those, and IIRC Goodreads might have something
        # similar. See ==>> Index of plugins

        if len(series_titles) > 0:
            series_info = '<table><thead>'
            series_info = series_info + '<br />--- ' + _('The book series "{0}" contains').format(
                properties["series"]) + ' ---'
            series_info = series_info + '</thead><tbody>'
            for series_title in series_titles:
                if series_title[0] == properties["series_index"]:
                    series_info = series_info + '<tr style="font-weight:bold;">'
                else:
                    series_info = series_info + '<tr>'
                # series index, authors, title, year
                series_info = series_info + '<td>' + series_title[0] + '</td>'
                series_info = series_info + '<td>' + series_title[1] + '</td>'
                series_info = series_info + '<td>' + series_title[2] + '</td>'
                series_info = series_info + '<td>' + series_title[4] + '</td>'
                series_info = series_info + '</tr>'
            series_info = series_info + '</tbody></table>'
            # log.info('series_info={0}'.format(series_info))
        else:
            series_info = ''
            # log.info('No series info.')

        if 'reprints' in properties:
            properties["comments"] = properties["comments"] + '<p>' + properties["reprints"] + '</p>'

        if 'notes' in properties:
            properties["comments"] = properties["comments"] + '<p>' + properties["notes"] + '</p>'

        if series_info:
            properties["comments"] = properties["comments"] + '<p>' + series_info + '</p>'

        properties["comments"] = properties["comments"] \
                                 + '<p>--- ' + _('Candidates for user defined fields') + ' ---<br />' \
                                 + additional_comments_from_source_specific_fields + '</p>'

        # log.info(_('Adding sources to comment field.'))
        if "short_url" in properties:
            properties["comments"] = properties["comments"] + '<p>---<br />' + _('Source: ') + properties["short_url"]
        else:
            properties["comments"] = properties["comments"] + '<p>---<br />' + _('Source: ') + url
        if 'author_id' in properties:
            properties["comments"] = properties["comments"] + '<br />' + _(
                'Author: ') + 'http://www.sf-leihbuch.de/index.cfm?aid=' + str(properties["author_id"])
        if 'series_id' in properties:
            properties["comments"] = properties["comments"] + '<br />' + _(
                'Series: ') + 'http://www.sf-leihbuch.de/index.cfm?sid=' + str(properties["series_id"])
        properties["comments"] = properties["comments"] + '</p>'

        properties["tags"] = []
        # Get the site meta keyword tags
        if root.xpath('//meta[@name="keywords"]'):
            meta_keywords_string = root.xpath('//meta[@name="keywords"]/@content')[0]
            properties["tags"] = [x.strip() for x in meta_keywords_string.split(',')]  # get rid of whitespace
            # log.info('properties["tags"] from meta keywords={0}'.format(properties["tags"]))
        properties["tags"].append('Leihbuch')
        # log.info('properties["tags"]={0}'.format(properties["tags"]))
        if 'subtitle' in properties:
            properties["tags"].append(properties["subtitle"])
        if 'original_year' in properties:
            properties["tags"].append(properties['original_year'])
        if 'authors_alias' in properties:
            properties["tags"] = properties["tags"] + properties['authors_alias']
        if 'translator' in properties:
            properties["tags"].append(properties['translator'])
        if 'cover_artist' in properties:
            properties["tags"].append(properties['cover_artist'])
        if 'cover_2_artist' in properties:
            properties["tags"].append(properties['cover_2_artist'])
        if 'publishers_no' in properties:
            properties["tags"].append(_('Publisher\'s book number') + ' ' + properties['publishers_no'])
        # log.info('properties["tags"]={0}'.format(properties["tags"]))
        properties["tags"] = list(filter(None, properties["tags"]))  # Get rid of empty tags
        # log.info('properties["tags"]={0}'.format(properties["tags"]))
        properties["tags"] = [x.lower() for x in properties["tags"]]  # Convert tags to lowercase
        # log.info('properties["tags"]={0}'.format(properties["tags"]))
        properties["tags"] = list(dict.fromkeys(properties["tags"]))  # Get rid of duplicate tags
        # log.info('properties["tags"]={0}'.format(properties["tags"]))

        properties["type"] = 'BookDetail'

        log.info(_('Book details processing finished.'))
        # log.info('properties={0}'.format(properties))
        return properties


class Series(Record):
    """Access to a series listing page (``index.cfm?sid=<id>``) of sf-leihbuch.de.

    All members are class-level: the class only groups the URL helpers and
    the page parser for series pages.
    """
    # ToDo: Put all series titles in comment

    URL = 'http://www.sf-leihbuch.de/index.cfm?sid='

    @classmethod
    def root_from_url(cls, browser, url, timeout, log):
        """Fetch ``url`` and return the parsed HTML root element.

        :param browser: object providing ``open_novisit(url, timeout=...)``.
        :param url: address of the page to download.
        :param timeout: passed through to ``browser.open_novisit``.
        :param log: not used by this method; kept so the signature matches
            the other ``root_from_url`` implementations in this file.
        :return: the element produced by ``lxml.html.fromstring``.
        """
        # log.info('*** Enter Series.root_from_url().')
        # log.info('url={0}'.format(url))
        response = browser.open_novisit(url, timeout=timeout)
        # The response is decoded as UTF-8; undecodable bytes are dropped.
        raw = response.read().decode('utf-8', 'ignore')

        # Drop blank lines and glue the remaining lines together without any
        # separator, then remove tabs. splitlines() already consumes every
        # \r and \n, so this yields the same text as joining with
        # os.linesep and stripping [\n\r\t] afterwards.
        non_blank_lines = [line for line in raw.splitlines() if line.strip()]
        flattened = ''.join(non_blank_lines).replace('\t', '')
        return fromstring(clean_ascii_chars(flattened))

    @classmethod
    def url_from_id(cls, series_id):
        """Return the series page URL for ``series_id``.

        ``series_id`` may be a string or any object whose ``str()`` form is
        the id (e.g. an int); it is appended to :attr:`URL`.
        """
        return cls.URL + str(series_id)

    @classmethod
    def id_from_url(cls, url):
        """Return the run of digits at the very end of ``url`` as a string.

        :raises AttributeError: if ``url`` does not end in a digit
            (``re.search`` returns ``None`` in that case).
        """
        # Raw string: '\d' in a plain literal is an invalid string escape.
        return re.search(r'(\d+)$', url).group(1)

    @classmethod
    def from_url(cls, browser, url, timeout, log):
        """Download a series page and extract its table of titles.

        :param browser: object providing ``open_novisit(url, timeout=...)``.
        :param url: address of the series page.
        :param timeout: passed through to :meth:`root_from_url`.
        :param log: logger providing ``info`` and ``error``.
        :return: a list with one entry per table row; each entry is the list
            of stripped cell texts of that row. An empty list is returned
            when the page has no child elements or the table rows cannot be
            located.
        """
        # log.info('*** Enter Series.from_url().')
        # log.info('url={0}'.format(url))

        root = cls.root_from_url(browser, url, timeout, log)
        # log.info('root={0}'.format(root))
        # Explicit test instead of "not root": truth-testing an lxml element
        # is deprecated (it only reports whether the element has children).
        if root is None or len(root) == 0:
            # log.info('No root object from url! url={0}'.format(url))
            return []

        # Don't trust the browser! The full path copied from Chrome contains
        # a tbody after every table:
        # /html/body/div[2]/table/tbody/tr/td/table/tbody/tr[2]/td[1]/table/tbody/tr/td[2]/form/table/tbody/tr[2]/td/table/tbody/tr
        # but in the raw response there is only one tbody tag (the last one).
        rows_xpath = ('/html/body/div[2]/table/tr/td/table/tr[2]/td[1]/table/tr/td[2]/form/table/tr[2]/td/table'
                      '/tbody/tr')
        rows = root.xpath(rows_xpath)
        if not rows:
            # Log the expression that was actually evaluated.
            log.error('No rows object from xpath! xpath=' + rows_xpath)
            return []

        series_properties = []
        for row in rows:
            column_nodes = row.xpath('.//td')
            if not column_nodes:
                # The row still contributes an (empty) entry to the result.
                log.error('No column_nodes!')
            # log.info('column_node.text_content().strip()={0}'.format(column_node.text_content().strip()))
            # Column order according to the original author's note:
            # row no, author, title, publisher, year, original year, cover image, cover artist, blurb, series
            series_properties.append([column_node.text_content().strip() for column_node in column_nodes])

        log.info(_("Parsed books in series %r. Found %d titles.") % (url, len(series_properties)))
        # log.info("series_properties={0}".format(series_properties))

        return series_properties

# End of objects.py
