#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import, print_function)

__license__   = 'GPL v3'
__copyright__ = '2014, Roman Cupisz <roman.cupisz+calibre@gmail.com>, 2015-2025 enhancements by Becky <becky@fr.pl>'
__docformat__ = 'restructuredtext en'

# import six
from six import text_type as unicode

# from itertools import compress

import socket
import re
import datetime
# from collections import OrderedDict
from threading import Thread

from lxml.html import fromstring
from lxml.html import tostring

from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.comments import sanitize_comments_html
from calibre.utils.cleantext import clean_ascii_chars
from calibre.ebooks.metadata import check_isbn
# from calibre.utils.icu import capitalize, lower

from calibre.ebooks.metadata.sources.base import fixauthors

import mechanize
from mechanize import HTTPError

import calibre_plugins.lubimyczytac.config as cfg

import json

# import win32api

# Inflected forms of the Polish word for "page" (strona / strony / stron).
# The list is handed to odmiana() together with the downloaded page count when
# the "Książka ma N ..." sentence is appended to the comments.
# NOTE(review): odmiana() is defined outside this view; presumably it picks the
# form that matches the number - confirm the expected order there.
frazastrona = ["strona", "strony", "stron"]

import time
from calibre.constants import DEBUG
from calibre import prints
# Timestamp of the first debug_print() call; later calls print the time elapsed
# since this moment.
BASE_TIME = None


def debug_print(*args):
    '''
    Print ``args`` prefixed with the seconds elapsed since the first call.

    The reference time is recorded on the first call even when calibre's
    DEBUG flag is off; output is only produced when DEBUG is truthy.
    '''
    global BASE_TIME
    BASE_TIME = time.time() if BASE_TIME is None else BASE_TIME
    if not DEBUG:
        return
    elapsed = time.time() - BASE_TIME
    prints('DEBUG: %6.1f' % elapsed, *args)


class Worker(Thread):  # Get details

    '''
    Get book details from LubimyCzytac.pl book page in a separate thread
    '''

    def __init__(self, url, result_queue, browser, log, relevance, plugin, timeout=20):
        '''
        Remember the job parameters and prepare the thread.

        :param url: address of the book page to download
        :param result_queue: queue that receives the finished Metadata object
        :param browser: object providing ``clone_browser()``; the clone, not
            the object passed in, is kept on the worker
        :param log: logger used for all diagnostics
        :param relevance: value later stored as ``source_relevance``
        :param plugin: owning plugin, used for its identifier/cover caches
        :param timeout: timeout passed to the page request (default 20)
        '''
        Thread.__init__(self)
        self.daemon = True
        self.url = url
        self.result_queue = result_queue
        self.log = log
        self.timeout = timeout
        self.relevance = relevance
        self.plugin = plugin
        self.browser = browser.clone_browser()
        # Filled in by parse_details() once the page has been processed
        self.cover_url = None
        self.lubimy_czytac_id = None
        self.isbn = None

    def run(self):
        '''
        Thread entry point: download and parse the book page.

        Any ordinary exception raised by get_details() is logged with its
        traceback and swallowed so that a failing page never kills the
        caller. Interpreter-level signals (KeyboardInterrupt, SystemExit)
        are deliberately not caught.
        '''
        try:
            self.get_details()
        except Exception:
            self.log.exception('get_details failed for url: %r'%self.url)

    def parse_first(self, root, xpath, loginfo, convert=lambda x: x[0].strip()):
        '''
        Evaluate ``xpath`` on ``root`` and return the converted result.

        :param root: object exposing ``xpath()`` (the parsed page)
        :param xpath: XPath expression to evaluate
        :param loginfo: short label used in the log messages
        :param convert: callable applied to the non-empty node list; by
            default the first node, stripped
        :return: ``convert(nodes)``, or None when nothing matched or when
            any step raised (the error is logged, not propagated)
        '''
        try:
            nodes = root.xpath(xpath)
            # Convert once and reuse the value for both the log line and the
            # return value
            value = convert(nodes) if nodes else None
            printable = strip_accents(unicode(str(value))) if nodes else None
            self.log.info('BECKY INFO: Found %s: %s' % (loginfo, printable))
            return value
        except Exception:
            self.log.exception('Error parsing for %s with xpath: %s' % (loginfo, xpath))
            return None

    def get_details(self):
        '''
        Download the book page at ``self.url`` and pass it to parse_details().

        An HTTP 500 response is not treated as fatal: its body is read and
        parsed like a normal page. Every other download problem (other HTTP
        errors, 404/503/500 reported through ``getcode()``, timeouts, any
        other exception) is logged and ends the method without a result.
        The method also stops when the page contains the service's
        "page not found" sentence or cannot be parsed as HTML.
        '''
        try:
            self.log.info('LubimyCzytac.pl   url: %r'%self.url)
            response = self.browser.open_novisit(self.url, timeout=self.timeout)
            raw = response.read().strip()
        except HTTPError as e:
            self.log.error(f"HTTP Error: {e}")
            if e.code != 500:
                self.log.error("Inny blad HTTP, nie mozna pobrac danych")
                return
            self.log.error('Serwis LubimyCzytac domyślnie zwraca blad 500.')
            self.log.error('To nie jest problem zwiazany z wtyczka!')
            # Read the response body even though the status is 500, then
            # carry on and try to parse it
            raw = e.read().strip()
            self.log.info("Probuje parsowac strone pomimo bledu 500")
        except Exception as e:
            # Exceptions that are not HTTPError may still expose an HTTP
            # status through getcode(); query it once
            getcode = getattr(e, 'getcode', None)
            code = getcode() if callable(getcode) else None
            if code == 404:
                self.log.error('URL malformed: %r'%self.url)
                return
            if code == 503:
                self.log.error('Serwis LubimyCzytac jest niedostepny.')
                self.log.error('To nie jest problem zwiazany z wtyczka!')
                return
            if code == 500:
                self.log.error('Serwis LubimyCzytac domyślnie zwraca blad 500.')
                self.log.error('To nie jest problem zwiazany z wtyczka!')
                return
            # A timeout can arrive either wrapped as the first argument of
            # another exception or raised directly
            args = getattr(e, 'args', None) or [None]
            if isinstance(e, socket.timeout) or isinstance(args[0], socket.timeout):
                msg = 'LubimyCzytac.pl timed out. Try again later.'
                self.log.error(msg)
            else:
                msg = 'Failed to make details query: %r'%self.url
                self.log.exception(msg)
            return

        raw = raw.decode('utf-8', errors='replace')

        if 'Przepraszamy, nie możemy znaleźć strony, której szukasz.' in raw:
            self.log.error('URL malformed: %r'%self.url)
            return

        try:
            root = fromstring(clean_ascii_chars(raw.encode("utf-8")))
        except Exception:
            msg = 'Failed to parse LubimyCzytac.pl details page: %r'%self.url
            self.log.exception(msg)
            return

        self.parse_details(root)

    def parse_details(self, root):
        '''
        Turn a parsed book page into a Metadata object and queue it.

        Each field is extracted by its own ``parse_*`` method. A failing
        parser is logged and leaves its field empty; it never aborts the
        whole page. Nothing is queued when the title, the authors or the
        LubimyCzytac.pl id cannot be determined.

        Depending on the plugin preferences, extra facts (page count,
        original title, translators, series, category, keywords, ratings,
        first publication dates, id) are appended to the comments as
        ``<p id="...">`` paragraphs, and information about other editions is
        placed in front of them.

        :param root: parsed HTML tree of the book page
        '''
        self.log.info('---------------------------------  PARSOWANIE  ---------------------------------')

        store = cfg.plugin_prefs[cfg.STORE_NAME]

        def pref(key):
            # Saved preference, or the plugin default when the key is absent
            return store.get(key, cfg.DEFAULT_STORE_VALUES[key])

        def attempt(what, parser, *args):
            # Run a single parser. A failure is logged with the traceback and
            # yields None, so every result name is always bound afterwards.
            try:
                return parser(*args)
            except Exception:
                self.log.exception('Error parsing %s for url: %r' % (what, self.url))
                return None

        append_pages = pref(cfg.APPEND_PAGES)
        append_pages_new = pref(cfg.APPEND_PAGES_NEW)
        append_original_title = pref(cfg.APPEND_ORIGINAL_TITLE)
        append_translators = pref(cfg.APPEND_TRANSLATION)
        append_serwyd = pref(cfg.APPEND_SERWYD)
        append_kategoria = pref(cfg.APPEND_GENRE)
        append_keywords = pref(cfg.APPEND_KEYWORDS)
        append_rating = pref(cfg.APPEND_RATING)
        append_rating_counts = pref(cfg.APPEND_RATING_COUNT)
        append_id = pref(cfg.APPEND_ID)
        before_other_editions_exist = pref(cfg.BEFORE_OTHER_EDITIONS_EXIST)
        before_other_editions_all = pref(cfg.BEFORE_OTHER_EDITIONS_ALL)
        before_other_editions_short = pref(cfg.BEFORE_OTHER_EDITIONS_SHORT)
        before_hr = pref(cfg.BEFORE_HR)
        asTags = pref(cfg.TAGS_AS)
        append_first_pubdate = pref(cfg.APPEND_FIRST_PUBDATE)
        append_first_pubdate_pl = pref(cfg.APPEND_FIRST_PUBDATE_PL)

        lubimy_czytac_id = attempt('LubimyCzytac.pl id', self.parse_lubimy_czytac_id, self.url)
        title = attempt('title', self.parse_title, root)
        # None (no authors / parser failure) is handled like an empty list below
        authors = attempt('authors', self.parse_authors, root)

        # Fix for titles with author (2025-08-26): when the title ends with
        # " - <author>", cut that suffix off
        if title and authors:
            title = title.strip()
            for author in authors:
                suffix = " - " + author
                if title.endswith(suffix):
                    title = title[:-len(suffix)].strip()
                    break

        translators = attempt('translators', self.parse_translators, root)

        if not title or not authors or not lubimy_czytac_id:
            if not title:
                self.log.error('LubimyCzytac.pl_id: %r Title: %r Authors: %r'%(lubimy_czytac_id, title, authors))
                self.log.error('Could not find LubimyCzytac.pl_id or title or authors for %r'%self.url)
            return

        mi = Metadata(title, authors)
        mi.set_identifier('lubimyczytac', lubimy_czytac_id)
        self.lubimy_czytac_id = lubimy_czytac_id

        serwyd = attempt('book series', self.parse_serwyd, root)

        try:
            (series, series_index) = self.parse_series(root)
        except Exception:
            self.log.exception('Error parsing series (cykl) for url: %r'%self.url)
            series = series_index = None
        if series:
            mi.series = series
        if series and series_index:
            mi.series_index = series_index

        replaceISBN = pref(cfg.REPLACE_ISBN)
        self.log.info('BECKY INFO: Download ISBN number from LC: %s'%replaceISBN)
        if replaceISBN == 1:
            try:
                isbn = self.parse_isbn(root)
                if isbn:
                    self.isbn = mi.isbn = isbn
            except Exception:
                self.log.exception('Error parsing ISBN for url: %r'%self.url)

        try:
            mi.rating = self.parse_rating(root)
        except Exception:
            self.log.exception('Error parsing ratings (stars) for url: %r'%self.url)

        rating_LC = attempt('ratings (number)', self.parse_rating_lc, root)
        rating_LC_count = attempt('ratings counts', self.parse_rating_lc_count, root)
        pobrane_strony = attempt('pages', self.parse_pages, root)
        tytul_oryginalu = attempt('original title', self.parse_tytul_oryginalu, root)
        first_pubdate = attempt('first publication date', self.parse_first_published_date, root)
        first_pubdate_pl = attempt('first publication date in Poland', self.parse_first_published_date_pl, root)
        inne_wydania = attempt('other editions', self.parse_inne_wydania, root)

        # Pre-bound so a failure while logging cannot leave the name undefined
        inne_wydania_lista = None
        try:
            inne_wydania_lista = self.parse_inne_wydania_all(root)
            if inne_wydania_lista:
                self.log.info('BECKY INFO: Pelna lista innych wydan:\n-----%s-----'%strip_accents(inne_wydania_lista))
        except Exception:
            self.log.exception('Error parsing list of other editions for url: %r'%self.url)

        inne_wydania_short_lista = None
        try:
            inne_wydania_short_lista = self.parse_inne_wydania_short(root)
            if inne_wydania_short_lista:
                self.log.info('BECKY INFO: Krotka lista innych wydan:\n-----%s-----'%strip_accents(inne_wydania_short_lista).replace('•',  '*'))
        except Exception:
            self.log.exception('Error parsing list of other editions for url: %r'%self.url)

        # Categories and keywords are parsed independently so that a failure
        # of one does not discard the other
        tags = attempt('tags', self.parse_tags, root)
        tags_keywords = attempt('tags', self.parse_tags_keywords, root)

        first_letter = re.compile('[a-ząćęłńóśźżA-ZĄĆĘŁŃÓŚŹŻ]')

        def capitalize_first(text):
            # Upper-case the first letter found in the text
            return first_letter.sub(lambda m: m.group().upper(), text, count=1)

        kategoria = None        # category text for the comments
        slowa_kluczowe = None   # prettified keywords for the comments
        try:
            if asTags == 0:
                if tags:
                    # Categories become calibre tags; commas inside a category
                    # are replaced with semicolons
                    mi.tags = [w.replace(',', ';') for w in tags]
                    self.log.info('BECKY INFO: Do pola Tags/Etykiety dopisano kategorie')
            elif tags_keywords:
                # Keywords become calibre tags
                mi.tags = tags_keywords
                self.log.info('BECKY INFO: Do pola Tags/Etykiety dopisano slowa kluczowe')
            if tags is not None:
                # NOTE(review): the categories are joined without a separator -
                # confirm parse_tags() never returns more than one entry
                kategoria = ''.join(capitalize_first(y) for y in tags)
            if tags_keywords is not None:
                slowa_kluczowe = [capitalize_first(y) for y in tags_keywords]
        except Exception:
            self.log.exception('Error parsing tags for url: %r'%self.url)

        def append(html):
            # Works even when the page had no description (comments is None)
            mi.comments = (mi.comments or '') + html

        def prepend(html):
            mi.comments = html + (mi.comments or '')

        try:
            mi.comments = self.parse_comments(root)

            if append_pages == 1 and pobrane_strony is not None:
                if append_pages_new == 0:
                    append('<p id="strony">Książka ma ' + pobrane_strony + odmiana(pobrane_strony, frazastrona) + '.</p>')
                else:
                    # Format for the new METAmover private release
                    append('<p id="numberOfPages">Liczba stron w książce: ' + pobrane_strony + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono liczbe stron')

            if append_original_title == 1 and tytul_oryginalu is not None:
                append('<p id="tytul_oryginalu">Tytuł oryginału: <em>' + tytul_oryginalu + '</em></p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono tytul oryginalu')

            # Truthiness check: an empty list must not yield an empty line
            if append_translators == 1 and translators:
                append('<p id="tlumaczenie">Tłumaczenie: ' + ', '.join(translators) + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono tlumaczy')

            if append_serwyd == 1 and serwyd is not None:
                append('<p id="seria">Seria: ' + serwyd + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono nazwe serii wydawniczej')

            if append_kategoria == 1 and kategoria is not None:
                append('<p id="kategoria">Kategoria: ' + kategoria + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono nazwe kategorii')

            if append_keywords == 1 and slowa_kluczowe is not None:
                append('<p id="slowa_kluczowe">Słowa kluczowe: ' + ', '.join(slowa_kluczowe) + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono slowa kluczowe')

            # lubimy_czytac_id is always set here (checked before mi was built)
            if append_rating == 1 and rating_LC is not None:
                append('<p id="ratingLC">Ocena w LC: ' + rating_LC + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono ocene z serwisu LubimyCzytac')

            if append_rating_counts == 1 and rating_LC_count is not None:
                append('<p id="ratingLC_count">Liczba ocen w LC: ' + rating_LC_count + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono liczbe ocen z serwisu LubimyCzytac')

            if append_first_pubdate == 1 and first_pubdate is not None:
                append('<p id="pierwsze_wydanie">Data pierwszego wydania: ' + first_pubdate.strftime("%d.%m.%Y") + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono date pierwszego wydania')

            if append_first_pubdate_pl == 1 and first_pubdate_pl is not None:
                append('<p id="pierwsze_wydanie_pl">Data pierwszego wydania w Polsce: ' + first_pubdate_pl.strftime("%d.%m.%Y") + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono date pierwszego wydania w Polsce')

            if append_id == 1:
                append('<p id="lubimyczytac">ID: ' + lubimy_czytac_id + '</p>')
                self.log.info('BECKY INFO: Do komentarza dolaczono ID z serwisu LubimyCzytac')

            show_full = before_other_editions_all == 1 and inne_wydania_lista is not None
            show_short = before_other_editions_short == 1 and inne_wydania_short_lista is not None
            show_flag = before_other_editions_exist == 1 and inne_wydania is not None

            # Each prepend goes in front of the previous one, so the final
            # order is: flag, short list, full list, <hr>, comments
            if before_hr == 1 and (show_full or show_short or show_flag):
                prepend('<hr id="przed-komentarzem">')
            if show_full:
                prepend(inne_wydania_lista)
            if show_short:
                prepend(inne_wydania_short_lista)
            if show_flag:
                prepend('<p>Inne wydania: <span style="color:red; font-weight: bold;">Istnieją inne wydania</span></p>')

        except Exception:
            self.log.exception('Error parsing comments for url: %r'%self.url)

        try:
            mi.publisher = self.parse_publisher(root)
        except Exception:
            self.log.exception('Error parsing publisher for url: %r'%self.url)

        try:
            mi.pubdate = self.parse_published_date(root)
        except Exception:
            self.log.exception('Error parsing published date for url: %r'%self.url)

        try:
            languages = self.parse_languages(root)
            if languages:
                mi.languages = languages
        except Exception:
            self.log.exception('Error parsing languages for url: %r'%self.url)

        try:
            self.cover_url = self.parse_cover(root)
        except Exception:
            self.log.exception('Error parsing cover for url: %r'%self.url)
        mi.has_cover = bool(self.cover_url)

        mi.source_relevance = self.relevance

        if self.lubimy_czytac_id:
            if self.isbn:
                self.plugin.cache_isbn_to_identifier(self.isbn, self.lubimy_czytac_id)
            if self.cover_url:
                self.plugin.cache_identifier_to_cover_url(self.lubimy_czytac_id, self.cover_url)

        self.clean_downloaded_metadata(mi)
        self.result_queue.put(mi)

    def parse_lubimy_czytac_id(self, url):
        '''
        Extract the numeric book id from a LubimyCzytac.pl book URL.

        Both ``/ksiazka/<id>/...`` and ``/audiobook/<id>/...`` addresses are
        recognised (audiobooks since plugin version 2.3.3); the part after
        the id may be missing.

        :param url: book page address
        :return: the id as a string of digits
        :raises AttributeError: when the URL does not match (``re.search``
            returned None)
        '''
        pattern = r'lubimyczytac\.pl/(?:ksiazka|audiobook)/(\d+)(?:/|$)'
        return re.search(pattern, url).group(1)


    def parse_title(self, root):
        '''
        Read the book title from the page's ``og:title`` meta tag.

        Only the text before the first ``|`` is used. Literal ``&amp;`` and
        ``&quot;`` sequences left in the text are decoded; exactly two
        ``&quot;`` are turned into Polish opening/closing quotation marks,
        any other number into plain double quotes.

        :return: the title, or None when it is missing or empty
        '''
        try:
            og_title = root.xpath('//meta[@property="og:title"]/@content')[0]
        except IndexError:
            self.log.error('BECKY ERROR: Tymczasowy problem ze strona tej ksiazki w serwisie LC. Sprobuj ponownie za jakis czas.')
            title_text = ''
        else:
            title_text = ('' + og_title).split("|")[0]

        # Ampersand
        title_text = title_text.replace("&amp;", "&")

        # Count the quotation marks to decide how to render them
        if title_text.count("&quot;") == 2:
            title_text = title_text.replace("&quot;", "„", 1).replace("&quot;", "”", 1)
        else:
            title_text = title_text.replace("&quot;", "\"")

        self.log.info('BECKY INFO: Found title: %s'%strip_accents(title_text))
        return title_text or None

    def parse_authors(self, root):
        '''
        Collect the author names shown on the book page.

        The names linked next to the title come first, followed by the ones
        from the ``authors-list`` element (additional authors). Links
        without text are skipped.

        :return: list of names, or None when the page shows no author link
        '''
        main_nodes = root.xpath('//section[@class="container book"]//span[contains(@class,"author")]/a[contains(@class,"link-name")]')
        if not main_nodes:
            self.log.info('Problem z pobraniem autora. Powiadom Becky o tym.')
            return None

        authors = []

        def collect(nodes):
            # Append the stripped text of every node that has text
            for node in nodes:
                if node.text is None:
                    continue
                name = node.text.strip()
                self.log.info('BECKY INFO: Found author: %s'%strip_accents(unicode(str(name))))
                authors.append(name)

        collect(main_nodes)
        # Additional authors are added here
        collect(root.xpath('//section[@class="container book"]//div[@id="authors-list"]/a[@class="link-name"]'))
        return authors

    # Translators
    def parse_translators(self, root):
        '''
        Collect the translators listed in the book details.

        Names are stripped before the duplicate check, so the same name with
        different surrounding whitespace is logged and stored only once.

        :return: result of ``unique()`` applied to the collected names, or
            None when the page lists no translator links
        '''
        translator_nodes = root.xpath('//section[@class="container book"]//dt[contains(text(),"Tłumacz:")]/following-sibling::dd/a[contains(@href,"/tlumacz/")]')
        if not translator_nodes:
            return None

        translators = []
        for translator_node in translator_nodes:
            if translator_node.text is None:
                continue
            translator = translator_node.text.strip()
            if translator in translators:
                continue
            self.log.info('BECKY INFO: Found translator: %s'%strip_accents(unicode(str(translator))))
            translators.append(translator)
        return unique(translators)

    # Publisher's series (seria wydawnicza)
    def parse_serwyd(self, root):
        '''
        Read the name of the publisher's series ("Seria:") of the book.

        The "Seria:" entry of the details list decides whether a series
        exists; the name itself is the text of the first ``/seria/`` link
        inside the book container.

        :return: the stripped series name, or None when the book has no
            series or the lookup fails (failures are logged, not raised)
        '''
        try:
            marker = root.xpath('//section[@class="container book"]//dt[contains(text(),"Seria:")]/following-sibling::dd/a[contains(@href,"/seria/")]/text()')
            if not marker:
                self.log.info('BECKY INFO: Not found book series (Nie znaleziono serii wydawniczej)')
                return None

            names = root.xpath('//section[@class="container book"]//a[contains(@href,"/seria/")]/text()')
            if not names:
                return None

            serwyd = names[0].strip()
            self.log.info('BECKY INFO: Found book series (Znaleziono serie wydawnicza): %s'%strip_accents(unicode(serwyd)))
            return serwyd
        except Exception:
            # Best effort: report the problem instead of hiding it
            self.log.exception('Error parsing book series (seria wydawnicza)')
            return None

    # Book cycle / series (cykl)
    def parse_series(self, root):
        '''
        Read the book cycle ("cykl") and the volume number from the page.

        The link text is expected to look like ``Name (tom N)``. When the
        ``(tom ...)`` part is missing the whole text is the series name and
        the index is 0. For a bundle such as ``1-3`` the first number is
        used; a volume that is not numeric gives index 0.

        :return: ``(series, series_index)``; ``(None, None)`` when the page
            has no cycle link or the lookup fails (failures are logged, not
            raised)
        '''
        try:
            labels = root.xpath('//section[@class="container book"]//span/a[contains(@href,"/cykl/")]/text()')
            if not labels:
                return (None, None)

            label = labels[0].replace('\n', '').strip()
            if not label:
                return (None, None)

            # Split "Name (tom N)" into the name and the volume part
            series, marker, volume = label.partition(' (tom ')
            series_index = 0
            if marker:
                volume = volume.replace(" ", "").replace(")", "")
                # Check whether this is a set/bundle of volumes, e.g. 1-3
                volume = volume.split('-', 1)[0]
                if volume.replace(".", "").isdigit():
                    series_index = get_int_or_float(volume)

            self.log.info('BECKY INFO: Found series (znaleziono cykl): (%s, %s)' % (strip_accents(unicode(series)), series_index))
            return (series, series_index)
        except Exception:
            # Best effort: report the problem instead of hiding it
            self.log.exception('Error parsing series (cykl)')
            return (None, None)

    # STARS
    def parse_rating(self, root):
        '''
        Read the rating from the ``books:rating:value`` meta tag.

        The value (a decimal comma is accepted) is halved and rounded with
        the built-in ``round()``.

        :return: the rounded, halved rating, or None when the tag is missing
        '''
        values = root.xpath('//meta[@property="books:rating:value"]/@content')
        if not values:
            return None

        lc_rating = values[0]
        self.log.info('BECKY INFO: Ocena w LC: %s'%lc_rating)
        stars = round(float(lc_rating.replace(',', '.')) / 2)
        self.log.info('BECKY INFO: Found rating: %s'%stars)
        return stars

    # VALUE
    def parse_rating_lc(self, root):
        '''
        Return the rating from the ``books:rating:value`` meta tag as text.

        A decimal comma is replaced with a dot; the value is not converted
        to a number.

        :return: the rating string, or None when the tag is missing
        '''
        found = root.xpath('//meta[@property="books:rating:value"]/@content')
        return found[0].replace(',', '.') if found else None

    def parse_rating_lc_count(self,root):
        '''
        Read the number of ratings from the page's JSON-LD data.

        The second ``application/ld+json`` script of the page is decoded and
        its ``aggregateRating.ratingCount`` value is used.

        :return: the count as a string; None when the page has fewer than
            two JSON-LD scripts, when the value is absent or zero, or when
            the decoded data does not have the expected shape
        :raises ValueError: when the script content is not valid JSON
        '''
        scripts = root.xpath('//script[@type="application/ld+json"]//text()')
        if len(scripts) < 2:
            self.log.info('BECKY INFO: Brak danych JSON-LD z liczba ocen')
            return None

        data = json.loads(scripts[1])
        try:
            count = data["aggregateRating"]["ratingCount"]
        except (KeyError, TypeError, IndexError):
            # Key missing, or the payload is not the expected nested object
            return None
        if not count:
            return None

        rating_count = str(count)
        self.log.info('BECKY INFO: Liczba ocen: %s'%rating_count)
        return rating_count

    def parse_isbn(self, root):
        '''
        Read the ISBN of the book.

        The ``books:isbn`` meta tag is tried first; when it is missing or
        empty, the "ISBN:" entry of the ``book-details`` list is used.

        :return: the ISBN text, or None when neither source provides one
        '''
        # First try the <meta> tag
        meta = root.xpath('//meta[@property="books:isbn"]/@content')
        isbn_node = meta[0] if meta else None
        if isbn_node:
            self.log.info('BECKY INFO: ISBN from meta: %s'%isbn_node)
            return isbn_node
        self.log.info('BECKY INFO: Problem z numerem ISBN pobieranym z meta, ale probuje dalej...')

        # Fall back to the details list; parse_first() returns None when the
        # entry is absent, so guard before stripping
        data = self.parse_first(root,'//div[@id="book-details"]//dt[contains(text(),"ISBN:")]/following-sibling::dd[1]/text()','isbn')
        isbn_details = data.strip() if data else None
        if isbn_details:
            self.log.info('BECKY INFO: ISBN from book-details: %s'%isbn_details)
            return isbn_details

        self.log.info('BECKY INFO: Problem z numerem ISBN w book-details')
        return None

    def parse_publisher(self, root):
        '''
        Return the text of the first link inside the "Wydawnictwo:" span, or
        ``None`` when no such link exists.
        '''
        names = root.xpath('//span[contains(@class,"book__txt") and contains(normalize-space(.),"Wydawnictwo:")]//a/text()')
        return names[0] if names else None

    def parse_published_date(self, root):
        pub_date_text = None
        data = json.loads(root.xpath('//script[@type="application/ld+json"]//text()')[1])
        # self.log.info(data)
        # pub_date_text = self.parse_first(root,'//dd[@itemprop="datePublished"]/text()','datePublished')
        # pub_date_text = data['itemListElement'][0]['datePublished']
        try:
            if data['datePublished']:
                pub_date_text = data['datePublished']
        except KeyError:
            pub_date_text = None
        # self.log.info(pub_date_text)

        if pub_date_text is None:
            pub_date_details = None
            data = self.parse_first(root,'//div[@id="book-details"]//dt[contains(text(),"Data wydania:")]/following-sibling::dd[1]/text()','data_wydania')
            pub_date_details = data.strip()
            pub_date_text = pub_date_details

        if pub_date_text is not None:
            pub_date = self._convert_date_text(pub_date_text.strip())
            self.log.info('BECKY INFO: Found publication date: %s'%pub_date)
        else:
            self.log.info('BECKY INFO: Not found publication date')
            pub_date = None
        return pub_date

    def parse_first_published_date(self, root):
        first_pub_date_text = None
        data = self.parse_first(root,'//div[@id="book-details"]//dt[contains(@title,"Data pierwszego wydania oryginalnego")]/following-sibling::dd[1]/text()','data_first')
        first_pub_date_text = data

        if first_pub_date_text is not None:
            first_pub_date = self._convert_date_text_exact(first_pub_date_text.strip())
            self.log.info('BECKY INFO: Found first publication date: %s'%first_pub_date)
        else:
            self.log.info('BECKY INFO: Not found first publication date')
            first_pub_date = None
        return first_pub_date

    def parse_first_published_date_pl(self, root):
        first_pub_date_pl_text = None
        data = self.parse_first(root,'//div[@id="book-details"]//dt[contains(@title,"Data pierwszego wydania polskiego")]/following-sibling::dd[1]/text()','data_first_pl')
        first_pub_date_pl_text = data

        if first_pub_date_pl_text is not None:
            first_pub_date_pl = self._convert_date_text_exact(first_pub_date_pl_text.strip())
            self.log.info('BECKY INFO: Found first publication date in Poland: %s'%first_pub_date_pl)
        else:
            self.log.info('BECKY INFO: Not found first publication date in Poland')
            first_pub_date_pl = None
        return first_pub_date_pl

    def _convert_date_text(self, date_text):
        year = int(datetime.datetime.strptime(date_text, '%Y-%m-%d').strftime('%Y'))
        month = int(datetime.datetime.strptime(date_text, '%Y-%m-%d').strftime('%m'))
        day = int(datetime.datetime.strptime(date_text, '%Y-%m-%d').strftime('%d'))
        if day == 1 and month == 1:
            month = 7
        return datetime.datetime(year, month, day, 14, 0, 0)

    def _convert_date_text_exact(self, date_text):
        year = int(datetime.datetime.strptime(date_text, '%Y-%m-%d').strftime('%Y'))
        month = int(datetime.datetime.strptime(date_text, '%Y-%m-%d').strftime('%m'))
        day = int(datetime.datetime.strptime(date_text, '%Y-%m-%d').strftime('%d'))
        return datetime.datetime(year, month, day, 14, 0, 0)

    def parse_pages(self, root):
        '''
        Return the number of pages as stripped text, or ``None``.

        The value is read from ``numberOfPages`` of the second
        ``application/ld+json`` script.  A missing script, invalid JSON, an
        absent key or an empty/zero value all yield ``None``.  Numeric JSON
        values are converted to text.
        '''
        try:
            data = json.loads(root.xpath('//script[@type="application/ld+json"]//text()')[1])
            pages_value = data['numberOfPages']
        except (IndexError, KeyError, TypeError, ValueError):
            pages_value = None

        # '%s' formatting accepts both JSON strings and JSON numbers.
        pages = ('%s' % pages_value).strip() if pages_value else None
        if pages:
            self.log.info('BECKY INFO: Found number of pages: %s'%pages)
            return pages
        self.log.info('BECKY INFO: Not found number of pages')
        return None

    def parse_tytul_oryginalu(self, root):
        '''
        Return the original title as delivered by ``self.parse_first``, or
        ``None`` when nothing (or an empty value) was found.
        '''
        original_title = self.parse_first(root,'//section[@class="container book"]//dt[contains(text(),"Tytuł oryginału")]/following-sibling::dd/text()','tytul_oryginalu')
        return original_title or None

    def parse_inne_wydania(self, root):
        '''
        Return the text nodes of the "show other editions" button, or
        ``None`` when the page has no such button text.
        '''
        button_text = root.xpath('//button[@id="buttonShowOtherEditions"]/text()')
        if not button_text:
            return None
        self.log.info('BECKY INFO: Istnieja inne wydania tej ksiazki')
        return button_text

    def parse_inne_wydania_all(self, root):
        '''
        Build an HTML paragraph linking to every other edition of the book.

        Titles come from the ``alt`` text of the cover images in the
        ``other-editions`` block and are paired, in document order, with the
        links found in the same block.  Returns ``None`` when there are no
        titles or no links.
        '''
        alt_texts = root.xpath('//div[@id="other-editions"]/div/div//a/img/@alt')
        if not alt_texts:
            return None

        self.log.info('BECKY INFO: Tytuly innych wydan (wszystkie):')
        # Keep only what follows the last '/' and drop diacritics.
        titles = [strip_accents(unicode(str(alt.rpartition('/')[2]))) for alt in alt_texts]
        # The de-duplicated list is used for logging only.
        loggable_titles = [strip_accents(unicode(str(title))) for title in unique(titles)]
        self.log.info(loggable_titles)

        hrefs = root.xpath('//div[@id="other-editions"]/div/div//a/@href')
        if not hrefs:
            self.log.info('BECKY INFO: Brak linkow do innych wydan. Przypadek? Nie sadze.')
            return None

        self.log.info('BECKY INFO: Linki do innych wydan:')
        absolute_links = ['https://lubimyczytac.pl' + href for href in hrefs]
        anchors = ['<a href="%s">%s</a>' % pair for pair in zip(absolute_links, titles)]
        return '<p id="lista-innych-wydan">Lista innych wydań:<br>' + '<br>'.join(anchors) + '</p>'

    def parse_inne_wydania_short(self, root):
        '''
        Build a short HTML paragraph listing the distinct titles of the
        other editions, or return ``None`` when there are none.

        Titles are taken from the last path segment of each link in the
        ``other-editions`` block, with diacritics removed.  Entries that
        differ only by letter case are merged, keeping the variant that
        compares smallest.
        '''
        hrefs = root.xpath('//div[@id="other-editions"]/div/div//a/@href')
        if not hrefs:
            return None

        slugs = [strip_accents(unicode(str(href.rpartition('/')[2]))) for href in hrefs]

        # Case-insensitive de-duplication that preserves first-seen order.
        distinct = []
        for candidate in slugs:
            folded = candidate.lower()
            for position, kept in enumerate(distinct):
                if kept.lower() == folded:
                    distinct[position] = min(candidate, kept)
                    break
            else:
                distinct.append(candidate)

        joined = ' • '.join('%s' % title for title in distinct)
        if len(distinct) != 1:
            opening = '<p id="lista-innych-wydan-short">Tytuły innych wydań: '
        elif len(slugs) == 1:
            opening = '<p id="lista-innych-wydan-short">Tytuł innego wydania: '
        else:
            opening = '<p id="lista-innych-wydan-short">Wszystkie inne wydania mają ten sam tytuł: '
        return opening + joined + '</p>'

    def parse_comments(self, root):
        '''
        Return the sanitized book description, or ``''`` when none is found.

        The collapsible description block is preferred; ``<p class="source">``
        elements are removed from the tree before it is serialised.  If the
        block is missing, the ``og:description`` meta tag is used instead.
        '''
        description = root.xpath('//section[@class="container book"]//div[@class="collapse-content"]')
        if description:
            for source_note in root.xpath('//p[@class="source"]'):
                source_note.getparent().remove(source_note)
            markup = tostring(description[0], method='html')
            return sanitize_comments_html(markup)

        # try <meta>
        meta_description = root.xpath('//meta[@property="og:description"]/@content')
        if meta_description:
            return sanitize_comments_html(meta_description[0])
        return ''

    def parse_tags(self, root):
        '''
        Return the genre names from the breadcrumb navigation as a list of
        tags, or ``None`` when there are none.

        ``', itd.'`` inside a name is rewritten to ``' itd.'``.  Parsing is
        best effort: on an unexpected error a warning is logged and whatever
        was collected so far (possibly ``None``) is returned.
        '''
        tags = None
        try:
            tags_from_genre = root.xpath('//nav[@aria-label="breadcrumbs"]//a[contains(@href,"/ksiazki/k/")]/span[@itemprop="name"]/text()')
            if not tags_from_genre:
                self.log.info('BECKY INFO: No tags (genre)')
                return None
            # Accent-free copy, used for the log line only.
            tags_from_genre_temp = strip_accents(unicode(''.join(tags_from_genre).strip()))
            self.log.info('BECKY INFO: Tags (genre): %s'%tags_from_genre_temp)
            tags = tags_from_genre
            tags = [w.replace(', itd.',  ' itd.') for w in tags]
            return tags
        except Exception as err:
            # Keep the best-effort contract, but no longer swallow the error
            # silently (and no longer trap SystemExit/KeyboardInterrupt).
            self.log.warning('BECKY INFO: Problem parsing tags (genre): %r' % (err,))
            return tags

    def parse_tags_keywords(self, root):
        '''
        Return the keyword tags ("Tagi:") as a list of unique, stripped
        strings in page order, or ``None`` when there are none.

        Blank entries are skipped.  Parsing is best effort: on an unexpected
        error a warning is logged and the tags collected so far (possibly an
        empty list) are returned.
        '''
        tags_keywords = []
        try:
            tags_from_keywords = root.xpath('//section[@class="container book"]//dt[contains(text(),"Tagi:")]/following-sibling::dd/a/text()')
            for tag in tags_from_keywords:
                cleaned = tag.strip()
                # Compare the stripped form so whitespace variants collapse.
                if cleaned and cleaned not in tags_keywords:
                    tags_keywords.append(cleaned)
        except Exception as err:
            self.log.warning('BECKY INFO: Problem parsing tags (keywords): %r' % (err,))
            return list(tags_keywords)

        if not tags_keywords:
            self.log.info('BECKY INFO: No tags (keywords)')
            return None
        self.log.info('BECKY INFO: Unikatowe tags (keywords): %s'%tags_keywords)
        return tags_keywords

    def parse_languages(self, root):
        '''
        Return a list of language codes for the book.

        ``"pol"`` is included when the language text contains "polski" and
        ``"eng"`` when it contains "angielski"; the list is empty when the
        text is missing or names neither.
        '''
        lang = self.parse_first(root,'//section[@class="container book"]//dt[contains(text(),"Język:")]/following-sibling::dd/text()','inLanguage')
        if not lang:
            return []
        known_languages = (("polski", "pol"), ("angielski", "eng"))
        return [code for name, code in known_languages if name in lang]

    def parse_cover(self, root):
        '''
        Return the cover URL from the ``og:image`` meta tag, or ``None``.

        The URL is opened with ``self.browser`` to inspect the response
        headers.  It is rejected when the request fails or when
        ``Content-Length`` is a number not greater than 1000; it is accepted
        when the header is absent or cannot be parsed as an integer.
        '''
        # Try <meta/>
        candidates = root.xpath('//meta[@property="og:image"]/@content')
        if not candidates:
            return None
        img_url = candidates[0]

        try:
            info = self.browser.open_novisit(img_url, timeout=self.timeout).info()
        except Exception:
            self.log.warning('BECKY HINT: Ten problem moze byc zwiazany z antywirusem lub firewallem')
            return None
        if info is None:
            return None

        content_length = info.get('Content-Length')
        if content_length is None:
            return img_url
        try:
            size = int(content_length)
        except (TypeError, ValueError):
            # Unparsable header: treat the size as unknown, like a missing one.
            return img_url
        if size > 1000:
            return img_url
        self.log.warning('Broken image for url: %s'%img_url)
        return None

    def clean_downloaded_metadata(self, mi):
        '''
        Custom implementation - without title capitalizing

        Modifies *mi* in place:

        * authors are passed through ``fixauthors`` and, when the
          ``SWAP_NAMES`` preference is set, through ``swap_names``;
        * the first Latin/Polish letter of every tag is upper-cased;
        * when the ``REPLACE_ISBN`` preference equals 1, ``mi.isbn`` is
          replaced by the result of ``check_isbn``.
        '''
        store = cfg.plugin_prefs[cfg.STORE_NAME]
        replaceISBN = store.get(cfg.REPLACE_ISBN, cfg.DEFAULT_STORE_VALUES[cfg.REPLACE_ISBN])
        swapnames = store.get(cfg.SWAP_NAMES, cfg.DEFAULT_STORE_VALUES[cfg.SWAP_NAMES])

        mi.authors = fixauthors(mi.authors)
        if swapnames:
            mi.authors = [swap_names(a) for a in mi.authors]

        # Upper-case the first letter of every tag.  ``count`` is passed by
        # keyword because positional use is deprecated since Python 3.13.
        mi.tags = [re.sub('[a-ząćęłńóśźżA-ZĄĆĘŁŃÓŚŹŻ]', lambda x: x.group().upper(), y, count=1) for y in mi.tags]

        if replaceISBN == 1:
            mi.isbn = check_isbn(mi.isbn)

def get_int_or_float(v):
    '''
    Convert *v* to a number, returning an ``int`` when the value is whole
    and a ``float`` otherwise.
    '''
    as_float = float(v)
    as_int = int(as_float)
    if as_float == as_int:
        return as_int
    return as_float

def odmiana(x, y):
    '''
    Pick the grammatical form from *y* that matches the count *x* and
    return it prefixed with a space.

    *y* holds three forms: ``y[0]`` is used for exactly 1, ``y[1]`` for
    counts ending in 2-4 whose last two digits are not in 5-21, and ``y[2]``
    for everything else.  A count of 0 yields a single space.
    '''
    count = int(x)
    if count == 0:
        return " "
    if count == 1:
        return " " + y[0]
    last_two = count % 100
    last_digit = count % 10
    if 4 < last_two < 22 or not 1 < last_digit <= 4:
        return " " + y[2]
    return " " + y[1]

def strip_accents(s):
    '''
    Return *s* with the listed accented letters replaced by their plain
    ASCII counterparts.  ``None`` is passed through unchanged.
    '''
    if s is None:
        return s
    accented = "öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ"
    plain = "oOuUoOoOuUeEaAuUiIaAcCeElLnNoOsSzZzZ"
    # Code-point to code-point table, as expected by translate().
    table = {ord(src): ord(dst) for src, dst in zip(accented, plain)}
    return s.translate(table)

# function to get unique values
def unique(list1):
    '''
    Return a new list with the items of *list1* in first-seen order and
    duplicates removed.  Items are compared by equality, so they do not
    need to be hashable.
    '''
    kept = []
    for item in list1:
        if item in kept:
            continue
        kept.append(item)
    return kept


def swap_names(a):
    '''
    Swap the order of surname and given names in the author name *a*.

    * ``"Surname, Given Names"`` becomes ``"Given Names Surname"``; every
      comma-separated part is stripped and empty parts are dropped, so the
      result carries no stray whitespace.
    * Without a comma the last word is treated as the surname and moved to
      the front: ``"Given Names Surname"`` becomes ``"Surname Given Names"``.

    A name consisting of a single part is returned without reordering.
    '''
    if ',' in a:
        parts = [part.strip() for part in a.split(',')]
        parts = [part for part in parts if part]
        if len(parts) <= 1:
            # Nothing to swap (e.g. "Surname," or a lone comma).
            return parts[0] if parts else a
        surname = parts[0]
        return '%s %s' % (' '.join(parts[1:]), surname)

    parts = a.split(None)
    if len(parts) <= 1:
        return a
    surname = parts[-1]
    return '%s %s' % (surname, ' '.join(parts[:-1]))
