Quote:
Originally Posted by lizzie1170
 I added get_word_count definition but it depends on other definitions. Running the code results in TypeError: TagsFromEpub.run() takes 3 positional arguments but 4 were given.
Code:
from calibre.ebooks.oeb.iterator import EbookIterator
from calibre_plugins.action_chains.actions.base import ChainAction
with open("test_dict.txt", "r") as f:
tags_dict = f.read()
class TagsFromEpub(ChainAction):
name = 'Tags_F_Epub'
support_scopes = True
def get_word_count(iterator, book_path, icu_wordcount):
'''Given an iterator for the epub (if already opened/converted), estimate a word count'''
from calibre.utils.localization import get_lang
if iterator is None:
iterator = _open_epub_file(book_path)
lang = iterator.opf.language
lang = get_lang() if not lang else lang
DEFAULT_STORE_VALUES = {}
KEY_USE_ICU_WORDCOUNT = 'useIcuWordcount'
icu_wordcount = c.get(cfg.KEY_USE_ICU_WORDCOUNT, cfg.DEFAULT_STORE_VALUES[cfg.KEY_USE_ICU_WORDCOUNT])
count = _get_epub_standard_word_count(iterator, lang, icu_wordcount)
print('\tWord count:', count)
return iterator, count
def _open_epub_file(book_path, strip_html=False):
'''Given a path to an EPUB file, read the contents into a giant block of text'''
iterator = EbookIterator(book_path)
iterator.__enter__(only_input_plugin=True, run_char_count=True, read_anchor_map=False)
return iterator
def _get_epub_standard_word_count(iterator, lang='en', icu_wordcount=False):
'''This algorithm counts individual words instead of pages'''
book_text = _read_epub_contents(iterator, strip_html=True)
wordcount = None
if icu_wordcount:
try:
from calibre.spell.break_iterator import count_words
print('\tWord count using icu_wordcount - trying to count_words')
wordcount = count_words(book_text, lang)
print('\tWord count - used count_words:', wordcount)
except:
try: # The above method is new and no-one will have it as of 08/01/2016.
print('\tWord count using icu_wordcount - trying to import split_into_words_and_positions')
from calibre.spell.break_iterator import split_into_words_and_positions
print('\tWord count - trying split_into_words_and_positions:')
wordcount = len(split_into_words_and_positions(book_text, lang))
print('\tWord count - used split_into_words_and_positions:', wordcount)
except:
pass
if not wordcount: # If not using icu wordcount, or it failed, use the old method.
from calibre.utils.wordcount import get_wordcount_obj
print('\tWord count using older method - trying get_wordcount_obj')
wordcount = get_wordcount_obj(book_text)
wordcount = wordcount.words
return wordcount
def tags_from_epub(path_to_epub):
temp = []
res = dict()
for line in wordcount:
for key,value in tags_dict.items():
if re.search(rf'{value}', line):
if value not in temp:
temp.append(value)
res[key] = value
regex = re.compile(value)
match_array = regex.finditer(line)
match_list = list(match_array)
for m in match_list:
print(key, ":",m.group())
def run(gui, settings, chain):
db = gui.current_db
for book_id in chain.scope().get_book_ids():
fmts = [ fmt.strip() for fmt in db.formats(book_id, index_is_id=True).split(',') ]
if 'EPUB' in fmts:
path_to_epub = db.format_abspath(book_id, 'EPUB', index_is_id=True)
tags_from_epub(path_to_epub)
|
Why are subclassing ChainAction?! This is intended for completely different thing. It is used for creating custom actions in the module manager, not for the "Run Python Code".
For the "Run Python Code" you should use the run() as separate function, not a method for any class, as I previously told you to do in
this post (note that there is NO mention of subclassing ChainAction). The other methods should be separate functions as well.
I do not understand what you are trying to do with your code, and I do not have the time to debug it. If you can get a working function that returns whatever tags you want, I can help from there. However, here is a couple of points regarding your code:
- get_word_count() is defined but not called anywhere in the code.
- in tags_from_epub() you reference a variable called wordcount which is not assigned before in any part of the code.
P.S. If your main problem is converting the epub to text, the easiest way is using calibre's conversion as follows:
Code:
def convert_to_text(path_to_epub):
import os, subprocess
from calibre.ptempfile import PersistentTemporaryDirectory
tdir = PersistentTemporaryDirectory('_temp_convert')
output_file = os.path.join(tdir, 'temp.txt')
cmd = 'ebook-convert "{}" "{}"'.format(path_to_epub, output_file)
subprocess.call(cmd, shell='true')
return output_file
path_to_txt = convert_to_text(path_to_epub)