Quote:
Originally Posted by terry.nz
I was about to reply that the TXT files are UTF-8, as I spot-checked some using Notepad++, which is generally pretty good at identifying the encoding type. Then the last one I spot-checked came back as ANSI. So it would seem that somehow there are possibly a few incorrectly encoded TXT files. I've no idea how that may have occurred, but with 970 of them to check it's going to be a mission to sort out.
Thanks
Terry
|
You can run the code below instead, which will automatically try ANSI (windows-1252) if UTF-8 fails:
Code:
import regex
# Book format whose file move_note() reads the note text from.
fmt = 'TXT'
# Encodings run() tries for each book, in order; it stops at the first one
# for which move_note() completes without raising.
encodings = ['utf-8','windows-1252']
def move_note(db, book_id, encoding='utf-8'):
    """Store the text of a book's ``fmt`` file as the note of its series.

    The file is decoded with ``encoding``, every ``http...`` run of
    non-whitespace characters is removed, and the remaining text is saved
    as the note attached to the book's series item.

    Args:
        db: Library database object exposing ``format_abspath``, ``title``
            and ``new_api`` (presumably calibre's legacy db wrapper --
            confirm against the caller).
        book_id: Id of the book whose file is read.
        encoding: Text encoding used to open the file.

    Returns:
        None. If the book has no ``fmt`` file, a message is printed and
        nothing is written.

    Raises:
        Whatever ``open``/``read`` raise for a wrong ``encoding`` (e.g.
        ``UnicodeDecodeError``); the caller relies on this to retry with a
        different encoding.
    """
    file_path = db.format_abspath(book_id, fmt, index_is_id=True)
    title = db.title(book_id, index_is_id=True)
    api = db.new_api
    series_name = api.field_for('series', book_id)
    print(f'Book title: {title} | Series: {series_name}')

    # Nothing to import when the book lacks the expected format.
    if not file_path:
        print(f'Book does not have format: {fmt}')
        return

    with open(file_path, 'r', encoding=encoding) as handle:
        raw_text = handle.read()
    # Drop links: "http" followed by any run of non-whitespace characters.
    note = regex.sub(r'http[^\s]+', r'', raw_text)

    series_id = api.get_item_id('series', series_name)
    print(f'series_id: {series_id} | note: {note}')
    api.set_notes_for('series', series_id, note)
def run(gui, settings, chain):
    """Move the TXT note of every matching book into its series note.

    Searches the current library for books matching
    ``series_index:=9999999`` and calls ``move_note`` for each one, trying
    every entry of ``encodings`` in order until one succeeds. Books for
    which no encoding worked are collected and reported at the end.

    Args:
        gui: Object exposing ``current_db`` (presumably the calibre main
            window -- confirm against the caller).
        settings: Unused here; kept because the caller supplies it.
        chain: Unused here; kept because the caller supplies it.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    db = gui.current_db
    book_ids = db.data.search_getting_ids('series_index:=9999999', None)
    failed_ids = []
    for book_id in book_ids:
        for encoding in encodings:
            print(f'call move_notes for {book_id} with encoding {encoding}')
            try:
                move_note(db, book_id, encoding=encoding)
            except Exception as err:
                # Deliberately best-effort: any failure moves on to the next
                # encoding. ``Exception`` (rather than a bare ``except``)
                # lets KeyboardInterrupt/SystemExit still stop the batch.
                print(f'Encoding {encoding} failed for book {book_id}')
                print(f'Reason: {err!r}')
            else:
                break
        else:
            # Every encoding was tried and none succeeded.
            failed_ids.append(book_id)
    if failed_ids:
        print(f'No encoding worked for book ids: {failed_ids}')
Note: You can add other encodings to the `encodings` list (highlighted in
red in the code above).