MobileRead Forums - View Single Post

capink · 12-09-2023, 05:02 PM

Quote:

Originally Posted by terry.nz

I was about to reply that the TXT files are UTF-8 as I spot checked some using Notepad++ which is generally pretty good at identifying the encoding type. Then the last one I spot checked came back as ANSI. So it would seem that somehow there are possibly a few incorrectly encoded TXT files. I've no idea how that may have occurred, but with 970 of them to check it's going to be a mission to sort out.

Thanks
Terry

You can run the code below instead, which will automatically fall back to ANSI (windows-1252) if decoding as UTF-8 fails:

Code:

import regex

# Book format whose file is read to obtain the note text.
fmt = 'TXT'

# Encodings to try when reading the file, in order of preference.
# Append further codec names here if your files use other encodings.
encodings = [
    'utf-8',
    'windows-1252',
]

def move_note(db, book_id, encoding='utf-8'):
    """Store the text of a book's ``fmt`` file as the note of its series.

    The file is read with the given encoding, anything that looks like a
    URL (``http`` followed by non-whitespace) is stripped, and the result
    is saved as the note attached to the book's series.

    Args:
        db: Database object exposing ``format_abspath``, ``title`` and a
            ``new_api`` attribute (presumably calibre's legacy database
            wrapper; verify against the caller).
        book_id: Id of the book whose file is read.
        encoding: Text encoding used to decode the file.

    Returns:
        None. Progress and skip reasons are printed.

    Raises:
        UnicodeDecodeError: If the file cannot be decoded with
            ``encoding``. Callers can catch this to retry with another
            encoding.
    """
    path_to_book = db.format_abspath(book_id, fmt, index_is_id=True)
    title = db.title(book_id, index_is_id=True)
    series_name = db.new_api.field_for('series', book_id)
    print(f'Book title: {title} | Series: {series_name}')
    if not path_to_book:
        print(f'Book does not have format: {fmt}')
        return
    if not series_name:
        # Without a series there is no item to attach the note to.
        print(f'Book {book_id} has no series, skipping')
        return

    # Read and close the file first, so a decoding error surfaces before
    # any database work and the handle is not held open during the write.
    with open(path_to_book, 'r', encoding=encoding) as f:
        text = f.read()

    note = regex.sub(r'http[^\s]+', r'', text)
    series_id = db.new_api.get_item_id('series', series_name)
    if series_id is None:
        # Never pass a missing id on to set_notes_for.
        print(f'No item id found for series: {series_name}, skipping')
        return
    print(f'series_id: {series_id} | note: {note}')
    db.new_api.set_notes_for('series', series_id, note)

def run(gui, settings, chain):
    """Move TXT notes to series notes for every matching book.

    Searches the current database for books matching
    ``series_index:=9999999`` and calls ``move_note`` on each one, trying
    every entry of ``encodings`` in order until the file decodes.

    Only a ``UnicodeDecodeError`` triggers a retry with the next
    encoding. Any other error is reported with its traceback and the book
    is not retried, since decoding the same file with a different codec
    could store mis-decoded text. Books that could not be processed are
    listed at the end.

    Args:
        gui: Object whose ``current_db`` attribute is the database to use.
        settings: Unused here.
        chain: Unused here.
    """
    import traceback

    db = gui.current_db
    book_ids = db.data.search_getting_ids('series_index:=9999999', None)
    failed_ids = []
    for book_id in book_ids:
        for encoding in encodings:
            print(f'call move_notes for {book_id} with encoding {encoding}')
            try:
                move_note(db, book_id, encoding=encoding)
            except UnicodeDecodeError:
                # Wrong codec for this file: fall through to the next one.
                print(f'Encoding {encoding} failed for book {book_id}')
            except Exception:
                # Not an encoding problem; retrying with another codec
                # would not help. Record it and move to the next book.
                print(f'Unexpected error for book {book_id}:')
                traceback.print_exc()
                failed_ids.append(book_id)
                break
            else:
                break
        else:
            # Every encoding was tried and none could decode the file.
            failed_ids.append(book_id)
    if failed_ids:
        print(f'Books that could not be processed: {failed_ids}')

Note: You can add other encodings to the `encodings` list (highlighted in red).