from re import match, DOTALL
from sys import argv

from bs4 import BeautifulSoup

# Regex for note-heading text that carries a chapter title, i.e. text shaped
# like "<prefix> - <title> > <rest>".  Group 1 lazily captures <title> (the
# text between the first " - " and the next " > "); group 2 is the " > "
# separator itself.  gather_chapter_notes applies it with re.match and the
# DOTALL flag, so the match is anchored at the start and "." spans newlines.
chapter_pattern = r".*? - (.*?)( > ).*"


def gather_chapter_notes(html: str):
    """Group note headings under per-chapter section headings.

    Every ``<div class="noteHeading">`` whose trailing text node matches
    ``chapter_pattern`` (``"<prefix> - <title> > <rest>"``) has the
    ``"<title> > "`` part cut out of that text node.  The first heading seen
    for each distinct title additionally gets a new
    ``<div class="sectionHeading">`` containing ``"● <title>"`` inserted
    immediately before it.

    Args:
        html: Markup of the notes document.

    Returns:
        The modified document serialised back to a string.
    """
    soup = BeautifulSoup(html, 'html.parser')
    seen_titles = set()

    # find_all returns a materialised list, so editing the tree while looping
    # over it is safe.
    for note_heading in soup.find_all('div', class_='noteHeading'):
        if not note_heading.contents:
            # Empty heading: nothing to inspect.
            continue
        content = note_heading.contents[-1]
        if not isinstance(content, str):
            # Last child is a tag rather than text; re.match cannot take it.
            continue
        matches = match(chapter_pattern, content, flags=DOTALL)
        if not matches:
            continue

        title = matches.group(1)

        # Cut "<title> > " out of the text node itself instead of
        # string-replacing on the serialised HTML: serialisation escapes
        # "&", "<" and ">", so titles containing them would never be found,
        # and a document-wide replace could also hit unrelated text.
        stripped = content[:matches.start(1)] + content[matches.end(2):]
        # Rebuild with the same node class so the kind of text node is kept.
        content.replace_with(type(content)(stripped))

        if title not in seen_titles:
            seen_titles.add(title)
            title_section = soup.new_tag(
                'div', attrs={'class': 'sectionHeading'})
            title_section.string = f'● {title}'
            note_heading.insert_before(title_section)

    return str(soup)


# Process every file named on the command line and write the result next to
# it as "<name>-new.html".
for arg in argv[1:]:
    # Use UTF-8 explicitly rather than the platform default: the output may
    # contain "●", which some locale encodings cannot represent.
    # NOTE(review): assumes the input files are UTF-8 — confirm.
    with open(arg, encoding='utf-8') as f:
        html_text = f.read()

    new_html = gather_chapter_notes(html=html_text)

    # Only rewrite a trailing ".html".  str.replace would touch every
    # occurrence in the path and, when there is none, return the input path
    # unchanged so the source file would be overwritten.
    if arg.endswith('.html'):
        out_path = f"{arg[:-len('.html')]}-new.html"
    else:
        out_path = f'{arg}-new.html'

    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(new_html)
