Code:
import os

from calibre import walk

# Run every (pattern, replacement) pair in self.preprocess_regexps over each
# HTML file below the current directory, rewriting the files in place.
#
# NOTE(review): `self` is not defined at this level; the snippet presumably
# belongs inside a recipe method where self.preprocess_regexps is available
# -- confirm before use.
# NOTE(review): walk('.') is expected to yield file paths under the current
# directory -- confirm against the calibre version in use.
for path in walk('.'):
    # splitext() returns a (root, ext) tuple and ext keeps its leading dot,
    # so take element 1 and compare against dotted, lower-cased suffixes.
    if os.path.splitext(path)[1].lower() not in ('.html', '.htm'):
        continue
    # 'r+b' lets the same handle be read and then overwritten.
    with open(path, 'r+b') as f:
        raw = f.read().decode('utf-8')
        # Apply the substitutions in the order they are listed.
        for pat, func in self.preprocess_regexps:
            raw = pat.sub(func, raw)
        # Rewind and empty the file first so no stale bytes remain if the
        # processed text is shorter than the original.
        f.seek(0)
        f.truncate()
        f.write(raw.encode('utf-8'))
This will need some adjustments, of course.