# Fetch the resource at ``url``, sanitise the payload, and parse it as XML.
#
# NOTE(review): ``br``, ``url`` and ``etree`` are not defined in this snippet.
# Presumably ``br`` is a mechanize-style browser object and ``etree`` is
# ``lxml.etree`` -- confirm against the surrounding code.
from calibre.utils.cleantext import clean_ascii_chars

# Download the raw response body.
raw = br.open(url).read()

# Pass the payload through clean_ascii_chars() before handing it to the
# parser. Presumably this strips ASCII control characters that an XML parser
# would reject -- verify against calibre.utils.cleantext.
# NOTE(review): the parsed tree is discarded here; bind the result to a name
# if the caller needs to inspect the document.
etree.fromstring(clean_ascii_chars(raw))