def preprocess_raw_html(self, raw_html, url):
    """Dump the raw HTML to a fixed file on disk and return it unchanged.

    The text is encoded as UTF-8 and written in binary mode, overwriting
    any previous contents of the dump file.

    Args:
        raw_html: HTML text; it must support ``.encode('utf-8')``.
        url: Not used by this method.

    Returns:
        ``raw_html``, unmodified.

    Raises:
        OSError: If the dump file cannot be opened or written (for
            example, when the target directory does not exist).
    """
    # NOTE(review): the hard-coded path looks like a placeholder for a
    # debugging dump -- confirm the intended location with the caller.
    # The context manager guarantees the handle is flushed and closed even
    # if encoding or writing raises, instead of relying on garbage collection.
    with open('/path/to/tempfile.html', 'wb') as dump_file:
        dump_file.write(raw_html.encode('utf-8'))
    return raw_html