def preprocess_html(self, soup):
    """Point every linked image at the URL of its enclosing anchor.

    For each ``<img>`` tag in *soup* that has an ``<a>`` ancestor, the
    image's ``src`` attribute is overwritten with that anchor's ``href``.
    Images without an ``<a>`` ancestor, or whose anchor has no usable
    ``href``, are left unchanged.

    Args:
        soup: Parsed document exposing ``findAll``; its tags must support
            ``findParent`` and dict-style attribute access.

    Returns:
        The same *soup* object, modified in place.
    """
    for img in soup.findAll('img'):
        link = img.findParent('a')
        if link is None:
            continue
        # Anchors such as <a name="..."> carry no href; indexing them
        # directly would raise KeyError, so look the attribute up safely.
        href = link.get('href')
        if href:
            img['src'] = href
    return soup