https://github.com/kovidgoyal/calibr...autilus.recipe
Code:
def preprocess_html(self, soup):
    """Tidy the parsed article markup and return it.

    Three in-place adjustments are made to ``soup``:

    * every ``<img>`` carrying a ``data-src`` attribute gets its ``src``
      set to that value with anything from the first ``?`` onwards removed;
    * every ``<figcaption>`` is given the id ``fig-c`` (the hook used by
      the ``#fig-c`` rule in ``extra_css``);
    * every ``<ul>`` with one of the breadcrumb/byline/author classes is
      renamed to ``<span>`` and each ``<li>`` inside it to ``<p>``.

    :param soup: parsed document exposing ``findAll`` and item assignment
        on tags (presumably a BeautifulSoup tree -- confirm against caller).
    :return: the same ``soup`` object, modified in place.
    """
    # Copy the data-src URL, minus its query string, into src.
    for img in soup.findAll('img', attrs={'data-src': True}):
        img['src'] = img['data-src'].split('?')[0]

    # All captions share one id so a single CSS rule can target them.
    for figcaption in soup.findAll('figcaption'):
        figcaption['id'] = 'fig-c'

    # Turn the list markup into span/paragraph elements.
    for ul in soup.findAll('ul', attrs={'class': [
        'breadcrumb',
        'article-list_item-byline',
        'channel-article-author',
        'article-author',
    ]}):
        ul.name = 'span'
        for li in ul.findAll('li'):
            li.name = 'p'

    return soup
Code:
# Stylesheet for the generated book, one CSS rule per entry. The empty
# first and last entries reproduce the leading and trailing newline of the
# stylesheet text. '#fig-c' matches the id that preprocess_html assigns to
# every <figcaption>.
extra_css = '\n'.join((
    '',
    '.article-list_item-byline{font-size:small;}',
    'blockquote{color:#404040; text-align:center;}',
    '#fig-c{font-size:small;}',
    'em{color:#202020;}',
    '.breadcrumb{color:gray; font-size:small;}',
    '.article-author{font-size:small;}',
    '',
))
Finally, add 'article-collection_box' to the list of classes in remove_tags.