Not sure what I'm doing wrong here but what i'm trying to achieve is this
Header
Date
<space>
article
what i'm having trouble with is this portion of my code
Code:
for jumbled in soup.find('div', attrs={'valign':'Top'}):
parent_tag = jumbled.parent
new_tag = Tag(soup,'p')
new_tag.insert(0,jumbled)
parent_tag.replaceWith(new_tag)
here is the full code:
Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
title = 'Bizzare News'
language = 'en'
__author__ = 'TonytheBookworm'
description = 'Crazy things that happen in the news'
publisher = 'bizzarenews'
category = 'news,humor'
oldest_article = 365
max_articles_per_feed = 100
linearize_tables = True
no_stylesheets = True
remove_javascript = True
#masthead_url = 'http://blog.stackoverflow.com/wp-content/uploads/how-to-geek-logo.png'
keep_only =[dict(name='td', attrs={'class':['mainbody']})]
remove_tags =[dict(name='div', attrs={'id':['horiz-menu','section2']}),
dict(name='td', attrs={'class':['left']}),
# dict(name='div', attrs={'class':['feedflare']}),
]
extra_css = '''
.contentheading{font-family:Arial,Helvetica,sans-serif;color:orange; font-weight:bold;font-size:large;}
.createdate{font-family:Arial,Helvetica,sans-serif; color:blue; font-weight:normal;font-size:small;}
div{font-family:Helvetica,Arial,sans-serif;font-size:small;}
p{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
feeds = [
('Feed', 'http://www.bizarrenews.org/component/option,com_rss/feed,RSS2.0/no_html,1/')
]
def preprocess_html(self, soup):
for item in soup.findAll(attrs={'style':True}):
del item['style']
return soup
def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['table', 'tr', 'td']):
tag.name = 'div'
for date in soup.find('div', attrs={'class':'createdate'}):
date.name ='p'
#I'm wanting to take and insert a <p> between date and this section so it is not bunched together
#---------------------------------------------------------------------------------------------------
for jumbled in soup.find('div', attrs={'valign':'Top'}):
parent_tag = jumbled.parent
new_tag = Tag(soup,'p')
new_tag.insert(0,jumbled)
parent_tag.replaceWith(new_tag)
#---------------------------------------------------------
print 'the soup is: ', soup
return soup