View Single Post
Old 09-24-2010, 09:04 PM   #1
TonytheBookworm
Addict
TonytheBookworm is on a distinguished road
 
TonytheBookworm's Avatar
 
Posts: 264
Karma: 62
Join Date: May 2010
Device: kindle 2, kindle 3, Kindle fire
Help with Recipe inserting tag

Not sure what I'm doing wrong here but what i'm trying to achieve is this
Header
Date
<space>
article

what i'm having trouble with is this portion of my code
Code:
for jumbled in soup.find('div', attrs={'valign':'Top'}):
            parent_tag = jumbled.parent
            new_tag = Tag(soup,'p')
            new_tag.insert(0,jumbled)
            parent_tag.replaceWith(new_tag)
here is the full code:
Spoiler:

Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title = 'Bizzare News'
    language = 'en'
    __author__ = 'TonytheBookworm'
    description = 'Crazy things that happen in the news'
    publisher = 'bizzarenews'
    category = 'news,humor'
    oldest_article = 365
    max_articles_per_feed = 100
    linearize_tables = True
    no_stylesheets = True
    remove_javascript   = True
    #masthead_url = 'http://blog.stackoverflow.com/wp-content/uploads/how-to-geek-logo.png'
    keep_only =[dict(name='td', attrs={'class':['mainbody']})]
    
    
    remove_tags =[dict(name='div', attrs={'id':['horiz-menu','section2']}),
                  dict(name='td', attrs={'class':['left']}),
      #            dict(name='div',   attrs={'class':['feedflare']}),
                  ]
    
    
    extra_css = '''
                    .contentheading{font-family:Arial,Helvetica,sans-serif;color:orange; font-weight:bold;font-size:large;}
                    .createdate{font-family:Arial,Helvetica,sans-serif; color:blue; font-weight:normal;font-size:small;}
                    
                    div{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                    
                    
                    p{font-family:Helvetica,Arial,sans-serif;font-size:small;}
		        '''
		
                   
    feeds          = [
                      ('Feed', 'http://www.bizarrenews.org/component/option,com_rss/feed,RSS2.0/no_html,1/')
                      
                    ]
    def preprocess_html(self, soup):
        for item in soup.findAll(attrs={'style':True}):
            del item['style']
        return soup
    
    def postprocess_html(self, soup, first):
        for tag in soup.findAll(name=['table', 'tr', 'td']):
            tag.name = 'div'
        for date in soup.find('div', attrs={'class':'createdate'}):
            date.name ='p'
        #I'm wanting to take and insert a <p> between date and this section so it is not bunched together
        #---------------------------------------------------------------------------------------------------
        for jumbled in soup.find('div', attrs={'valign':'Top'}):
            parent_tag = jumbled.parent
            new_tag = Tag(soup,'p')
            new_tag.insert(0,jumbled)
            parent_tag.replaceWith(new_tag)
        #---------------------------------------------------------
        print 'the soup is: ', soup
        return soup
TonytheBookworm is offline   Reply With Quote