MobileRead Forums - View Single Post - need help in trying to update .net recipe

Camper65 · 05-27-2013, 11:16 AM

Got the corrected one again. But give me a week or two to make sure that the changes feedsportal made are permanent and not just a fluke, since I now only download this once a week (they only produce articles five days a week, and it's better to download on Saturday or Sunday so that it gets that week's articles).

I'll let you know next week hopefully, in between installing components to my new tower I'm building.

Code:

import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.ptempfile import PersistentTemporaryFile

class dotnetMagazine(BasicNewsRecipe):
    """calibre news recipe for the '.net' magazine top-stories feed.

    The feed links are treated as obfuscated (``articles_are_obfuscated``),
    so every article is fetched through :meth:`get_obfuscated_article`,
    which saves the page to a temporary file for calibre to process.
    """

    __author__ = u'Bonni Salles - post in forum if questions for me'
    __version__ = '1.1'
    __license__ = 'GPL v3'
    __copyright__ = u'2013, Bonni Salles'

    title = '.net '
    oldest_article = 7
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    # NOTE(review): calibre's documented option is spelled ``recursions``;
    # this attribute is presumably ignored by the framework -- confirm
    # before renaming, since doing so would change crawl behaviour.
    recursion = 1
    articles_are_obfuscated = True
    language = 'en'
    remove_empty_feeds = True
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
    cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'

    # Keep only the content between the <header> and <footer> elements that
    # carry no id attribute.
    remove_tags_after = dict(name='footer', id=lambda x: not x)
    remove_tags_before = dict(name='header', id=lambda x: not x)

    remove_tags = [
        dict(name='div', attrs={'class': 'item-list'}),
        dict(name='h4', attrs={'class': 'std-hdr'}),
        # removes share links
        dict(name='div', attrs={'class': 'item-list share-links'}),
        dict(name=['script', 'noscript']),
        # comment these out if you want the comments to show
        dict(name='div', attrs={'id': 'comments-form'}),
        # Any id containing this prefix. The original pattern's trailing
        # group "($|| )" had an empty alternative that always matched, so
        # this simpler pattern selects exactly the same elements.
        dict(name='div', attrs={'id': re.compile(r'advertorial_block_')}),
        dict(name='div', attrs={'id': 'right-col'}),
        # comment these out if you want the comments to show
        dict(name='div', attrs={'id': 'comments'}),
        dict(name='div', attrs={'class': 'item-list related-content'}),
    ]

    feeds = [
        (u'net', u'http://feeds.feedburner.com/net/topstories?format=xml'),
    ]

    # Temporary files created by get_obfuscated_article; references are kept
    # here so the file objects stay reachable after the method returns.
    temp_files = []

    def get_obfuscated_article(self, url):
        """Download the article at *url* and store it in a temporary file.

        Args:
            url: Address of the article as listed in the feed.

        Returns:
            Path of the temporary ``_fa.html`` file holding the page content.
        """
        br = self.get_browser()
        self.log('THE CURRENT URL IS: ', url)
        # Fetch the page once; the original opened the URL twice and threw
        # away the first response, doubling the network traffic.
        response = br.open(url)
        html = response.read()

        article_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(article_file)
        try:
            article_file.write(html)
        finally:
            # Always release the file handle, even if the write fails.
            article_file.close()
        return article_file.name

Camper65