View Single Post
Old 05-27-2013, 11:16 AM   #11
Camper65
Enthusiast
Camper65 began at the beginning.
 
Posts: 32
Karma: 10
Join Date: Apr 2011
Device: Kindle wifi; Dell 2in1
Cool Got it

Got the corrected one again. But give me a week or two to make sure that the changes feedsportal made are permanent and not just a fluke, since I now only download this once a week (they only produce articles five days a week, and it's better to download on Saturday or Sunday to get that week's articles).

Hopefully I'll let you know next week, in between installing components in the new tower I'm building.

Code:
import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.ptempfile import PersistentTemporaryFile

class dotnetMagazine (BasicNewsRecipe):
    """Calibre news recipe for .net magazine.

    Articles are listed from the feedburner feed in ``feeds``. Because
    ``articles_are_obfuscated`` is set, each article URL is handed to
    ``get_obfuscated_article``, which downloads the page into a temporary
    file; the ``remove_tags*`` rules below are then applied to strip page
    chrome, share links, adverts and comments.
    """

    __author__ = u'Bonni Salles - post in forum if questions for me'
    __version__ = '1.1'
    __license__   = 'GPL v3'
    __copyright__ = u'2013, Bonni Salles'
    title                 = '.net '
    oldest_article        = 7
    no_stylesheets        = True
    encoding              = 'utf8'
    use_embedded_content  = False
    recursion = 1
    articles_are_obfuscated = True
    language              = 'en'
    remove_empty_feeds    = True
    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
    cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'

    # The id filter (`not x`) restricts the match to <footer>/<header>
    # elements that carry no id attribute.
    remove_tags_after = dict(name='footer', id=lambda x: not x)
    remove_tags_before = dict(name='header', id=lambda x: not x)

    remove_tags = [
         dict(name='div', attrs={'class': 'item-list'}),
         dict(name='h4', attrs={'class': 'std-hdr'}),
         dict(name='div', attrs={'class': 'item-list share-links'}), #removes share links
         dict(name=['script', 'noscript']),
         dict(name='div', attrs={'id': 'comments-form'}), #comment these out if you want the comments to show
         # NOTE: the empty alternative inside the group always matches, so
         # this pattern accepts any id containing 'advertorial_block_'.
         dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}),
         dict(name='div', attrs={'id': 'right-col'}),
         dict(name='div', attrs={'id': 'comments'}), #comment these out if you want the comments to show
         dict(name='div', attrs={'class': 'item-list related-content'}),
         ]

    feeds = [
               (u'net', u'http://feeds.feedburner.com/net/topstories?format=xml')
            ]

    # Handles of every temporary file created by get_obfuscated_article;
    # keeping them referenced here stops them from being garbage collected
    # while the download is still in progress.
    temp_files = []

    def get_obfuscated_article(self, url):
        """Download ``url`` and return the path of a local copy of its HTML.

        :param url: address of the article page to fetch.
        :return: file-system path of a temporary ``*_fa.html`` file holding
                 the raw response body.
        """
        br = self.get_browser()
        # Use the recipe logger rather than a bare print statement so the
        # message works on both Python 2 and Python 3 builds of calibre.
        self.log('THE CURRENT URL IS: ', url)
        # Fetch the page exactly once; a second request would only double
        # the network traffic for every article.
        response = br.open(url)
        html = response.read()

        tmp = PersistentTemporaryFile('_fa.html')
        # Track the file before writing so it stays referenced even if the
        # write fails part-way through.
        self.temp_files.append(tmp)
        try:
            tmp.write(html)
        finally:
            tmp.close()
        return tmp.name

Camper65
Camper65 is offline   Reply With Quote