Got the corrected one again. But give me a week or two to make sure that the changes feedsportal made are permanent and not just a fluke, since I now only download this once a week (they only produce articles five days a week, so it's better to download on Saturday or Sunday to get that week's articles).
I'll let you know next week, hopefully, in between installing components in the new tower I'm building.
Code:
import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.ptempfile import PersistentTemporaryFile
class dotnetMagazine (BasicNewsRecipe):
    """calibre news recipe for .net magazine.

    Articles are listed from a single FeedBurner feed.  Because
    ``articles_are_obfuscated`` is set, every article URL is routed through
    :meth:`get_obfuscated_article`, which downloads the page itself and hands
    back the path of a local copy.
    """

    # --- Recipe metadata -------------------------------------------------
    __author__ = u'Bonni Salles - post in forum if questions for me'
    __version__ = '1.1'
    __license__ = 'GPL v3'
    __copyright__ = u'2013, Bonni Salles'
    title = '.net '

    # --- Download behaviour ----------------------------------------------
    oldest_article = 7
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    recursion = 1
    # Route every article through get_obfuscated_article() below.
    articles_are_obfuscated = True
    language = 'en'
    remove_empty_feeds = True

    # --- Presentation ----------------------------------------------------
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
    cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'

    # --- Page clean-up ---------------------------------------------------
    # The lambdas select <footer>/<header> tags whose id is missing or empty.
    remove_tags_after = dict(name='footer', id=lambda x: not x)
    remove_tags_before = dict(name='header', id=lambda x: not x)
    remove_tags = [
        dict(name='div', attrs={'class': 'item-list'}),
        dict(name='h4', attrs={'class': 'std-hdr'}),
        dict(name='div', attrs={'class': 'item-list share-links'}),  # removes share links
        dict(name=['script', 'noscript']),
        dict(name='div', attrs={'id': 'comments-form'}),  # comment these out if you want the comments to show
        # The empty alternative inside ($|| ) always succeeds, so only the
        # 'advertorial_block_' part actually constrains the match.
        dict(name='div', attrs={'id': re.compile('advertorial_block_($|| )')}),
        dict(name='div', attrs={'id': 'right-col'}),
        dict(name='div', attrs={'id': 'comments'}),  # comment these out if you want the comments to show
        dict(name='div', attrs={'class': 'item-list related-content'}),
    ]

    feeds = [
        (u'net', u'http://feeds.feedburner.com/net/topstories?format=xml')
    ]

    # Temp files created by get_obfuscated_article(); references are kept
    # here for as long as the recipe class is alive.
    temp_files = []

    def get_obfuscated_article(self, url):
        """Download ``url`` and return the path of a local copy of its HTML.

        :param url: address of the article page to fetch.
        :return: file name of a persistent temporary file (suffix
                 ``_fa.html``) containing the downloaded page.
        """
        br = self.get_browser()
        self.log('THE CURRENT URL IS: ', url)
        # One request is enough: fetching the same URL twice only doubled the
        # download time and the load on the server.
        # NOTE(review): if the site ever needs a priming request (e.g. to set
        # cookies before serving the real page), reinstate a first br.open(url).
        response = br.open(url)
        html = response.read()

        article_file = PersistentTemporaryFile('_fa.html')
        # Register before writing so the file stays tracked even if the write fails.
        self.temp_files.append(article_file)
        article_file.write(html)
        article_file.close()
        return article_file.name
Camper65