Hello,
This is my first attempt at a public recipe. It works for the Digital Foundry feed (and probably the other feeds) on the Eurogamer site.
It is currently in its initial state, i.e. barely working. If you have any comments or suggestions, or if you find any bugs, please do not hesitate to tell me.
Code:
class DigitalFoundry(BasicNewsRecipe):
    """Recipe for the Digital Foundry feed on eurogamer.net.

    Each article is reduced to its ``div#blog`` element, the following
    pages of multi-page articles are pulled in via the "next" pager link,
    embedded video frames are replaced by their preview image, and a few
    footer / social-media elements are removed.
    """

    author = 'Sukru'
    title = 'Eurogamer'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.eurogamer.net/img/DigitalFoundryPage/logo.gif'
    # Add other feeds from http://www.eurogamer.net/rss.php
    feeds = [(u'Digital Foundry', u'http://www.eurogamer.net/rss/eurogamer_digitalfoundry_feed.rss')]
    # Prefix prepended to the "next page" links found in article pagers.
    INDEX = 'http://www.eurogamer.net/'

    def remove_item(self, text, item):
        """Remove every element whose ``id`` equals ``item`` from ``text``."""
        for child in text.findAll(id=item):
            child.extract()

    def append_page(self, soup, appendtag, position):
        """Insert the continuation pages of an article into ``appendtag``.

        Looks for an ``<li class="next">`` pager entry in ``soup``. When one
        is found, the linked page is downloaded, its own continuation is
        appended recursively, and its ``div#blog`` element is inserted into
        ``appendtag`` at index ``position``. Does nothing when there is no
        usable pager link or the linked page has no ``div#blog``.
        """
        pager = soup.find('li', attrs={'class': 'next'})
        if pager is None:
            return

        # The pager entry may lack an anchor or an href; the original
        # indexing would have raised in that case.
        link = pager.a
        nexturl = link.get('href') if link is not None else None
        if not nexturl:
            return

        print('adding page %s' % (nexturl,))
        soup2 = self.index_to_soup(self.INDEX + nexturl)
        texttag = soup2.find('div', id='blog')
        if texttag is None:
            # Without the article container there is nothing to append;
            # stop following the pager chain instead of crashing.
            print('*** ERROR *** - NO TEXT on page %s' % (nexturl,))
            return

        # Later pages are appended to the end of this page's text first,
        # so the whole tail is inserted into the caller's tag in one go.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Replace the page wrapper with the article text and add more pages.

        ``div#browserMaster`` is replaced by ``div#blog``, then continuation
        pages are inserted into the document body. If either element is
        missing, the problem is reported and ``soup`` is returned unchanged.
        """
        text = soup.find('div', id='blog')
        body = soup.find('div', id='browserMaster')
        if text is None:
            print('*** ERROR *** - NO TEXT')
        if body is None:
            print('*** ERROR *** - NO BODY')
        if text is None or body is None:
            # Check before mutating anything: the original went on to call
            # methods on None here and raised AttributeError.
            return soup

        text.extract()
        body.replaceWith(text)
        self.append_page(soup, soup.body, 10)
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Swap video frames for their preview image and strip page clutter."""
        for movie in soup.findAll('div', attrs={'class': 'egtv-video centre'}):
            preview = movie.find('img', attrs={'class': 'screengrab'})
            frame = movie.find(attrs={'class': 'frame'})
            if preview is not None and frame is not None:
                # Detach the image first so it survives the replacement
                # even when it is nested inside the frame.
                preview.extract()
                frame.replaceWith(preview)
            else:
                print('Missing parts in movie')
                print('frame =  %s' % (frame,))
                print('preview =  %s' % (preview,))
                print('movie =  %s' % (movie,))

        for item_id in ('phat-footer', 'fb-root', 'facebook-like-button'):
            self.remove_item(soup, item_id)
        return soup
(I should give credit to the original author of the append_page routine, but I don't know who that is.)