|
|
#1 |
|
Junior Member
![]() Posts: 6
Karma: 10
Join Date: Oct 2010
Device: kindle
|
Eurogamer / Digital Foundry Recipe
Hello,
This is my first public recipe attempt. It works for the Digital Foundry (and probably other) feeds on the Eurogamer site. It is currently in its initial state, i.e. barely working. If you have any comments, suggestions, or bug reports, please do not hesitate to share them. Code:
class DigitalFoundry(BasicNewsRecipe):
    """Calibre recipe that downloads the Eurogamer "Digital Foundry" feed.

    Each article page is reduced to its ``<div id="blog">`` element, the
    follow-up pages reachable through the ``<li class="next">`` pager are
    merged into it, embedded video players are replaced by their preview
    image, and a few footer/social-widget elements are removed.
    """

    author = 'Sukru'
    title = 'Eurogamer'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.eurogamer.net/img/DigitalFoundryPage/logo.gif'
    # Add other feeds from http://www.eurogamer.net/rss.php
    feeds = [(u'Digital Foundry', u'http://www.eurogamer.net/rss/eurogamer_digitalfoundry_feed.rss')]
    # Prefix prepended to the pager's href when fetching follow-up pages.
    INDEX = 'http://www.eurogamer.net/'

    def remove_item(self, text, item):
        """Detach every element whose ``id`` attribute equals *item*.

        :param text: parsed document (or sub-tree) to search.
        :param item: value of the ``id`` attribute to remove.
        """
        for child in text.findAll(id=item):
            child.extract()

    def append_page(self, soup, appendtag, position):
        """Merge the page linked from the "next" pager of *soup* into *appendtag*.

        The linked page is fetched with ``self.index_to_soup``; its
        ``<div id="blog">`` first receives any further pages (recursively,
        appended after its current contents) and is then inserted into
        *appendtag* at index *position*.

        Nothing is inserted when *soup* has no ``<li class="next">``, when
        that element has no usable link, or when the fetched page has no
        ``<div id="blog">``.

        :param soup: parsed page in which to look for the pager.
        :param appendtag: tag that receives the next page's content.
        :param position: index in ``appendtag`` at which to insert.
        """
        pager = soup.find('li', attrs={'class': 'next'})
        if pager is None:
            return

        link = pager.a
        if link is None or not link.get('href'):
            # A pager without an anchor/href cannot be followed.
            print("*** ERROR *** - NO NEXT PAGE LINK")
            return

        nexturl = link['href']
        print("adding page %s" % (nexturl,))
        # NOTE(review): plain concatenation assumes the href is relative to
        # INDEX - confirm against the live markup.
        soup2 = self.index_to_soup(self.INDEX + nexturl)
        texttag = soup2.find('div', id='blog')
        if texttag is None:
            # The follow-up page lacks the article container; skip it
            # instead of failing the whole article.
            print("*** ERROR *** - NO TEXT")
            return

        # Recurse first so later pages end up after this page's own content.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Reduce the page to its article text and pull in follow-up pages.

        Replaces ``<div id="browserMaster">`` with ``<div id="blog">`` and
        then calls :meth:`append_page` to merge subsequent pages. If either
        element is missing, an error is printed and *soup* is returned
        unmodified rather than raising ``AttributeError`` on ``None``.

        :param soup: parsed article page.
        :return: the same *soup* object, modified in place.
        """
        text = soup.find('div', id='blog')
        body = soup.find('div', id='browserMaster')
        if text is None:
            print("*** ERROR *** - NO TEXT")
        if body is None:
            print("*** ERROR *** - NO BODY")
        if text is None or body is None:
            # Unexpected layout: leave the page as downloaded.
            return soup

        text.extract()
        body.replaceWith(text)
        # NOTE(review): 10 is the insert index used for the first follow-up
        # page; it presumably exceeds the number of children of <body> so the
        # page lands at the end - confirm.
        self.append_page(soup, soup.body, 10)
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Replace video embeds by their preview image and strip page cruft.

        For every ``<div class="egtv-video centre">`` the element with class
        ``frame`` is replaced by the ``<img class="screengrab">`` found in
        the same div. Embeds lacking either part are left untouched and
        reported on stdout. Finally the elements with ids ``phat-footer``,
        ``fb-root`` and ``facebook-like-button`` are removed.

        :param soup: parsed article page.
        :param first_fetch: unused here.
        :return: the same *soup* object, modified in place.
        """
        movies = soup.findAll('div', attrs={'class': 'egtv-video centre'})
        for movie in movies:
            preview = movie.find('img', attrs={'class': 'screengrab'})
            frame = movie.find(attrs={'class': 'frame'})
            if preview is not None and frame is not None:
                preview.extract()
                frame.replaceWith(preview)
            else:
                print("Missing parts in movie")
                print("frame =  %s" % (frame,))
                print("preview =  %s" % (preview,))
                print("movie =  %s" % (movie,))

        self.remove_item(soup, 'phat-footer')
        self.remove_item(soup, 'fb-root')
        self.remove_item(soup, 'facebook-like-button')
        return soup
|
|
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Q: Memory Error trying to download EuroGamer RSS Feed | sukru | Recipes | 5 | 10-20-2010 10:30 AM |
| Memory Error trying to download EuroGamer RSS Feed | sukru | Calibre | 3 | 10-15-2010 07:32 AM |
| New verions of Adobe Digital Editions - Digital Editions 1.7.2 | kennyc | ePub | 0 | 02-12-2010 07:21 AM |
| Recipe Help Please | estral | Calibre | 1 | 06-11-2009 03:35 PM |
| Top Publishers and Sony Support New Digital Publishing Experience (Digital Editions) | NatCh | Sony Reader | 50 | 06-22-2007 09:02 PM |