![]() |
#1 |
Junior Member
![]() Posts: 6
Karma: 10
Join Date: Oct 2010
Device: kindle
|
Eurogamer / Digital Foundry Recipe
Hello,
This is my first public recipe attempt. It works for the Digital Foundry (and probably other) feeds on the Eurogamer site. It is currently in its initial state, i.e. barely working. If you have any comments, suggestions, or bug reports, please do not hesitate to tell me. Code:
class DigitalFoundry(BasicNewsRecipe):
    """Calibre recipe for the Eurogamer "Digital Foundry" RSS feed.

    Each article is reduced to its ``div#blog`` element, any follow-up pages
    linked through the ``li.next`` pager are fetched and merged into the
    first page, and embedded video players are replaced by their preview
    screenshot.
    """

    # NOTE(review): calibre recipes conventionally use ``__author__``;
    # confirm whether plain ``author`` is read anywhere before renaming.
    author = 'Sukru'
    title = 'Eurogamer'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    cover_url = 'http://www.eurogamer.net/img/DigitalFoundryPage/logo.gif'

    # Add other feeds from http://www.eurogamer.net/rss.php
    feeds = [(u'Digital Foundry', u'http://www.eurogamer.net/rss/eurogamer_digitalfoundry_feed.rss')]

    # Prefix used to turn the pager's site-relative links into full URLs.
    INDEX = 'http://www.eurogamer.net/'

    def remove_item(self, text, item):
        """Detach every element whose ``id`` equals *item* from *text*."""
        for child in text.findAll(id=item):
            child.extract()

    def _absolute_url(self, href):
        """Return *href* as a full URL, joining it onto ``INDEX`` if needed."""
        if href.startswith(('http://', 'https://')):
            return href
        # Avoid a double slash when the href is root-relative ("/articles/..").
        return self.INDEX.rstrip('/') + '/' + href.lstrip('/')

    def append_page(self, soup, appendtag, position):
        """Fetch the page after *soup* and insert its text into *appendtag*.

        Follows the ``li.next`` pager link recursively, so all later pages
        are merged into the next page's ``div#blog`` before that element is
        inserted into *appendtag* at index *position*. Does nothing when
        there is no usable pager link or the next page has no ``div#blog``.
        """
        pager = soup.find('li', attrs={'class': 'next'})
        if pager is None or pager.a is None:
            return
        nexturl = pager.a.get('href')
        if not nexturl:
            return

        print("adding page %s" % nexturl)
        soup2 = self.index_to_soup(self._absolute_url(nexturl))
        texttag = soup2.find('div', id='blog')
        if texttag is None:
            # Without the article container there is nothing to merge.
            print("*** ERROR *** - NO TEXT")
            return

        # Pages after this one are appended at the end of this page's text.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Replace the page wrapper with the article text and merge pages.

        ``div#browserMaster`` is swapped for ``div#blog``; follow-up pages
        are then appended. If either element is missing, the problem is
        reported and *soup* is returned unmodified instead of raising.
        """
        text = soup.find('div', id='blog')
        body = soup.find('div', id='browserMaster')
        if text is None:
            print("*** ERROR *** - NO TEXT")
        if body is None:
            print("*** ERROR *** - NO BODY")
        if text is None or body is None:
            return soup

        text.extract()
        body.replaceWith(text)
        self.append_page(soup, soup.body, 10)
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Swap video frames for preview images and strip footer/social ids."""
        movies = soup.findAll('div', attrs={'class': 'egtv-video centre'})
        for movie in movies:
            preview = movie.find('img', attrs={'class': 'screengrab'})
            frame = movie.find(attrs={'class': 'frame'})
            if preview is not None and frame is not None:
                # Keep the still image in place of the player frame.
                preview.extract()
                frame.replaceWith(preview)
            else:
                print("Missing parts in movie")
                print("frame =  %s" % (frame,))
                print("preview =  %s" % (preview,))
                print("movie =  %s" % (movie,))

        for element_id in ('phat-footer', 'fb-root', 'facebook-like-button'):
            self.remove_item(soup, element_id)
        return soup
![]() |
![]() |
![]() |
|
![]() |
||||
Thread | Thread Starter | Forum | Replies | Last Post |
Q: Memory Error trying to download EuroGamer RSS Feed | sukru | Recipes | 5 | 10-20-2010 09:30 AM |
Memory Error trying to download EuroGamer RSS Feed | sukru | Calibre | 3 | 10-15-2010 06:32 AM |
New versions of Adobe Digital Editions - Digital Editions 1.7.2 | kennyc | ePub | 0 | 02-12-2010 06:21 AM |
Recipe Help Please | estral | Calibre | 1 | 06-11-2009 02:35 PM |
Top Publishers and Sony Support New Digital Publishing Experience (Digital Editions) | NatCh | Sony Reader | 50 | 06-22-2007 08:02 PM |