View Single Post
Old 01-17-2012, 05:34 AM   #4
roedi06
Junior Member
roedi06 began at the beginning.
 
Posts: 9
Karma: 10
Join Date: Jan 2012
Device: SONY PRS-T1
Completed!

I successfully completed the Recipe for 'Tweakers.net - Including reactions'

For details, see the code below or have a look at the attached file. Kovid, could you have a look at it and maybe include it in a future release? Other people in the tweakers.net community and I would be thankful!

Code:
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__   = 'GPL v3'
__docformat__ = 'restructuredtext en'

import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile

class Tweakers(BasicNewsRecipe):
    """Calibre recipe for the Tweakers.net news feed, including reader reactions.

    Each article is reduced to the news column plus the reactions block
    (``keep_only_tags``), stripped of navigation/advertising elements
    (``remove_tags``), and cleaned up with ``preprocess_regexps`` before
    conversion.
    """

    title = u'Tweakers.net - with Reactions'
    __author__ = 'Roedi06'
    language = 'nl'
    oldest_article = 7
    max_articles_per_feed = 100
    cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'

    # Only the article body and the reactions section are kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'columnwrapper news'}),
        {'id': 'reacties'},
    ]

    # Elements dropped from what remains after keep_only_tags.
    remove_tags = [
        dict(name='div', attrs={'id': ['utracker']}),
        {'id': ['channelNav']},
        {'id': ['contentArea']},
        {'class': ['breadCrumb']},
        {'class': ['nextPrevious ellipsis']},
        {'class': ['advertorial']},
        {'class': ['sidebar']},
        {'class': ['filterBox']},
        {'id': ['toggleButtonTxt']},
        {'id': ['socialButtons']},
        {'class': ['button']},
        {'class': ['textadTop']},
        {'class': ['commentLink']},
        {'title': ['Reageer op deze reactie']},
        {'class': ['pageIndex']},
        {'class': ['reactieHeader collapsed']},
    ]
    no_stylesheets = True

    # (pattern, replacement-callable) pairs, listed in the order they are
    # meant to be applied: the moderation label must be inserted before the
    # final rule removes the moderation <div> itself.
    preprocess_regexps = [
        # Drop horizontal rules in any spelling: <hr>, <hr/>, <hr class="x">.
        (re.compile(r'<hr\b[^>]*>', re.IGNORECASE | re.DOTALL),
         lambda match: ''),
        # NOTE(review): only attribute-less <p> is removed while every </p>
        # is removed; kept as the original author wrote it.
        (re.compile(r'<p>', re.IGNORECASE | re.DOTALL), lambda match: ''),
        (re.compile(r'</p>', re.IGNORECASE | re.DOTALL), lambda match: ''),
        # Render links as bold+underlined text. `\b` keeps the rule from
        # hitting other tags that merely start with "a" (<abbr>, <area>, ...)
        # and `[^>]*` also covers anchors whose attributes span lines.
        (re.compile(r'<a\b[^>]*>'), lambda match: '<b><u>'),
        (re.compile(r'</a>'), lambda match: '</u></b>'),
        (re.compile(r'<span class="new">', re.IGNORECASE | re.DOTALL),
         lambda match: ''),
        (re.compile(r'</span>', re.IGNORECASE | re.DOTALL),
         lambda match: ''),
        # Put a textual score (" - moderated 0" / "+1" / "+2" / "+3") in
        # front of the moderation block, whose score is otherwise only
        # visible through the image file name.
        (re.compile(r'<div class="moderation"><img src="http://tweakimg\.net'
                    r'/g/if/comments/score_([0-3])'),
         lambda match: ' - moderated '
         + ('0' if match.group(1) == '0' else '+' + match.group(1))
         + match.group(0)),
        # Finally remove the moderation block; the label added above stays.
        (re.compile(r'<div class="moderation">.*?</div>'), lambda match: ''),
    ]

    # Styling for the reaction header, reaction text and quoted text.
    extra_css = (
        '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } '
        '  \t   .reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } '
        '\t   .quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'
    )

    feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]

    def print_version(self, url):
        """Return the article URL with the ``max=200`` query parameter added.

        The parameter is appended with ``?`` when ``url`` has no query string
        yet and with ``&`` otherwise, so an existing query string is not
        corrupted.
        """
        # presumably raises the number of reactions shown per page - confirm
        separator = '&' if '?' in url else '?'
        return url + separator + 'max=200'
Attached Files
File Type: zip Tweakers.net including React_1000.zip (429 Bytes, 324 views)
roedi06 is offline   Reply With Quote