View Single Post
Old 05-14-2011, 12:48 PM   #1
schuster
Zealot
schuster doesn't litter
 
Posts: 119
Karma: 100
Join Date: Jan 2011
Location: Germany / NRW /Köln
Device: prs-650 / prs-350 /kindle 3
Recipe for scientific publications - Max Planck Inst. (German)

Code:
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class AdvancedUserRecipe1303841067(BasicNewsRecipe):
    """Calibre recipe for the German 'Forschung' RSS feed of www.mpg.de.

    Articles are downloaded from the site's ``/print/`` pages (see
    ``print_version``) and a list of page elements is stripped by
    class, id and tag name before conversion.
    """

    title = u'Max-Planck-Inst.'
    __author__ = 'schuster'
    language = 'de'

    # Feed selection; oldest_article is expressed in days per the calibre
    # recipe API.
    oldest_article = 30
    max_articles_per_feed = 100
    feeds = [(u'Forschung', u'http://www.mpg.de/de/forschung.rss')]

    # Follow the feed links to the full articles instead of using the
    # content embedded in the feed, and drop site styling and scripts.
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Elements removed from every downloaded page, matched by CSS class,
    # by element id, and by tag name respectively.
    remove_tags = [
        dict(attrs={'class': [
            'clearfix',
            'lens',
            'col2_box_list',
            'col2_box_teaser group_ext no_print',
            'dotted_line',
            'col2_box_teaser',
            'box_image small',
            'bold',
            'col2_box_teaser no_print',
            'print_kontakt',
        ]}),
        dict(id=['ie_clearing', 'col2', 'col2_content']),
        dict(name=['script', 'noscript', 'style']),
    ]

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        The fourth ``/``-separated piece of ``url`` is appended to
        ``http://www.mpg.de/print/``. For an absolute URL such as
        ``http://www.mpg.de/<segment>/<slug>`` that piece is the first
        path segment (presumably the article id -- confirm against the
        live feed).

        Raises:
            IndexError: if ``url`` has fewer than four ``/``-separated
                pieces, e.g. a bare ``http://www.mpg.de``.
        """
        # NOTE: this body was previously indented with hard tabs inside a
        # space-indented class, which Python 3 rejects with TabError.
        first_path_segment = url.split("/")[3]
        return 'http://www.mpg.de/print/' + first_path_segment
schuster is offline   Reply With Quote