View Single Post
Old 06-05-2012, 11:18 PM   #5
atordo
Connoisseur
atordo is a splendid one to behold
 
Posts: 89
Karma: 19669
Join Date: Apr 2012
Device: Kindle Touch
Updated version with better page parsing and some CSS for eye candy.

Spoiler:
Code:
import re
from calibre.web.feeds.news import BasicNewsRecipe

class ElMundoTodayRecipe(BasicNewsRecipe):
    """Calibre news recipe for the Spanish site El Mundo Today.

    Articles are taken from the site's RSS feed. Each downloaded page is
    trimmed with regular expressions before parsing, reduced to the
    ``post-wrapper`` div, and styled with a small amount of custom CSS.
    """

    title = 'El Mundo Today'
    # '\xf1' is the letter n-with-tilde; written as an escape so the text
    # survives regardless of the encoding the recipe file is saved in.
    description = u'La actualidad del ma\xf1ana'
    category = 'Noticias, humor'
    cover_url = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png'
    oldest_article = 30
    max_articles_per_feed = 60
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True
    language = 'es_ES'
    use_embedded_content = False

    # Applied to the raw HTML before it is parsed.
    preprocess_regexps = [
        # Replace everything between the closing </title> tag and the
        # article start marker with a bare <body> tag.
        (re.compile(r'</title>.*<!--Begin Article Single-->', re.DOTALL),
         lambda match: '</title><body>'),
        # Drop everything from the "social4i" div through </body>.
        (re.compile(r'<div class="social4i".*</body>', re.DOTALL),
         lambda match: '</body>'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'class': 'post-wrapper'}),
    ]

    remove_attributes = ['href', 'title', 'alt']

    extra_css = '''
        .antetitulo{font-variant:small-caps; font-weight:bold} .articleinfo{font-size:small}
        img{margin-bottom:0.4em; display:block; margin-left:auto; margin-right:auto}
    '''

    feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')]

    def get_browser(self, *args, **kwargs):
        """Return the base recipe browser with gzip handling switched on.

        This method was originally spelled ``get_broser``; under that name
        it did not override ``BasicNewsRecipe.get_browser``, so the gzip
        setting was never applied.
        """
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        br.set_handle_gzip(True)
        return br

    # Backward-compatible alias for the original (misspelled) method name.
    get_broser = get_browser
atordo is offline   Reply With Quote