View Single Post
Old 01-20-2015, 04:22 PM   #1
desUBIKado
Member
desUBIKado began at the beginning.
 
Posts: 22
Karma: 12
Join Date: Feb 2009
Location: Zaragoza, Spain
Device: prs-505, iliad
jotdown.es fixed [ES]

Hi there:

Here is a new version of this recipe. The previous one has not worked properly for several weeks because of format problems.

Regards.

Spoiler:

Code:
#!/usr/bin/env  python
# Module metadata for the Jot Down recipe.
# Site: http://www.jotdown.es/
__license__ = 'GPL v3'
__copyright__ = '23 June 2013, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Contemporary Culture Magazine'
__version__ = 'v0.02'
__date__ = '20, January 2015'

import time
import re
from calibre.web.feeds.news import BasicNewsRecipe

class jotdown(BasicNewsRecipe):
    """Recipe for the Jot Down magazine feed (http://www.jotdown.es/).

    The class is purely declarative: it defines no methods and configures
    the recipe only through the class attributes below.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Jot Down - Contemporary Culture Magazine'
    author = 'desUBIKado'
    description = 'Revista digital con magníficos y extensos artículos'
    publisher = 'Wabi Sabi Investments, S.C.'
    category = 'Opinion, culture, science, movies, TV shows, music, blogs'
    language = 'es'
    masthead_url = 'http://www.jotdown.es/wp-content/uploads/2011/04/logoJotDown.png'
    timefmt = '[%a, %d %b, %Y]'

    # --- Fetch settings -------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 20
    delay = 1
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # Single feed: the site's front page ("Portada").
    feeds = [
        (u'Portada', u'http://www.jotdown.es/feed/'),
    ]

    # --- Page clean-up --------------------------------------------------
    # Only the <div id="content"> and <div id="comments"> containers are
    # kept from each page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': ['content']}},
        {'name': 'div', 'attrs': {'id': ['comments']}},
    ]

    # Elements stripped from the kept content. The class list is reproduced
    # exactly as published, including the repeated 'after-meta' entry and
    # the double space inside 'wp_rp_wrap  wp_rp_plain'.
    remove_tags = [
        {'name': 'a', 'attrs': {'href': ['http://alternativaseconomicas.coop/']}},
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'reply',
                    'after-meta',
                    'tags_list',
                    'wp_rp_wrap  wp_rp_plain',
                    'after-meta',
                    'share_box',
                ],
            },
        },
        {'name': 'div', 'attrs': {'align': ['center']}},
        {'name': 'span', 'attrs': {'class': ['fbreplace', 'says']}},
        {'name': 'img', 'attrs': {'class': ['avatar avatar-60 photo']}},
        {'name': 'li', 'attrs': {'class': ['post pingback']}},
        {'name': 'div', 'attrs': {'id': 'respond'}},
    ]

    # The <div id="respond"> element marks the cut-off point.
    remove_tags_after = {'name': 'div', 'attrs': {'id': 'respond'}}

    # --- Raw HTML rewrites ----------------------------------------------
    # Each entry pairs a compiled pattern with a callable that ignores the
    # match object and returns a fixed replacement string.
    preprocess_regexps = [
        # Embedded video: turn the opening of the YouTube Flash <object>
        # tag into an <img> that points at img.youtube.com, then close the
        # image URL with "/0.jpg" and reopen an <object> tag.
        (
            re.compile(
                r'<object type="application/x-shockwave-flash" data="http://www.youtube.com/v',
                re.DOTALL | re.IGNORECASE,
            ),
            lambda _match: '<img src="http://img.youtube.com/vi',
        ),
        (
            re.compile(r'&rel=0&fs=1"', re.DOTALL | re.IGNORECASE),
            lambda _match: '/0.jpg"><object',
        ),
        # Category link: open an HTML comment right after the "meta" div
        # and close it after the category anchor's attributes, so the link
        # markup is commented out; the trailing "</a> &mdash;" is dropped.
        (
            re.compile(r'<div class="meta">', re.DOTALL | re.IGNORECASE),
            lambda _match: '<div class="meta"><!-- ',
        ),
        (
            re.compile(
                r'</a>, <a href="http://www.jotdown.es/category',
                re.DOTALL | re.IGNORECASE,
            ),
            lambda _match: ', <!--',
        ),
        (
            re.compile(r'"category tag">', re.DOTALL | re.IGNORECASE),
            lambda _match: '--> ',
        ),
        (
            re.compile(r'</a> &mdash;', re.DOTALL | re.IGNORECASE),
            lambda _match: '',
        ),
        # Title link: replace the anchor inside <h1 class="title"> with a
        # <div>, so the title is no longer a hyperlink.
        (
            re.compile(r'<h1 class="title"><a href="', re.DOTALL | re.IGNORECASE),
            lambda _match: '<h1 class="title"><div class="',
        ),
        (
            re.compile(r'</a></h1>', re.DOTALL | re.IGNORECASE),
            lambda _match: '</div></h1>',
        ),
    ]
desUBIKado is offline   Reply With Quote