Register Guidelines E-Books Search Today's Posts Mark Forums Read

Go Back   MobileRead Forums > E-Book Software > Calibre > Recipes

Notices

Reply
 
Thread Tools Search this Thread
Old 01-09-2011, 02:36 PM   #1
desUBIKado
Member
desUBIKado began at the beginning.
 
Posts: 19
Karma: 12
Join Date: Feb 2009
Location: Zaragoza, Spain
Device: prs-505, iliad
One new recipe and another one updated (in Spanish)

Hi there,

I bring a newspaper from northern Spain, elcorreo.com

Spoiler:

#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '08 Januery 2011, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Biscay'
__version__ = 'v0.08'
__date__ = '08, Januery 2011'
'''
http://www.elcorreo.com/
'''

import time
import re
from calibre.web.feeds.news import BasicNewsRecipe

class heraldo(BasicNewsRecipe):
    """calibre news recipe for El Correo (http://www.elcorreo.com/).

    The feeds below are the ones published under the ``/vizcaya/`` path of
    the site. The class attributes are ordinary ``BasicNewsRecipe``
    settings; see calibre's recipe API documentation for their exact
    semantics.

    NOTE(review): the class is named ``heraldo`` although it targets
    El Correo; the name is kept unchanged so existing references still work.
    """

    author = 'desUBIKado'
    description = 'Daily newspaper from Biscay'
    title = u'El Correo'
    publisher = 'Vocento'
    category = 'News, politics, culture, economy, general interest'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    encoding = 'iso-8859-1'
    remove_empty_feeds = True
    # JavaScript is left in place: the first three preprocess_regexps below
    # rewrite script text ('var RUTA_IMAGEN', 'var SITIO = ...') into markup.
    remove_javascript = False

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Portada', u'http://www.elcorreo.com/vizcaya/portada.xml'),
        (u'Local', u'http://www.elcorreo.com/vizcaya/rss/feeds/vizcaya.xml'),
        (u'Internacional', u'http://www.elcorreo.com/vizcaya/rss/feeds/internacional.xml'),
        (u'Econom\xeda', u'http://www.elcorreo.com/vizcaya/rss/feeds/economia.xml'),
        (u'Pol\xedtica', u'http://www.elcorreo.com/vizcaya/rss/feeds/politica.xml'),
        (u'Opini\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/opinion.xml'),
        (u'Deportes', u'http://www.elcorreo.com/vizcaya/rss/feeds/deportes.xml'),
        (u'Sociedad', u'http://www.elcorreo.com/vizcaya/rss/feeds/sociedad.xml'),
        (u'Cultura', u'http://www.elcorreo.com/vizcaya/rss/feeds/cultura.xml'),
        (u'Televisi\xf3n', u'http://www.elcorreo.com/vizcaya/rss/feeds/television.xml'),
        (u'Gente', u'http://www.elcorreo.com/vizcaya/rss/feeds/gente.xml'),
    ]

    # Tag selectors for the article content to keep.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['grouphead', 'date', 'art_head', 'story-texto', 'text', 'colC_articulo', 'contenido_comentarios']}),
        dict(name='div', attrs={'id': ['articulo', 'story-texto', 'story-entradilla']}),
    ]

    # Tag selectors for page furniture to drop.
    # NOTE(review): 'modulo calculadoras' contains a space in the posted
    # source; it is kept as-is because it may be a literal two-class value.
    remove_tags = [
        dict(name='div', attrs={'class': ['art_barra', 'detalles-opinion', 'formdenunciar', 'modulo calculadoras', 'nubetags', 'pie']}),
        dict(name='div', attrs={'class': ['mod_lomas', 'bloque_lomas', 'blm_header', 'link-app3', 'link-app4', 'botones_listado']}),
        dict(name='div', attrs={'class': ['navegacion_galeria', 'modulocanalpromocion', 'separa', 'separacion', 'compartir', 'tags_relacionados']}),
        dict(name='div', attrs={'class': ['moduloBuscadorDeportes', 'modulo-gente', 'moddestacadopeq', 'OpcArt', 'articulopiniones']}),
        dict(name='div', attrs={'class': ['modulo-especial', 'publiEspecial']}),
        dict(name='div', attrs={'id': ['articulopina']}),
        dict(name='br', attrs={'class': 'clear'}),
        dict(name='form', attrs={'name': 'frm_conversor2'}),
    ]

    # NOTE(review): the trailing space in 'articulo ' is present in the
    # posted source and is preserved; confirm against the site's markup.
    remove_tags_before = dict(name='div', attrs={'class': 'articulo '})
    remove_tags_after = dict(name='div', attrs={'class': 'comentarios'})

    def get_cover_url(self):
        """Return the URL to use as the cover.

        Builds the URL of today's front-page PDF (named ``DDMMYYYY-viz.pdf``)
        and tries to open it. If opening fails, the failure is logged and the
        newspaper's logo URL is returned instead.
        """
        # Previously considered source: http://img.kiosko.net/2011/01/02/es/elcorreo.750.jpg
        # Example of the URL built here: http://info.elcorreo.com/pdf/06012011-viz.pdf
        cover = 'http://info.elcorreo.com/pdf/' + time.strftime('%d%m%Y') + '-viz.pdf'

        # Use the instance so this works whether get_browser is a
        # classmethod or a regular method on BasicNewsRecipe.
        br = self.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Best effort: any failure to fetch the PDF falls back to the logo.
            self.log("\nPortada no disponible")
            cover = 'http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
        return cover

    extra_css = '''
        h1, .headline {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
        h2, .subhead {font-family:Arial,Helvetica,sans-serif; font-style:italic; font-weight:normal;font-size:18px;}
        h3, .overhead {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
        h4 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
        h5 {font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;}
        h6 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:16px;}
        .date,.byline, .photo {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:14px;}
        img{margin-bottom: 0.4em}
    '''

    # (compiled pattern, replacement callable) pairs.
    preprocess_regexps = [

        # To present the image of the embedded video
        (re.compile(r'var RUTA_IMAGEN', re.DOTALL|re.IGNORECASE), lambda match: '</script><img src'),
        # The dot is escaped so only a literal '.jpg";' is rewritten.
        (re.compile(r'\.jpg";', re.DOTALL|re.IGNORECASE), lambda match: '.jpg">'),
        (re.compile(r'var SITIO = "elcorreo";', re.DOTALL|re.IGNORECASE), lambda match: '<SCRIPT TYPE="text/JavaScript"'),

        # To separate paragraphs with a blank line
        (re.compile(r'<div class="p"', re.DOTALL|re.IGNORECASE), lambda match: '<p></p><div class="p"'),

        # To put a blank line between the subtitle and the date and time of the news
        (re.compile(r'<div class="date">', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="date">'),

        # To put a blank line between the intro of the embedded videos and the previous text
        (re.compile(r'<div class="video"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div class="video"'),

        # To view photos from the first when these are presented as a gallery
        (re.compile(r'src="/img/shim.gif"', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'rel=', re.DOTALL|re.IGNORECASE), lambda match: 'src='),

        # To remove the link of the title
        (re.compile(r'<h1 class="headline">\n<a href="', re.DOTALL|re.IGNORECASE), lambda match: '<h1 class="'),
        (re.compile(r'</a>\n</h1>', re.DOTALL|re.IGNORECASE), lambda match: '</h1>'),

    ]



And an update to heraldo.es

Spoiler:

#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon'
__version__ = 'v0.04'
__date__ = '6, Januery 2011'
'''
http://www.heraldo.es/
'''

import time
import re
from calibre.web.feeds.news import BasicNewsRecipe

class heraldo(BasicNewsRecipe):
    """calibre news recipe for Heraldo de Aragon (http://www.heraldo.es/).

    The class attributes are ordinary ``BasicNewsRecipe`` settings; see
    calibre's recipe API documentation for their exact semantics.
    """

    author = 'desUBIKado'
    description = 'Daily newspaper from Aragon'
    title = u'Heraldo de Aragon'
    publisher = 'OJD Nielsen'
    category = 'News, politics, culture, economy, general interest'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 2
    delay = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss'),
    ]

    # Tag selectors for the article content to keep.
    keep_only_tags = [dict(name='div', attrs={'id': ['dts', 'com']})]

    # Tag selectors for page furniture to drop.
    remove_tags = [
        dict(name='a', attrs={'class': ['com flo-r', 'enl-if', 'enl-df']}),
        dict(name='div', attrs={'class': ['brb-b-s con marg-btt', 'cnt-rel con']}),
        dict(name='form', attrs={'class': 'form'}),
        dict(name='ul', attrs={'id': ['cont-tags', 'pag-1']}),
    ]

    remove_tags_before = dict(name='div', attrs={'id': 'dts'})
    remove_tags_after = dict(name='div', attrs={'id': 'com'})

    def get_cover_url(self):
        """Return the URL to use as the cover.

        Builds the URL of today's front-page PDF (under a ``YYYYMMDD``
        directory) and tries to open it. If opening fails, the failure is
        logged and the newspaper's logo URL is returned instead.
        """
        # URL shape: http://oldorigin-www.heraldo.es/<YYYYMMDD>/primeras/portada_aragon.pdf
        cover = ('http://oldorigin-www.heraldo.es/' + time.strftime('%Y%m%d')
                 + '/primeras/portada_aragon.pdf')

        # Use the instance so this works whether get_browser is a
        # classmethod or a regular method on BasicNewsRecipe.
        br = self.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Best effort: any failure to fetch the PDF falls back to the logo.
            self.log("\nPortada no disponible")
            cover = 'http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png'
        return cover

    extra_css = '''
        .con strong{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;}
        .con h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
        .con span{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:12px;}
        .ent {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
        img{margin-bottom: 0.4em}
    '''

    # (compiled pattern, replacement callable) pairs.
    preprocess_regexps = [

        # To separate the comments with a blank line
        (re.compile(r'<div id="com"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div id="com"'),
    ]


Greetings
desUBIKado is offline   Reply With Quote
Old 01-12-2011, 01:34 PM   #2
mis_suscripcione
Junior Member
mis_suscripcione began at the beginning.
 
Posts: 2
Karma: 10
Join Date: Jan 2011
Device: sony 650
Aleluya!

Hola desUBIcado... precisamente estaba yo buscando cómo poder leer en mi ereader El Correo, y me encuentro con tu post. Perdona que abuse ya de tu amabilidad, pero es que soy un total novato tanto con el e-reader que me han traido los Reyes (SONY 650) como con Calibre. Dos cuestiones:
a) ¿Cómo debo introducir ese codigo que has desarrollado en mi calibre para que descargue el Correo? Sorry: como si fuese para tontos...
b) No sé de qué edición de El Correo se trata, ¿puedo hacer algo para que descargue la edición de Alava?
Muchas gracias de antemano.
mis_suscripcione is offline   Reply With Quote
 
Advertisement
Old 01-14-2011, 03:38 PM   #3
desUBIKado
Member
desUBIKado began at the beginning.
 
Posts: 19
Karma: 12
Join Date: Feb 2009
Location: Zaragoza, Spain
Device: prs-505, iliad
Hi mis_*

a) You must create a text file with the code and name it elcorreo.recipe. In Calibre, to the right of the button "Descargar noticias (RSS)" there is a black inverted triangle. Click it and select "Añadir una nueva fuente de noticias"; a new window will open.

Push the button "Cargar receta desde un fichero" and use the file elcorreo.recipe

Further up you will see the new user recipe "El Correo", and when you download news you'll find it within the group "Personalizado".


b) It's the Biscay edition. If you prefer the Alava edition, use this line in the code:


(u'Local', u'http://www.elcorreo.com/alava/rss/feeds/alava.xml'),

instead of

(u'Local', u'http://www.elcorreo.com/vizcaya/rss/feeds/vizcaya.xml'),

Agur

Last edited by desUBIKado; 01-14-2011 at 03:50 PM.
desUBIKado is offline   Reply With Quote
Old 01-19-2011, 04:58 AM   #4
mis_suscripcione
Junior Member
mis_suscripcione began at the beginning.
 
Posts: 2
Karma: 10
Join Date: Jan 2011
Device: sony 650
Conseguido: ¡¡¡muchas gracias!!!!
mis_suscripcione is offline   Reply With Quote
Reply

Thread Tools Search this Thread
Search this Thread:

Advanced Search

Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
Updated Telepolis (News+Artikel) Recipe syntaxis Recipes 8 05-15-2011 07:40 AM
Updated recipe for Le Monde? veezh Recipes 5 01-20-2011 10:06 PM
Animal Politico (@pajaropolitico in twitter) recipe - Spanish, Mexico leamsi Recipes 0 11-28-2010 02:40 PM
Updated New York Times recipe nickredding Recipes 2 11-20-2010 11:53 AM
Updated New Yorker recipe doesn't fetch comics yekim54 Recipes 2 10-09-2010 11:47 PM


All times are GMT -4. The time now is 01:13 AM.


MobileRead.com is a privately owned, operated and funded community.