12-15-2012, 04:07 AM
|
#1
|
Addict
Posts: 241
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
|
birmingham post update
Correct cover fetched
Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
import re
import mechanize
from calibre.utils.magick import Image
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Birmingham post'
description = 'Author D.Asbury. News for Birmingham UK'
#timefmt = ''
__author__ = 'Dave Asbury'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
oldest_article = 2
max_articles_per_feed = 12
linearize_tables = True
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
auto_cleanup = True
language = 'en_GB'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
def get_cover_url(self):
soup = self.index_to_soup('http://www.birminghampost.net')
# look for the block containing the sun button and url
cov = soup.find(attrs={'height' : re.compile('3'), 'alt' : re.compile('Birmingham Post')})
print
print '%%%%%%%%%%%%%%%',cov
print
cov2 = str(cov['src'])
# cov2=cov2[7:]
print '88888888 ',cov2,' 888888888888'
#cover_url=cov2
#return cover_url
br = mechanize.Browser()
br.set_handle_redirect(False)
try:
br.open_novisit(cov2)
cover_url = cov2
except:
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
return cover_url
feeds = [
#(u'News',u'http://www.birminghampost.net/news/rss.xml'),
(u'West Mids. News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
(u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
(u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
(u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
]
Last edited by scissors; 12-15-2012 at 04:39 AM.
|
|
|