Register Guidelines E-Books Search Today's Posts Mark Forums Read

Go Back   MobileRead Forums > E-Book Software > Calibre > Recipes

Notices

Reply
 
Thread Tools Search this Thread
Old 12-06-2012, 07:52 PM   #1
fleclerc
Junior Member
fleclerc began at the beginning.
 
Posts: 1
Karma: 10
Join Date: May 2012
Device: kindle
metro uk recipe update

Hi,

Metro uk recipe update is broken since yesterday's website upgrade.
Please find below a replacement

from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
import re
import datetime
import time

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
title = u'Metro UK'
description = 'News as provide by The Metro -UK'
#timefmt = ''
__author__ = 'Dave Asbury'
#last update 9/6/12
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
oldest_article = 1
remove_empty_feeds = True
remove_javascript = True
auto_cleanup = True
encoding = 'UTF-8'

language = 'en_GB'
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
keep_only_tags = [

]
remove_tags = []
def parse_index(self):
articles = {}
key = None
ans = []
feeds = [ ('UK', 'http://metro.co.uk/news/uk/'),
('World', 'http://metro.co.uk/news/world/'),
('Weird', 'http://metro.co.uk/news/weird/'),
('Money', 'http://metro.co.uk/news/money/'),
('Sport', 'http://metro.co.uk/sport/'),
('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/')
]
for key, feed in feeds:
print key, feed
soup = self.index_to_soup(feed)
articles[key] = []
ans.append(key)

today = datetime.date.today()
today = time.mktime(today.timetuple())-60*60*24

tags = soup.findAll('a')
for a in soup.findAll('a'):
for name, value in a.attrs:
if name == "class" and value=="post":
url = a['href']
title = a['title']
print title
description = ''
m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
skip = 1
if len(m.groups()) == 3:
g = m.groups()
dt = datetime.datetime.strptime(''+g[0]+'-'+g[1]+'-'+g[2], '%Y-%m-%d')
pubdate = time.strftime('%a, %d %b', dt.timetuple())

dt = time.mktime(dt.timetuple())
if dt >= today:
print pubdate
skip = 0
else:
pubdate = strftime('%a, %d %b')

summary = a.find(True, attrs={'class':'excerpt'})
if summary:
description = self.tag_to_string(summary, use_alt=False)

if skip == 0:
articles[key].append(
dict(title=title, url=url, date=pubdate,
description=description,
content=''))
#ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans
fleclerc is offline   Reply With Quote
Old 12-15-2012, 04:10 AM   #2
scissors
Addict
scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.
 
Posts: 203
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
It appears the old recipe actually still works, just the rss feeds had changed to this style

(u'Guilty Pleasures', u'http://metro.co.uk/guilty-pleasures/rss')

there was no need to go to the trouble of all that re-writing.

Last edited by scissors; 12-15-2012 at 02:54 PM.
scissors is offline   Reply With Quote
Old 01-20-2013, 02:30 PM   #3
scissors
Addict
scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.
 
Posts: 203
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
cover url
remove articles duplicated across sections
limit articles to 12 (due to size of generated file) comment out if it's a problem

Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
import re
import datetime
import time

class AdvancedUserRecipe1306097511(BasicNewsRecipe):
    title          = u'Metro UK'
    description = 'News as provided by The Metro -UK'
    #timefmt = ''
    __author__ = 'fleclerc & Dave Asbury'
    #last update 20/1/13
    #cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
    
    cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
    remove_empty_feeds = True
    remove_javascript     = True
    auto_cleanup = True
    max_articles_per_feed = 12
    ignore_duplicate_articles = {'title', 'url'}
    encoding = 'UTF-8'

    language = 'en_GB'
    masthead_url        = 'http://e-edition.metro.co.uk/images/metro_logo.gif'

    def parse_index(self):
		articles = {}
		key = None
		ans = []
		feeds = [ ('UK', 'http://metro.co.uk/news/uk/'),
			('World', 'http://metro.co.uk/news/world/'),
			('Weird', 'http://metro.co.uk/news/weird/'),
			('Money', 'http://metro.co.uk/news/money/'),
			('Sport', 'http://metro.co.uk/sport/'),
			('Guilty Pleasures', 'http://metro.co.uk/guilty-pleasures/')
			]
		for key, feed in feeds:
			soup = self.index_to_soup(feed)
			articles[key] = []
			ans.append(key)

			today = datetime.date.today()
			today = time.mktime(today.timetuple())-60*60*24

			for a in soup.findAll('a'):
				for name, value in a.attrs:
					if name == "class" and value=="post":
						url = a['href']
						title = a['title']
						print title
						description = ''
						m = re.search('^.*uk/([^/]*)/([^/]*)/([^/]*)/', url)
						skip = 1
						if len(m.groups()) == 3:
							g = m.groups()
							dt = datetime.datetime.strptime(''+g[0]+'-'+g[1]+'-'+g[2], '%Y-%m-%d')
							pubdate = time.strftime('%a, %d %b', dt.timetuple())

							dt = time.mktime(dt.timetuple())
							if dt >= today:
								print pubdate
								skip = 0
						else:
							pubdate = strftime('%a, %d %b')

						summary = a.find(True, attrs={'class':'excerpt'})
						if summary:
							description = self.tag_to_string(summary, use_alt=False)

						if skip == 0:
							articles[key].append(
										dict(title=title, url=url, date=pubdate,
												description=description,
												content=''))
		#ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
		ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
		return ans
scissors is offline   Reply With Quote
Reply

Thread Tools Search this Thread
Search this Thread:

Advanced Search

Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
metro uk update 9/9/12 scissors Recipes 0 09-09-2012 03:32 AM
metro uk update 4/8/12 scissors Recipes 0 08-04-2012 02:46 PM
metro uk update 9/6/12 scissors Recipes 0 06-09-2012 03:32 AM
improved metro uk recipe scissors Recipes 0 12-03-2011 05:01 PM
Recipe for Metro UK Bogg Recipes 10 10-07-2011 01:06 PM


All times are GMT -4. The time now is 09:45 AM.


MobileRead.com is a privately owned, operated and funded community.