Can anyone help make recipes for the above sites? Really appreciate it.
How about this since you requested "help". Take and read up the conversations I had with Starson17 about Field and Streams. And also look up make_links and parse_index. Once you do that then write some code using it. If it doesn't work then take and post it in here using spoiler (the eye with the x) and code(the # icon) tags and then when I get time I will help or i'm sure someone else will help as well. the only way your gonna learn is by doing it. I will do one for you and you can follow my lead and do the rest....
Spoiler:
Code:
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1'
__date__ = '16, October 2010'
__docformat__ = 'English'
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class MalaysianMirror(BasicNewsRecipe):
title = 'MalaysianMirror'
__author__ = 'Tonythebookworm'
description = 'The Pulse of the Nation'
language = 'en'
no_stylesheets = True
publisher = 'Tonythebookworm'
category = 'news'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
conversion_options = {'linearize_tables' : True}
extra_css = '''
#content_heading{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
td{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
#content_body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [dict(name='table', attrs={'class':['contentpaneopen']})
]
remove_tags = [dict(name='table', attrs={'class':['buttonheading']})]
#######################################################################################################################
max_articles_per_feed = 10
'''
Make a variable that will hold the url for the main site because our links do not include the index
'''
INDEX = 'http://www.malaysianmirror.com'
def parse_index(self):
feeds = []
for title, url in [
(u"Media Buzz", u"http://www.malaysianmirror.com/media-buzz-front"),
(u"Life Style", u"http://www.malaysianmirror.com/lifestylefront"),
(u"Features", u"http://www.malaysianmirror.com/featurefront"),
]:
articles = self.make_links(url)
if articles:
feeds.append((title, articles))
return feeds
def make_links(self, url):
title = 'Temp'
current_articles = []
soup = self.index_to_soup(url)
# print 'The soup is: ', soup
for item in soup.findAll('div', attrs={'class':'contentheading'}):
print 'item is: ', item
link = item.find('a')
print 'the link is: ', link
if link:
url = self.INDEX + link['href']
title = self.tag_to_string(link)
print 'the title is: ', title
print 'the url is: ', url
print 'the title is: ', title
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''}) # append all this
return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(attrs={'style':True}):
del item['style']
return soup