Quote:
Originally Posted by cynvision
I took a look at this one and if there was a way to get the RSS of the weekly archive... but I don't see one. Maybe it's members only?
|
I didn't see an RSS feed for it, so I just parsed the links.
I'm not certain the link will list the feeds every time, because I have no way of testing that. The following code takes the link provided by the original poster, parses the links on that page, and looks for "Alison Berkley" in the link text. If it finds it, that link is used and converted by print_version into the printer-friendly version...
Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
class FIELDSTREAM(BasicNewsRecipe):
    """Calibre recipe that collects the Alison Berkley column from aspentimes.com.

    The article list is scraped from the links on the section index page
    (no RSS feed is used), and every article is downloaded through its
    printer-friendly URL.

    NOTE(review): the class name does not match the recipe title; it is kept
    unchanged so existing references to the class keep working.
    """

    title = 'Alison Berkley Column'
    __author__ = 'Tonythebookworm'
    description = 'Some dudes column'
    language = 'en'
    publisher = 'Tonythebookworm'
    category = 'column'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    # Site root; prefixed to the hrefs scraped from the index page and used
    # as the base of the printer-friendly URL.
    INDEX = 'http://www.aspentimes.com'

    def parse_index(self):
        """Build the feed list by scraping each configured section page.

        Returns:
            A list of ``(feed_title, articles)`` tuples, where ``articles``
            is the list produced by :meth:`make_links`. Sections that yield
            no articles are left out.
        """
        sections = [
            (u"Alison Berkley", u"http://www.aspentimes.com/SECTION/&Profile=1021&ParentProfile=1061"),
        ]
        feeds = []
        for title, url in sections:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Collect the articles on an index page whose link text names Alison Berkley.

        Args:
            url: Address of the section index page to scrape.

        Returns:
            A list of dicts with the keys ``title``, ``url``, ``description``
            and ``date`` (the last two are always empty strings), one per
            matching link, in page order.
        """
        current_articles = []
        # Compiled once here instead of on every loop iteration.
        author_pattern = re.compile('Alison Berkley')
        soup = self.index_to_soup(url)
        for item in soup.findAll('div', attrs={'class': 'title'}):
            link = item.find('a')
            if link is None:
                # A title block without an anchor has nothing to follow.
                continue
            # Only links whose text mentions the columnist are of interest.
            if not link.find(text=author_pattern):
                continue
            href = link.get('href')
            if not href:
                continue
            article_title = self.tag_to_string(link)
            article_url = self.INDEX + href
            self.log.debug('Found article:', article_title, article_url)
            current_articles.append({
                'title': article_title,
                'url': article_url,
                'description': '',
                'date': '',
            })
        return current_articles

    def print_version(self, url):
        """Convert an article URL into its printer-friendly equivalent.

        Example:
            http://www.aspentimes.com/article/20100909/COLUMN/100909869/1021&parentprofile=1061
            becomes
            http://www.aspentimes.com/apps/pbcs.dll/article?AID=/20100909/COLUMN/100909869/1021&parentprofile=1061&template=printart

        Args:
            url: Article URL as produced by :meth:`make_links`.

        Returns:
            The printer-friendly URL, or ``url`` unchanged when it does not
            contain the ``article`` path marker.
        """
        # Split on the first occurrence only, so a later "article" inside the
        # path or query string cannot truncate the article id.
        _, marker, article_id = url.partition('article')
        if not marker:
            self.log.warn('Cannot build print URL, using original:', url)
            return url
        print_url = self.INDEX + '/apps/pbcs.dll/article?AID=' + article_id + '&template=printart'
        self.log.debug('Print URL:', print_url)
        return print_url