10-14-2010, 12:01 AM
|
#2
|
Addict
Posts: 264
Karma: 62
Join Date: May 2010
Device: Kindle 2, Kindle 3, Kindle Fire
|
here:
Spoiler:
Code:
#!/usr/bin/env python
# Recipe metadata: license, author, copyright, version and release date.
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__ = '1'
__date__ = '13, October 2010'
__docformat__ = 'English'
# NOTE: this string follows other statements, so it is a no-op expression
# rather than the module docstring; it only records a MorphZone URL.
'''
http://www.morphzone.org/modules/news/
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Morphzone(BasicNewsRecipe):
    """Calibre news recipe that collects topics from the MorphZone forum.

    The article index is scraped from the forum's "last posts" page and
    each topic is downloaded through the forum's ``print.php`` view.
    """

    title = 'MorphZone'
    __author__ = 'Tonythebookworm'
    description = 'Forum of the Morph'
    language = 'en'
    publisher = 'Tonythebookworm'
    category = 'forum'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    #masthead_url = ''
    max_articles_per_feed = 10
    conversion_options = {'linearize_tables': True}

    def parse_index(self):
        """Build the feed list for this recipe.

        Returns:
            A list of ``(feed_title, articles)`` tuples, where ``articles``
            is the list produced by :meth:`make_links`. Index pages that
            yield no articles are left out.
        """
        feeds = []
        for title, url in [
            (u"Forum", u"http://www.morphzone.org/modules/lastposts/"),
        ]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Scrape one index page and return its article entries.

        Looks at every ``<tr class="bg1">`` inside a ``<table class="bg2">``
        and takes the links found in the row's second cell.

        Args:
            url: Address of the index page to scrape.

        Returns:
            A list of dicts with the keys ``title``, ``url``,
            ``description`` and ``date`` (the last two are always empty).
        """
        current_articles = []
        soup = self.index_to_soup(url)
        for table in soup.findAll('table', attrs={'class': 'bg2'}):
            for row in table.findAll('tr', attrs={'class': 'bg1'}):
                cells = row.findAll('td')
                # Rows without a second cell (spacers, headers) carry no
                # topic link; indexing them blindly raised IndexError.
                if len(cells) < 2:
                    continue
                # Only real anchors with an href are articles. The previous
                # code iterated over the cell's child nodes and subscripted
                # each one, which fails on text nodes and href-less tags.
                # NOTE(review): this picks up every link in the cell,
                # including nested ones - confirm against the live markup.
                for link in cells[1].findAll('a', attrs={'href': True}):
                    article_url = link['href']
                    article_title = self.tag_to_string(link)
                    self.log.debug('the title is:', article_title)
                    self.log.debug('the url is:', article_url)
                    current_articles.append({
                        'title': article_title,
                        'url': article_url,
                        'description': '',
                        'date': '',
                    })
        return current_articles

    def print_version(self, url):
        """Map a topic URL to the forum's printer-friendly URL.

        Example:
            ``.../newbb_plus/viewtopic.php?topic_id=7418&forum=11`` becomes
            ``.../newbb_plus/print.php?topic_id=7418&forum=11``.

        Args:
            url: Article URL as collected by :meth:`make_links`.

        Returns:
            The URL with ``viewtopic.php`` replaced by ``print.php``; the
            query string is left untouched.
        """
        print_url = url.replace('viewtopic.php', 'print.php')
        self.log.debug('THIS URL WILL PRINT:', print_url)
        return print_url

    def preprocess_html(self, soup):
        """Remove inline ``style`` attributes from every tag.

        Args:
            soup: Parsed article page.

        Returns:
            The same soup object, modified in place.
        """
        for item in soup.findAll(attrs={'style': True}):
            del item['style']
        return soup
|
|
|