View Single Post
Old 10-15-2010, 02:35 PM   #2
Starson17
Wizard
Starson17 can program the VCR without an owner's manual.
 
Posts: 4,004
Karma: 177841
Join Date: Dec 2009
Device: WinMo: IPAQ; Android: HTC HD2, Archos 7o; Java:Gravity T
Morphzone.com - forum recipe - by TonyTheBookworm

Despite the title, this is really a recipe for morphzone.org.
Spoiler:
Code:
#!/usr/bin/env python
# Module-level metadata for the MorphZone calibre recipe.
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
# Site name corrected: the forum is mobileread.com (was misspelled "mobiread.com").
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobileread.com'
__version__ = '1'
__date__ = '13, October 2010'
__docformat__ = 'English'
# The string below follows the assignments, so it is a plain expression
# statement rather than the module docstring; it records the site's news URL.
'''
http://www.morphzone.org/modules/news/
'''


from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Morphzone(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from MorphZone forum topics.

    The article list is scraped from the forum's "lastposts" page by
    ``parse_index``/``make_links``; each article URL is then rewritten by
    ``print_version`` to point at the forum's ``print.php`` view, and inline
    ``style`` attributes are stripped by ``preprocess_html``.
    """

    title = 'MorphZone'
    __author__ = 'Tonythebookworm'
    description = 'Forum of the Morph'
    language = 'en'
    publisher = 'Tonythebookworm'
    category = 'forum'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    # masthead_url = ''

    max_articles_per_feed = 10
    conversion_options = {'linearize_tables': True}

    def parse_index(self):
        """Return the feed list as ``[(feed_title, [article_dict, ...]), ...]``.

        Index pages that yield no articles are left out of the result.
        """
        index_pages = [
            (u"Forum", u"http://www.morphzone.org/modules/lastposts/"),
        ]
        feeds = []
        for title, url in index_pages:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Collect the topic links found on the index page at *url*.

        Looks inside every ``<table class="bg2">`` for ``<tr class="bg1">``
        rows and takes the anchors from the second cell of each row.

        Returns a list of dicts with the keys ``title``, ``url``,
        ``description`` and ``date`` (the last two are always empty).
        """
        current_articles = []
        soup = self.index_to_soup(url)
        for table in soup.findAll('table', attrs={'class': 'bg2'}):
            for row in table.findAll('tr', attrs={'class': 'bg1'}):
                cells = row.findAll('td')
                if len(cells) < 2:
                    # No second cell to read links from; indexing [1] here
                    # would raise IndexError.
                    continue
                # Search for anchors explicitly instead of iterating the
                # cell's children: children may be text nodes or tags
                # without an href, which cannot be subscripted with 'href'.
                for link in cells[1].findAll('a', attrs={'href': True}):
                    article_title = self.tag_to_string(link)
                    article_url = link['href']
                    self.log.debug('Found article:', article_title, article_url)
                    current_articles.append({
                        'title': article_title,
                        'url': article_url,
                        'description': '',
                        'date': '',
                    })
        return current_articles

    def print_version(self, url):
        """Return the printer-friendly URL for the topic at *url*.

        Example input:
            http://www.morphzone.org/modules/newbb_plus/viewtopic.php?topic_id=7418&forum=11
        Example output:
            http://www.morphzone.org/modules/newbb_plus/print.php?topic_id=7418&forum=11

        Only the script name is swapped; the query string is kept as is.
        """
        print_url = url.replace('viewtopic.php', 'print.php')
        self.log.debug('Print URL:', print_url)
        return print_url

    def preprocess_html(self, soup):
        """Remove every inline ``style`` attribute from *soup* and return it."""
        for item in soup.findAll(attrs={'style': True}):
            del item['style']
        return soup
Attached Files
File Type: zip morphzone.zip (2.2 KB, 342 views)

Last edited by Starson17; 10-15-2010 at 02:39 PM.
Starson17 is offline   Reply With Quote