View Single Post
Old 10-14-2010, 01:01 AM   #2
TonytheBookworm
Addict
TonytheBookworm is on a distinguished road
 
TonytheBookworm's Avatar
 
Posts: 264
Karma: 62
Join Date: May 2010
Device: kindle 2, kindle 3, Kindle fire
here:
Spoiler:

Code:
#!/usr/bin/env  python
# Module-level metadata for the MorphZone calibre recipe defined below.
__license__   = 'GPL v3'
__author__    = 'Tony Stegall' 
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
__version__   = '1'
__date__      = '13, October 2010'
__docformat__ = 'English'
# NOTE: the string below follows the assignments, so it is a bare string
# expression rather than the module docstring; it records the site URL.
'''
http://www.morphzone.org/modules/news/
'''


from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class Morphzone(BasicNewsRecipe):
    '''
    Calibre recipe for the MorphZone forum.

    The index is built by scraping the "last posts" page: every row of the
    listing table contributes one article, and each article URL is rewritten
    to the forum's print view by print_version().
    '''

    title      = 'MorphZone'
    __author__ = 'Tonythebookworm'
    description = 'Forum of the Morph'
    language = 'en'
    publisher           = 'Tonythebookworm'
    category            = 'forum'
    use_embedded_content= False
    no_stylesheets      = True
    oldest_article      = 24
    remove_javascript   = True
    remove_empty_feeds  = True
    #masthead_url        = ''

    max_articles_per_feed = 10
    conversion_options = {'linearize_tables' : True}

    def parse_index(self):
        '''
        Build the feed list for this recipe.

        Returns a list of (feed title, article list) tuples; a feed is
        omitted when make_links() finds no articles for it.
        '''
        sections = [
            (u"Forum", u"http://www.morphzone.org/modules/lastposts/"),
        ]
        feeds = []
        for title, url in sections:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        '''
        Scrape the listing page at `url` and return its articles.

        Looks inside every <table class="bg2"> for <tr class="bg1"> rows and
        takes the first link found in the second cell of each row.

        Returns a list of dicts with the keys 'title', 'url', 'description'
        and 'date' (the last two are always empty strings).
        '''
        current_articles = []
        soup = self.index_to_soup(url)
        for table in soup.findAll('table', attrs={'class': 'bg2'}):
            for row in table.findAll('tr', attrs={'class': 'bg1'}):
                cells = row.findAll('td')
                # Rows without a second cell cannot carry a topic link.
                if len(cells) < 2:
                    continue
                # Search the cell for the anchor itself; iterating the
                # cell's children would also yield plain text nodes, which
                # have no 'href' and made the lookup raise.
                link = cells[1].find('a', href=True)
                if link is None:
                    continue
                title = self.tag_to_string(link)
                # Use a separate name so the `url` argument is not clobbered.
                href = link['href']
                self.log.debug('Found article:', title, href)
                current_articles.append({
                    'title': title,
                    'url': href,
                    'description': '',
                    'date': '',
                })
        return current_articles

    def print_version(self, url):
        '''
        Return the print-view URL for a topic URL.

        Example input:
            http://www.morphzone.org/modules/newbb_plus/viewtopic.php?topic_id=7418&forum=11
        Example output:
            http://www.morphzone.org/modules/newbb_plus/print.php?topic_id=7418&forum=11

        The query string is kept as-is; only the script name is swapped.
        '''
        print_url = url.replace('viewtopic.php', 'print.php')
        self.log.debug('Print version URL:', print_url)
        return print_url

    def preprocess_html(self, soup):
        '''
        Remove every inline 'style' attribute from the parsed page and
        return the same soup object.
        '''
        for item in soup.findAll(attrs={'style': True}):
            del item['style']
        return soup
TonytheBookworm is offline   Reply With Quote