Register Guidelines E-Books Today's Posts Search

Go Back   MobileRead Forums > E-Book Software > Calibre > Recipes

Notices

Reply
 
Thread Tools Search this Thread
Old 10-07-2012, 12:09 PM   #1
scissors
Addict
scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.
 
Posts: 241
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
FHM update 7/10/12

Uses the site's RSS feeds.
Added removal of duplicate articles.

Spoiler:
Code:
import os

from calibre.constants import config_dir, CONFIG_DIR_MODE
from calibre.web.feeds.news import BasicNewsRecipe

   #declare global temp file
# Path of the scratch file that records the article titles seen during a
# fetch. Built with os.path.join so the separator is right on every platform
# (a literal '\\' only works on Windows).
Feeds_File = os.path.join(config_dir, 'feeds.txt')

class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    """Calibre news recipe for FHM UK, built from three RSS feeds.

    ``parse_feeds`` is overridden so that an article whose title has already
    appeared earlier in the same fetch (in any feed) has its URL blanked.
    """

    title = u'FHM UK'
    description = 'Author D.Asbury. Using feed43 Good News for Men.'
    cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
    # Alternative cover image:
    # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
    masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
    __author__ = 'Dave Asbury'
    # last updated 7/10/12
    language = 'en_GB'
    oldest_article = 31
    max_articles_per_feed = 15
    remove_empty_feeds = True
    no_stylesheets = True
    # auto_cleanup = True
    # articles_are_obfuscated = True

    # Not read by parse_feeds (duplicates are tracked in a local set); kept
    # so the class still exposes the attribute it always had.
    article_already_exists = False

    # Only these elements of each article page are kept.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='img', attrs={'id': 'ctl00_Body_imgMainImage'}),
        dict(name='div', attrs={'id': ['profileLeft', 'articleLeft', 'profileRight', 'profileBody']}),
        dict(name='div', attrs={'class': ['imagesCenterArticle', 'containerCenterArticle', 'articleBody']}),
    ]

    remove_tags = [
        dict(attrs={'id': ['ctl00_Body_divSlideShow']}),
    ]

    feeds = [
        # repeatable search = </div>{|}<a href="{%}" class="{*}">{%}</a>{|}<p>{*}</p>
        (u'Homepage', u'http://rss.feedsportal.com/c/375/f/434908/index.rss'),
        (u'Funny', u'http://rss.feedsportal.com/c/375/f/434910/index.rss'),
        (u'Girls', u'http://rss.feedsportal.com/c/375/f/434913/index.rss'),
    ]

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    # Debug output, emitted once when the class body is executed.
    # Single-argument print(...) gives the same text on Python 2 and 3.
    print('@@@@@@@ %s' % Feeds_File)

    def parse_feeds(self):
        """Parse the feeds, then blank the URL of every repeated article.

        An article counts as a repeat when another article with the same
        title was seen earlier in this call, in the same feed or a previous
        one. The first occurrence is left untouched. Each distinct title is
        also written, one per line, to ``Feeds_File``, which is recreated on
        every call.

        Returns:
            The feed list produced by ``BasicNewsRecipe.parse_feeds``, with
            ``article.url`` set to ``''`` on repeated articles.
        """
        # Function-scope import: io.open accepts an explicit encoding on both
        # Python 2 and Python 3.
        import io

        feeds = BasicNewsRecipe.parse_feeds(self)
        print('create empty file')
        print('')

        # Titles already seen in this call. A set lookup replaces re-reading
        # the whole title file for every single article.
        seen_titles = set()

        # Mode 'w' creates or truncates the file; the context manager closes
        # it even if an exception is raised part-way through. UTF-8 keeps
        # non-ASCII titles from failing on the platform default encoding.
        with io.open(Feeds_File, 'w', encoding='utf-8') as title_log:
            for feed in feeds:
                print('Feed file =  %s' % Feeds_File)
                print('')
                print('Feed section is  %s' % feed.title)

                for article in feed.articles:
                    article_title = article.title
                    if article_title in seen_titles:
                        print('repeated article')
                        # Per the original author's note, an empty URL is
                        # meant to stop the article being downloaded.
                        article.url = ''
                        continue

                    seen_titles.add(article_title)
                    # NOTE(review): assumes article.title is text (unicode
                    # on Python 2) - confirm against calibre's Article class.
                    title_log.write(u'%s\n' % article_title)

        return feeds
scissors is offline   Reply With Quote
Reply


Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
FHM update scissors Recipes 2 08-04-2012 08:00 AM
fhm update 17/3/12 scissors Recipes 0 03-17-2012 01:46 PM
FHM uk - updated scissors Recipes 0 01-27-2012 11:46 AM
new recipe FHM UK scissors Recipes 0 12-31-2011 11:48 AM
won't update...after completing the update cycle edge returns to the first screen WeAreBorrg enTourage Archive 2 03-18-2011 06:39 PM


All times are GMT -4. The time now is 02:47 PM.


MobileRead.com is a privately owned, operated and funded community.