Uses the site's RSS feeds.
Added duplicate-article removal.
Spoiler:
Code:
import os

from calibre.constants import config_dir, CONFIG_DIR_MODE
from calibre.web.feeds.news import BasicNewsRecipe
# Scratch file (in calibre's config directory) used by parse_feeds() to
# record article titles already seen during a run.  os.path.join is used
# instead of the original hard-coded '\\' separator so the recipe also
# works on non-Windows platforms.
Feeds_File = os.path.join(config_dir, 'feeds.txt')
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    # FHM UK recipe.  Builds the ebook from the site's feedsportal RSS feeds
    # and suppresses duplicate articles: an article whose title has already
    # been seen in an earlier feed gets its URL blanked so it is not
    # downloaded a second time.
    title = u'FHM UK'
    description = 'Author D.Asbury. Using feed43 Good News for Men.'
    cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
    masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
    __author__ = 'Dave Asbury'
    # last updated 7/10/12
    language = 'en_GB'
    oldest_article = 31
    max_articles_per_feed = 15
    remove_empty_feeds = True
    no_stylesheets = True

    # Legacy flag from the original implementation; kept so any external
    # reader of this attribute still finds it.  parse_feeds() no longer
    # uses it.
    article_already_exists = False

    keep_only_tags = [
        dict(name='h1'),
        dict(name='img', attrs={'id': 'ctl00_Body_imgMainImage'}),
        dict(name='div', attrs={'id': ['profileLeft', 'articleLeft', 'profileRight', 'profileBody']}),
        dict(name='div', attrs={'class': ['imagesCenterArticle', 'containerCenterArticle', 'articleBody']}),
    ]
    remove_tags = [
        dict(attrs={'id': ['ctl00_Body_divSlideShow']}),
    ]

    feeds = [
        # repeatable search = </div>{|}<a href="{%}" class="{*}">{%}</a>{|}<p>{*}</p>
        (u'Homepage', u'http://rss.feedsportal.com/c/375/f/434908/index.rss'),
        (u'Funny', u'http://rss.feedsportal.com/c/375/f/434910/index.rss'),
        (u'Girls', u'http://rss.feedsportal.com/c/375/f/434913/index.rss'),
    ]

    def parse_feeds(self):
        """Fetch the feeds, then drop duplicate articles across sections.

        The first occurrence of each article title is kept; any later
        occurrence has its url set to '' so calibre skips the download.
        As in the original code, the scratch file Feeds_File is truncated
        at the start of every run and filled with the unique titles (useful
        for debugging which articles were kept).
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        # Single in-memory set replaces the original per-article linear
        # re-scan of the scratch file (which was O(n^2) file I/O).
        seen_titles = set()
        with open(Feeds_File, 'w') as record:
            for feed in feeds:
                for article in feed.articles[:]:
                    if article.title in seen_titles:
                        article.url = ''  # blank url -> article won't download
                    else:
                        seen_titles.add(article.title)
                        record.write(article.title + '\n')
        return feeds

    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''