Register Guidelines E-Books Today's Posts Search

Go Back   MobileRead Forums > E-Book Software > Calibre > Recipes

Notices

Reply
 
Thread Tools Search this Thread
Old 09-01-2013, 01:08 PM   #1
scissors
Addict
scissors ought to be getting tired of karma fortunes by now.
 
Posts: 241
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
Country file minor change

Just removed "| Countryfile" from every heading.

Spoiler:
Code:
import os, os.path, urllib
import re

import mechanize

from calibre import browser
from calibre.constants import config_dir, CONFIG_DIR_MODE
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    """Calibre news recipe for Countryfile.com.

    Downloads the three RSS feeds listed in ``feeds``, strips the
    " | Countryfile.com" suffix from the fetched HTML and uses the cover
    shown on the site's magazine page as the e-book cover.
    """

    title = u'Countryfile.com'
    __author__ = 'Dave Asbury'
    description = 'The official website of Countryfile Magazine'
    # last updated 19/10/12
    language = 'en_GB'

    # Feed selection limits (see the BasicNewsRecipe documentation for the
    # exact semantics of each option).
    oldest_article = 30
    max_articles_per_feed = 25
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}

    # Page clean-up and output options.
    no_stylesheets = True
    auto_cleanup = True
    compress_news_images = True

    # Remove the site-name suffix wherever it occurs in the fetched HTML.
    preprocess_regexps = [
        (re.compile(r' \| Countryfile.com', re.IGNORECASE | re.DOTALL),
         lambda match: ''),
    ]

    remove_tags = [
        # dict(attrs={'class' : ['player']}),
    ]

    feeds = [
        (u'Homepage', u'http://www.countryfile.com/rss/home'),
        (u'Country News', u'http://www.countryfile.com/rss/news'),
        (u'Countryside', u'http://www.countryfile.com/rss/countryside'),
    ]

    def get_cover_url(self):
        """Return the URL of the cover image to use for this download.

        The magazine page is searched for the first element whose ``class``
        matches ``imagecache imagecache-250px`` and its ``src`` attribute is
        taken as the cover URL.  The URL is then probed with redirects
        disabled; if the element or attribute is missing, or the probe
        fails, a fixed fallback cover URL is returned instead.

        Errors raised while fetching the magazine page itself are not
        caught here and propagate to the caller.
        """
        fallback_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'

        soup = self.index_to_soup('http://www.countryfile.com/magazine')
        cover_img = soup.find(
            attrs={'class': re.compile('imagecache imagecache-250px')})

        # Read the attribute directly rather than slicing the tag's string
        # form at fixed offsets, which breaks as soon as the markup changes.
        cover_url = cover_img.get('src') if cover_img is not None else None
        if not cover_url:
            return fallback_url

        # Probe the URL without following redirects so that a moved or
        # missing image is treated as a failure.
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cover_url)
        except Exception:
            # Best effort: any failure to fetch the cover falls back to the
            # known-good image, but KeyboardInterrupt/SystemExit still
            # propagate (unlike with a bare ``except:``).
            return fallback_url
        return cover_url

    
#    def parse_feeds(self):
#      feeds = BasicNewsRecipe.parse_feeds(self)
 #       print 'create empty file'
 #       print
       
        #open and close empty file - otherwise crashes as you can't append a file that doesn't exist?

   #     read_file=open(Feeds_File,'w+')
    #    read_file.close()

        # repeat for all feeds
     #   for feed in feeds:
      #      print 'Feed file = ',Feeds_File
            
            # for each section do
     #       print
      #      print 'Feed section is ',feed.title
            # for each article in each section check if it's in the feeds file
      #      for article in feed.articles[:]:
      #           article_already_exists = False
                 
     #            print
                #open the file and reads lines of text
      #           read_file=open(Feeds_File)
      #           while 1:
        #                  line=read_file.readline()
       #                   print
        #                  print'****'
        #                  print 'Value of line:',line
         #                 print 'article.title is:',article.title
        #                  if str(line) == str(article.title+'\n'):
          #                   article_already_exists = True
         #                    print 'repeated article'
        #                     break
          #                print'*****'
          #                print                         
           #               # eof reached   
             #             if not line: break
                          
            #     read_file.close()
                 # couldn't find article so write it to file
         #        if article_already_exists == False:
           #         read_file=open(Feeds_File,'a')
           #         read_file.write(article.title+'\n')
          #          read_file.close()
                    
         #        if article_already_exists == True:
           #         article.url ='' # delete the url so won't download
       # return feeds
scissors is offline   Reply With Quote
Reply


Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
metro uk minor change scissors Recipes 0 08-18-2013 12:56 PM
Change country setting for Kindle bonjurkes Amazon Kindle 4 04-25-2012 08:03 AM
Opus Updating EPUB file won't change publisher data on file listing spaze Bookeen 1 03-08-2011 01:34 AM
What happens to my US books if I change my country? ensyed Amazon Kindle 6 09-04-2010 12:04 PM
How do I change my country in the Sony store? ficbot Sony Reader 1 08-26-2010 05:03 AM


All times are GMT -4. The time now is 08:32 PM.


MobileRead.com is a privately owned, operated and funded community.