Register Guidelines E-Books Today's Posts Search

Go Back   MobileRead Forums > E-Book Software > Calibre > Recipes

Notices

Reply
 
Thread Tools Search this Thread
Old 10-25-2014, 05:16 AM   #1
scissors
Addict
scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.
 
Posts: 241
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
countryfile.com recipe change

Due to website changes that caused the recipe to return no articles, it now uses feed43 feeds.

Spoiler:

Code:
import os
import os.path
import re
import urllib

import mechanize

from calibre import browser
from calibre.constants import config_dir, CONFIG_DIR_MODE
from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1325006965(BasicNewsRecipe):
    """Download Countryfile Magazine articles.

    The site's own feeds stopped yielding articles after a redesign, so
    this recipe pulls from feed43.com mirrors of the section feeds.
    """
    title = u'Countryfile.com'
    __author__ = 'Dave Asbury'
    description = 'The official website of Countryfile Magazine'
    # last updated 24.10.14
    language = 'en_GB'
    oldest_article = 30
    max_articles_per_feed = 25
    remove_empty_feeds = True
    no_stylesheets = True
    auto_cleanup = True
    compress_news_images = True
    ignore_duplicate_articles = {'title', 'url'}

    # Fallback cover shown when the live cover image cannot be located
    # or fetched from the magazine page.
    FALLBACK_COVER = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'

    # Strip the site name suffix that the site appends to page titles.
    preprocess_regexps = [
        (re.compile(r' \| Countryfile.com', re.IGNORECASE | re.DOTALL),
         lambda match: '')]

    remove_tags = [
        # dict(attrs={'class' : ['player']}),
    ]

    feeds = [
        (u'Country News', u'http://www.feed43.com/7204505705648666.xml'),
        (u'Articles', u'http://www.feed43.com/8542080013204443.xml'),
    ]

    def get_cover_url(self):
        """Return the current magazine cover URL.

        Scrapes the magazine page for the cover <img>, verifies the URL
        actually resolves, and falls back to a known static cover image
        on any failure.
        """
        soup = self.index_to_soup('http://www.countryfile.com/magazine')
        # The cover image carries the 'imagecache imagecache-250px' class.
        cov = soup.find(attrs={'class': re.compile('imagecache imagecache-250px')})
        if cov is None:
            # Page layout changed and no cover tag was found.
            return self.FALLBACK_COVER
        # Read the URL from the src attribute directly; the previous
        # approach sliced fixed offsets out of str(cov), which broke
        # whenever the surrounding markup changed length.
        cover = cov.get('src')
        if not cover:
            return self.FALLBACK_COVER
        br = mechanize.Browser()
        br.set_handle_redirect(False)
        try:
            # Probe the URL without recording a visit; any HTTP/network
            # error means the scraped URL is stale.
            br.open_novisit(cover)
        except Exception:
            return self.FALLBACK_COVER
        return cover

    
#    def parse_feeds(self):
#      feeds = BasicNewsRecipe.parse_feeds(self)
 #       print 'create empty file'
 #       print
       
        #open and close empty file - otherwise crashes as you can't append a file that doesn't exist?

   #     read_file=open(Feeds_File,'w+')
    #    read_file.close()

        # repeat for all feeds
     #   for feed in feeds:
      #      print 'Feed file = ',Feeds_File
            
            # for each section do
     #       print
      #      print 'Feed section is ',feed.title
            # for each article in each section check if it's in the feeds file
      #      for article in feed.articles[:]:
      #           article_already_exists = False
                 
     #            print
                #open the file and reads lines of text
      #           read_file=open(Feeds_File)
      #           while 1:
        #                  line=read_file.readline()
       #                   print
        #                  print'****'
        #                  print 'Value of line:',line
         #                 print 'article.title is:',article.title
        #                  if str(line) == str(article.title+'\n'):
          #                   article_already_exists = True
         #                    print 'repeated article'
        #                     break
          #                print'*****'
          #                print                         
           #               # eof reached   
             #             if not line: break
                          
            #     read_file.close()
                 # couldn't find article so write it to file
         #        if article_already_exists == False:
           #         read_file=open(Feeds_File,'a')
           #         read_file.write(article.title+'\n')
          #          read_file.close()
                    
         #        if article_already_exists == True:
           #         article.url ='' # delete the url so won't download
       # return feeds
scissors is offline   Reply With Quote
Reply


Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
countryfile.com update scissors Recipes 0 12-16-2012 06:13 AM
countryfile 19/10/12 scissors Recipes 0 10-19-2012 09:49 AM
countryfile.com scissors Recipes 0 09-09-2012 03:07 AM
countryfile.com update scissors Recipes 0 04-15-2012 10:53 AM
Is there a way to change author metadata in recipe? dochase Calibre 1 01-06-2011 07:18 PM


All times are GMT -4. The time now is 09:06 PM.


MobileRead.com is a privately owned, operated and funded community.