View Single Post
Old 10-12-2012, 03:05 AM   #1
scissors
Addict
scissors ought to be getting tired of karma fortunes by now.
 
Posts: 241
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
BBC Nature recipe update, 12/10/12

This update removes articles whose titles contain "VIDEO:":

Spoiler:
Code:
from calibre.constants import config_dir, CONFIG_DIR_MODE
import os, os.path, urllib
from hashlib import md5

#declare global temp file
#Feeds_File = config_dir+'\\feeds.txt'

# needed for getting rid of repeat feeds

class AdvancedUserRecipe1339395836(BasicNewsRecipe):
    """calibre news recipe for the BBC Nature website.

    Downloads the BBC Nature RSS feeds listed in ``feeds`` and drops every
    article whose title contains ``VIDEO:`` (case-insensitive), since those
    entries are video clips rather than readable articles.
    """

    title = u'BBC Nature'
    cover_url = 'http://news.bbcimg.co.uk/img/3_0_0/cream/hi/nature/nature-blocks.gif'
    __author__ = 'Dave Asbury'
    description = 'Author D.Asbury. News From The BBC Nature Website'
    # last updated 12/10/12
    language = 'en_GB'
    oldest_article = 32
    max_articles_per_feed = 25
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # Keep every element with id="videoemp" when auto-cleanup runs.
    auto_cleanup_keep = '//*[@id="videoemp"]'
    auto_cleanup = True
    # Duplicate articles are identified by title.
    ignore_duplicate_articles = {'title'}

    # Strip elements carrying the "player" class.
    remove_tags = [
        dict(attrs={'class': ['player']}),
    ]

    feeds = [
        (u'BBC Nature', u'http://feeds.bbci.co.uk/nature/rss.xml'),
        (u'BBC Nature Features', u'http://feeds.bbci.co.uk/nature/features/rss.xml'),
        (u'BBC Nature - Whats New', u'http://www.bbc.co.uk/nature/wildlife/by/updated.rss'),
    ]

    # Must be a class attribute: at module level the recipe class never
    # carries this stylesheet.
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:medium;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
		'''

    # Based on starson's code.
    def parse_feeds(self):
        """Return the parsed feeds with all ``VIDEO:`` articles removed.

        Delegates to ``BasicNewsRecipe.parse_feeds`` and then filters each
        feed's ``articles`` list in place, logging every article title seen.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            kept = []
            for article in feed.articles:
                # self.log replaces the Python 2 ``print`` statement, which
                # is a syntax error on Python 3.
                self.log('article.title is: ', article.title)
                if 'VIDEO:' not in article.title.upper():
                    kept.append(article)
            # Slice-assign so the feed keeps the same list object.
            feed.articles[:] = kept
        return feeds
scissors is offline   Reply With Quote