Remove articles of type VIDEO:
Spoiler:
Code:
from calibre.constants import config_dir, CONFIG_DIR_MODE
import os, os.path, urllib
from hashlib import md5
# Declare the global temp file (currently disabled).
#Feeds_File = config_dir+'\\feeds.txt'
# Needed for getting rid of repeat feeds.
class AdvancedUserRecipe1339395836(BasicNewsRecipe):
    """Calibre news recipe for the BBC Nature website.

    Fetches the BBC Nature RSS feeds listed in ``feeds`` and discards every
    article whose title contains ``VIDEO:`` (see ``parse_feeds``).
    """

    title = u'BBC Nature'
    cover_url = 'http://news.bbcimg.co.uk/img/3_0_0/cream/hi/nature/nature-blocks.gif'
    __author__ = 'Dave Asbury'
    description = 'Author D.Asbury. News From The BBC Nature Website'
    # last updated 12/10/12
    language = 'en_GB'
    oldest_article = 32
    max_articles_per_feed = 25
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    # XPath of content to preserve during auto cleanup: any element with
    # id="videoemp" (the ``//*`` step matches every tag, not only divs).
    auto_cleanup_keep = '//*[@id="videoemp"]'
    auto_cleanup = True
    # Articles are de-duplicated by title.
    ignore_duplicate_articles = {'title'}

    remove_tags = [
        dict(attrs={'class': ['player']}),
    ]

    feeds = [
        (u'BBC Nature', u'http://feeds.bbci.co.uk/nature/rss.xml'),
        (u'BBC Nature Features', u'http://feeds.bbci.co.uk/nature/features/rss.xml'),
        (u'BBC Nature - Whats New', u'http://www.bbc.co.uk/nature/wildlife/by/updated.rss'),
    ]

    # The CSS lines are kept flush-left so the string content is unchanged.
    extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:medium;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

    # starsons code
    def parse_feeds(self):
        """Return the parsed feeds with ``VIDEO:`` articles removed.

        Calls ``BasicNewsRecipe.parse_feeds`` and then filters each feed's
        ``articles`` list in place, dropping every article whose title
        contains ``VIDEO:`` (compared case-insensitively via ``upper()``).
        Each title is logged as it is examined.

        Returns:
            The list of feeds. When ``remove_empty_feeds`` is set, feeds
            that have no articles left after filtering are omitted.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            kept = []
            for article in feed.articles:
                # Logged through the recipe's logger instead of the
                # Python 2-only ``print`` statement.
                self.log('article.title is: ', article.title)
                if 'VIDEO:' not in article.title.upper():
                    kept.append(article)
            # Slice assignment updates the feed's existing list object, so
            # any other reference to ``feed.articles`` sees the filtering.
            feed.articles[:] = kept

        # The base call has already returned by the time videos are
        # filtered out, so honour remove_empty_feeds here for feeds that
        # the filter emptied.
        if self.remove_empty_feeds:
            feeds = [feed for feed in feeds if feed.articles]
        return feeds