Thank you!
That was exactly where I needed to start. I copied some things over from the other ESPN script; there are other parts of it that I don't understand well enough yet to know what they do, so I can't tell whether they need to be copied over. Here's my script for now, in case anyone else wants to use it.
Code:
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1596778396(BasicNewsRecipe):
    """Calibre news recipe that builds a periodical from ESPN's RSS feeds.

    The class configures ``BasicNewsRecipe`` through class attributes and
    overrides two HTML hooks to normalise inline ``style`` attributes on
    ``<div>`` elements.
    """

    # --- Periodical metadata -------------------------------------------
    title = 'espn_modified'
    description = 'Sports news'
    __author__ = 'Rob Walker'
    language = 'en'

    # --- Download / clean-up behaviour ---------------------------------
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    # NOTE(review): this forces the page encoding; confirm ESPN still
    # serves ISO-8859-1 rather than UTF-8, otherwise non-ASCII text may
    # come out garbled.
    encoding = 'ISO-8859-1'
    oldest_article = 7
    max_articles_per_feed = 100
    auto_cleanup = True

    # NOTE(review): the selectors below were copied from an older ESPN
    # recipe; verify they still match markup on the current site.
    remove_tags_before = dict(name='font', attrs={'class': 'date'})
    remove_tags = [
        dict(name='font', attrs={'class': 'footer'}),
        dict(name='hr', noshade='noshade'),
        dict(name='img', src='/winnercomm/horseracing/DRF.jpg'),
    ]

    # Extra CSS added to the generated pages. The rules are kept flush
    # left so the string content is exactly what the recipe shipped with.
    extra_css = '''
body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; font-weight:normal;}
.subhead{color:#666666;font-family:Verdana,sans-serif; font-size:x-small; font-weight:bold;}
.clearfix{font-family:Verdana,sans-serif; font-size:xx-small; }
.date{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#7A7A7A;}
.byline{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#666666;}
.headline{font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:large; font-weight:bold;}
'''

    # (section title, RSS feed URL) pairs, one per ESPN section.
    feeds = [
        ('Top Headlines', 'https://www.espn.com/espn/rss/news'),
        ('NFL', 'https://www.espn.com/espn/rss/nfl/news'),
        ('NBA', 'https://www.espn.com/espn/rss/nba/news'),
        ('MLB', 'https://www.espn.com/espn/rss/mlb/news'),
        ('NHL', 'https://www.espn.com/espn/rss/nhl/news'),
        ('Golf', 'https://www.espn.com/espn/rss/golf/news'),
        ('RPM', 'https://www.espn.com/espn/rss/rpm/news'),
        ('Boxing', 'https://www.espn.com/espn/rss/boxing/news'),
        ('Soccer', 'https://www.espn.com/espn/rss/soccer/news'),
        ('NCB', 'https://www.espn.com/espn/rss/ncb/news'),
        ('NCF', 'https://www.espn.com/espn/rss/ncf/news'),
        ('NCAA', 'https://www.espn.com/espn/rss/ncaa/news'),
        ('Olympics', 'https://www.espn.com/espn/rss/oly/news'),
        ('Equestrian', 'https://www.espn.com/espn/rss/horse/news'),
    ]

    def preprocess_html(self, soup):
        """Blank the inline style of every ``<div>`` that mentions ``px``.

        :param soup: parsed article document; it is modified in place.
        :return: the same ``soup`` object.
        """
        for div in soup.findAll('div', style=True):
            # Any style containing a pixel measurement is cleared wholesale,
            # not just the pixel-based declaration.
            if 'px' in div['style']:
                div['style'] = ''
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Replace ``center`` with ``left`` in every ``<div>`` inline style.

        :param soup: parsed article document; it is modified in place.
        :param first_fetch: part of the hook signature; not used here.
        :return: the same ``soup`` object.
        """
        for div in soup.findAll('div', style=True):
            div['style'] = div['style'].replace('center', 'left')
        return soup