View Single Post
Old 08-07-2020, 02:10 AM   #11
biffhero
Junior Member
biffhero began at the beginning.
 
Posts: 8
Karma: 10
Join Date: Aug 2020
Device: kobo libre h20
Thank you!

That was exactly where I needed to start. I copied some things from the other ESPN script; there are other parts of it that I don't yet understand well enough to copy over. Here's my script for now, in case anyone else wants to use it.

Code:
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1596778396(BasicNewsRecipe):
    """Calibre news recipe that collects ESPN RSS feeds.

    The class is mostly declarative: it sets recipe attributes consumed by
    ``BasicNewsRecipe`` and overrides two HTML hooks that adjust inline
    ``style`` attributes on ``div`` elements.
    """

    # --- Recipe metadata ---------------------------------------------------
    title = 'espn_modified'
    __author__ = 'Rob Walker'
    description = 'Sports news'
    language = 'en'

    # --- Download settings -------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    # NOTE(review): presumably overrides charset detection for fetched
    # pages; confirm the site does not actually serve UTF-8.
    encoding = 'ISO-8859-1'

    # --- Cleanup settings --------------------------------------------------
    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True

    remove_tags_before = dict(name='font', attrs={'class': 'date'})
    remove_tags = [
        dict(name='font', attrs={'class': 'footer'}),
        dict(name='hr', noshade='noshade'),
        dict(name='img', src='/winnercomm/horseracing/DRF.jpg'),
    ]

    # --- Styling applied to the generated book -----------------------------
    extra_css = '''
                body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; font-weight:normal;}
                .subhead{color:#666666;font-family:Verdana,sans-serif; font-size:x-small; font-weight:bold;}
                .clearfix{font-family:Verdana,sans-serif; font-size:xx-small; }
                .date{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#7A7A7A;}
                .byline{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#666666;}
                .headline{font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:large; font-weight:bold;}
                '''

    # --- (section title, RSS URL) pairs ------------------------------------
    feeds = [
        ('Top Headlines', 'https://www.espn.com/espn/rss/news'),
        ('NFL', 'https://www.espn.com/espn/rss/nfl/news'),
        ('NBA', 'https://www.espn.com/espn/rss/nba/news'),
        ('MLB', 'https://www.espn.com/espn/rss/mlb/news'),
        ('NHL', 'https://www.espn.com/espn/rss/nhl/news'),
        ('Golf', 'https://www.espn.com/espn/rss/golf/news'),
        ('RPM', 'https://www.espn.com/espn/rss/rpm/news'),
        ('Boxing', 'https://www.espn.com/espn/rss/boxing/news'),
        ('Soccer', 'https://www.espn.com/espn/rss/soccer/news'),
        ('NCB', 'https://www.espn.com/espn/rss/ncb/news'),
        ('NCF', 'https://www.espn.com/espn/rss/ncf/news'),
        ('NCAA', 'https://www.espn.com/espn/rss/ncaa/news'),
        ('Olympics', 'https://www.espn.com/espn/rss/oly/news'),
        ('Equestrian', 'https://www.espn.com/espn/rss/horse/news'),
    ]

    def preprocess_html(self, soup):
        """Blank the inline style of every div whose style mentions 'px'.

        Only ``div`` elements that carry a ``style`` attribute are visited;
        the same ``soup`` object is returned after being modified in place.
        """
        styled_divs = soup.findAll('div', style=True)
        for node in styled_divs:
            if 'px' not in node['style']:
                continue
            node['style'] = ''

        return soup

    def postprocess_html(self, soup, first_fetch):
        """Swap 'center' for 'left' in the inline style of styled divs.

        ``first_fetch`` is accepted to match the hook signature but is not
        used. The same ``soup`` object is returned after in-place edits.
        """
        for node in soup.findAll('div', style=True):
            current_style = node['style']
            node['style'] = current_style.replace('center', 'left')

        return soup

Last edited by kovidgoyal; 08-07-2020 at 02:49 AM.
biffhero is offline   Reply With Quote