Indeed, it appears that the OC Register changed their URLs a while back.
I was able to fix it.
Here's the updated recipe in case anyone stumbles across this thread:
Code:
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini (updated by rrrrrrrrrrrryan at gmail.com)'
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
description = 'News from Orange county - v1.02 (10, August 2014)'
'''
http://www.ocregister.com/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ocRegister(BasicNewsRecipe):
author = 'Lorenzo Vigentini'
description = 'News from the Orange county'
cover_url = 'http://images.onset.freedom.com/ocregister/logo.gif'
title = u'Orange County Register'
publisher = 'Orange County Register Communication'
category = 'News, finance, economy, politics'
language = 'en'
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 25
use_embedded_content = False
recursion = 10
# remove_javascript = True
no_stylesheets = True
needs_subscription = "optional"
use_javascript_to_login = True
def javascript_login(self, browser, username, password):
browser.visit('http://www.ocregister.com/sections/login')
form = browser.select_form(nr=1) # Select the second form on the page
form['username'] = username
form['password_temp'] = password
browser.submit(timeout=120) # Submit the form and wait at most two minutes for loading to complete
def print_version(self,url):
printUrl = 'http://www.ocregister.com/common/printer/view.php?db=ocregister&id='
segments = url.split('/')
subSegments = (segments[4]).split('.')
subSubSegments = (subSegments[0]).split('-')
myArticle = (subSubSegments[1])
myURL= printUrl + myArticle
return myURL
keep_only_tags = [
dict(name='div', attrs={'id':'ArticleContentWrap'})
]
remove_tags = [
dict(name='div', attrs={'class':'hideForPrint'}),
dict(name='div', attrs={'id':'ContentFooter'})
]
feeds = [
(u'News', u'http://www.ocregister.com/common/rss/rss.php?catID=18800'),
(u'Top Stories', u'http://www.ocregister.com/common/rss/rss.php?catID=23541'),
(u'Business', u'http://www.ocregister.com/common/rss/rss.php?catID=18909'),
(u'Cars', u'http://www.ocregister.com/common/rss/rss.php?catID=20128'),
(u'Entertainment', u'http://www.ocregister.com/common/rss/rss.php?catID=18926'),
(u'Home', u'http://www.ocregister.com/common/rss/rss.php?catID=19142'),
(u'Life', u'http://www.ocregister.com/common/rss/rss.php?catID=18936'),
(u'Opinion', u'http://www.ocregister.com/common/rss/rss.php?catID=18963'),
(u'Sports', u'http://www.ocregister.com/common/rss/rss.php?catID=18901'),
(u'Travel', u'http://www.ocregister.com/common/rss/rss.php?catID=18959')
]
extra_css = '''
h1 {color:#ff6600;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
#articledate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
#articlebyline {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
img {align:left;}
#topstoryhead {color:#ff6600;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
'''