Enthusiast
Posts: 33
Karma: 10
Join Date: Apr 2012
Device: Amazon Kindle Paperwhite
|
Kovid,
Thanks for the suggestion to update the recipe with the correct links. I don't know how to code in Python, but this is something I can do to help.
I checked every link in the recipe (there are a lot of them). I removed comics that are no longer on the site. In some cases the link in the recipe still worked but redirected to an updated link. In those cases I replaced the existing link with the address it redirects to.
I did not go through the site to find new comics and add them to the list. I'll be happy to do this if someone fixes the recipe.
List of the changes I made:
Code:
Links Deleted:
# (u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
# (u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
# (u"Deep Cover", u"http://www.gocomics.com/deepcover"),
# (u"Doodles", u"http://www.gocomics.com/doodles"),
# (u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
# (u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
# (u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
# (u"Housebroken", u"http://www.gocomics.com/housebroken"),
# (u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
# (u"Little Otto", u"http://www.gocomics.com/littleotto"),
# (u"Love Is...", u"http://www.gocomics.com/loveis"),
# (u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
# (u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
# (u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
# (u"Red Meat", u"http://www.gocomics.com/redmeat"),
# (u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
# (u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
# (u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
# (u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
# (u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
# (u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
# (u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
#
# EDITORIAL CARTOONS #####################
# (u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
# (u"Andy Singer", u"http://www.gocomics.com/andysinger"),
# (u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
# (u"Bill Day", u"http://www.gocomics.com/billday"),
# (u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
# (u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
# (u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
# (u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
# (u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
# (u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
# (u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
# (u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
# (u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
# (u"Don Wright",u"http://www.gocomics.com/donwright"),
# (u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
# (u"Ed Stein", u"http://www.gocomics.com/edstein"),
# (u"Eric Allie", u"http://www.gocomics.com/ericallie"),
# (u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
# (u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
# (u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
# (u"John Cole", u"http://www.gocomics.com/johncole"),
# (u"John Darkow", u"http://www.gocomics.com/johndarkow"),
# (u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
# (u"Larry Wright", u"http://www.gocomics.com/larrywright"),
# (u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
# (u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
# (u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
# (u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
# (u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
# (u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
# (u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
# (u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
# (u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
# (u"Small World",u"http://www.gocomics.com/smallworld"),
# (u"Steve Sack", u"http://www.gocomics.com/stevesack"),
# (u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
Links Updated:
Brevity
Cheap Thrills
Cow and Boy
Fat Cats Classics
Frank and Ernest
Geech Classics
Grand Avenue
Kit and Carlyle
Little Abner Classics
Meg Classics
Moderately Confused
Motley Classics
Raising Duncan Classics
Shirley and Son Classics
Soup to Nuts
Spot the Frog
Tarzan Classics
That's Life
The Born Loser
The Humble Stumble
The Sunshine Club
Tiny Sepuku
Unstrange Phenomena
Working Daze
Yenny
Links Added:
Tim Eagan
This is the full recipe updated with the changes:
Code:
__license__ = 'GPL v3'
__copyright__ = 'Copyright 2010 Starson17'
'''
www.gocomics.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class GoComics(BasicNewsRecipe):
    '''Recipe that builds one feed per selected comic from www.gocomics.com.

    Each enabled (title, url) pair in parse_index() becomes a feed whose
    articles are the most recent ``num_comics_to_get`` strips of that comic,
    oldest first.
    '''

    title = 'Go Comics'
    __author__ = 'Starson17'
    __version__ = '1.06'
    __date__ = '07 June 2011'
    description = u'200+ Comics - Customize for more days/comics: Defaults to 1 day, 25 comics - 20 general, 5 editorial.'
    category = 'news, comics'
    language = 'en'
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    # Site root used for the Referer header and to absolutize relative links.
    _base_url = 'http://www.gocomics.com'

    # USER PREFERENCES - COMICS AND NUMBER OF COMICS TO RETRIEVE ########
    # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
    num_comics_to_get = 1
    # CHOOSE COMIC STRIPS BELOW (in parse_index) - REMOVE COMMENT '# ' FROM
    # IN FRONT OF DESIRED STRIPS
    # Please do not overload their servers by selecting all comics and 1000
    # strips from each!

    # Only the page heading and the <div> whose id starts with 'mutable_'
    # are selected here.
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', id=lambda x: x and x.startswith('mutable_')),
    ]

    def get_browser(self):
        '''Return the base recipe browser with a gocomics Referer header.

        NOTE(review): assigning ``addheaders`` replaces the whole header
        list, not just the Referer entry - confirm that dropping any default
        headers is intended.
        '''
        br = BasicNewsRecipe.get_browser(self)
        br.addheaders = [('Referer', self._base_url + '/')]
        return br

    def parse_index(self):
        '''Build the feed list from the enabled comics below.

        Returns a list of ``(title, articles)`` tuples; comics for which
        make_links() finds no strips are left out. In test mode
        (``self.test`` truthy) processing stops after the second comic.
        '''
        feeds = []
        for i, (title, url) in enumerate([  # {{{
            # (u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
            # (u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
            # (u"Adam At Home", u"http://www.gocomics.com/adamathome"),
            # (u"Agnes", u"http://www.gocomics.com/agnes"),
            # (u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
            # (u"Andy Capp", u"http://www.gocomics.com/andycapp"),
            (u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
            # (u"Annie", u"http://www.gocomics.com/annie"),
            # (u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
            # (u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
            (u"B.C.", u"http://www.gocomics.com/bc"),
            # (u"Back in the Day", u"http://www.gocomics.com/backintheday"),
            # (u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
            (u"Baldo", u"http://www.gocomics.com/baldo"),
            # (u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
            # (u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
            # (u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
            # (u"Ben", u"http://www.gocomics.com/ben"),
            # (u"Betty", u"http://www.gocomics.com/betty"),
            # (u"Bewley", u"http://www.gocomics.com/bewley"),
            # (u"Big Nate", u"http://www.gocomics.com/bignate"),
            # (u"Big Top", u"http://www.gocomics.com/bigtop"),
            # (u"Biographic", u"http://www.gocomics.com/biographic"),
            # (u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
            # (u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
            # (u"Bliss", u"http://www.gocomics.com/bliss"),
            # (u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
            # (u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
            # (u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
            # (u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
            # (u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
            (u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
            # (u"Brevity", u"http://www.gocomics.com/brevitypanel"),
            # (u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
            (u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
            (u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
            # (u"Candorville", u"http://www.gocomics.com/candorville"),
            # (u"Cathy", u"http://www.gocomics.com/cathy"),
            # (u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
            # (u"Cheap Thrills", u"http://www.gocomics.com/cheap-thrills-cuisine"),
            # (u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
            # (u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
            # (u"Cleats", u"http://www.gocomics.com/cleats"),
            # (u"Close to Home", u"http://www.gocomics.com/closetohome"),
            # (u"Committed", u"http://www.gocomics.com/committed"),
            # (u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
            # (u"Cornered", u"http://www.gocomics.com/cornered"),
            # (u"Cow & Boy", u"http://www.gocomics.com/cowandboy"),
            # (u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
            # (u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
            # (u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
            # (u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
            # (u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
            # (u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
            # (u"Drabble", u"http://www.gocomics.com/drabble"),
            # (u"Eek!", u"http://www.gocomics.com/eek"),
            # (u"F Minus", u"http://www.gocomics.com/fminus"),
            # (u"Family Tree", u"http://www.gocomics.com/familytree"),
            # (u"Farcus", u"http://www.gocomics.com/farcus"),
            # (u"Fat Cats", u"http://www.gocomics.com/fat-cats"),
            # (u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
            (u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
            # (u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
            # (u"Fort Knox", u"http://www.gocomics.com/fortknox"),
            # (u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
            # (u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
            (u"Frank & Ernest", u"http://www.gocomics.com/frank-and-ernest"),
            # (u"Frazz", u"http://www.gocomics.com/frazz"),
            # (u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
            # (u"Free Range", u"http://www.gocomics.com/freerange"),
            # (u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
            # (u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
            (u"Garfield", u"http://www.gocomics.com/garfield"),
            # (u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
            # (u"Geech", u"http://www.gocomics.com/geech"),
            (u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
            # (u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
            # (u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
            # (u"Graffiti", u"http://www.gocomics.com/graffiti"),
            # (u"Grand Avenue", u"http://www.gocomics.com/grand-avenue"),
            # (u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
            # (u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
            # (u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
            # (u"Home and Away", u"http://www.gocomics.com/homeandaway"),
            # (u"Imagine This", u"http://www.gocomics.com/imaginethis"),
            # (u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
            # (u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
            # (u"Ink Pen", u"http://www.gocomics.com/inkpen"),
            # (u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
            # (u"Jane's World", u"http://www.gocomics.com/janesworld"),
            # (u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
            # (u"Jump Start", u"http://www.gocomics.com/jumpstart"),
            # (u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitncarlyle"),
            # (u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
            # (u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
            # (u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
            # (u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
            # (u"Li'l Abner", u"http://www.gocomics.com/lil-abner"),
            # (u"Lio", u"http://www.gocomics.com/lio"),
            # (u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
            # (u"Lola", u"http://www.gocomics.com/lola"),
            (u"Luann", u"http://www.gocomics.com/luann"),
            # (u"Maintaining", u"http://www.gocomics.com/maintaining"),
            # (u"Meg! Classics", u"http://www.gocomics.com/meg-classics"),
            # (u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
            # (u"Moderately Confused", u"http://www.gocomics.com/moderately-confused"),
            (u"Momma", u"http://www.gocomics.com/momma"),
            # (u"Monty", u"http://www.gocomics.com/monty"),
            # (u"Motley Classics", u"http://www.gocomics.com/motley-classics"),
            # (u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
            # (u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
            # (u"Nancy", u"http://www.gocomics.com/nancy"),
            # (u"Nest Heads", u"http://www.gocomics.com/nestheads"),
            # (u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
            # (u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
            (u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
            # (u"Off The Mark", u"http://www.gocomics.com/offthemark"),
            # (u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
            # (u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
            # (u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
            # (u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
            # (u"Overboard", u"http://www.gocomics.com/overboard"),
            # (u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
            (u"Peanuts", u"http://www.gocomics.com/peanuts"),
            (u"Pearls Before Swine",
             u"http://www.gocomics.com/pearlsbeforeswine"),
            # (u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
            # (u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
            # (u"Pickles", u"http://www.gocomics.com/pickles"),
            # (u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
            # (u"Pluggers", u"http://www.gocomics.com/pluggers"),
            (u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
            # (u"PreTeena", u"http://www.gocomics.com/preteena"),
            # (u"Prickly City", u"http://www.gocomics.com/pricklycity"),
            # (u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
            # (u"Raising Duncan", u"http://www.gocomics.com/raising-duncan"),
            # (u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
            # (u"Reality Check", u"http://www.gocomics.com/realitycheck"),
            # (u"Red and Rover", u"http://www.gocomics.com/redandrover"),
            # (u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
            (u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
            # (u"Rudy Park", u"http://www.gocomics.com/rudypark"),
            # (u"Scary Gary", u"http://www.gocomics.com/scarygary"),
            # (u"Shirley and Son Classics", u"http://www.gocomics.com/shirley-and-son-classics"),
            (u"Shoe", u"http://www.gocomics.com/shoe"),
            # (u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
            # (u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
            # (u"Soup To Nutz", u"http://www.gocomics.com/soup-to-nutz"),
            # (u"Spot The Frog", u"http://www.gocomics.com/spot-the-frog"),
            # (u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
            # (u"Sylvia", u"http://www.gocomics.com/sylvia"),
            # (u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
            # (u"Tarzan", u"http://www.gocomics.com/tarzan"),
            # (u"That's Life", u"http://www.gocomics.com/thats-life"),
            # (u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
            # (u"The Barn", u"http://www.gocomics.com/thebarn"),
            # (u"The Boondocks", u"http://www.gocomics.com/boondocks"),
            (u"The Born Loser", u"http://www.gocomics.com/the-born-loser"),
            # (u"The Buckets", u"http://www.gocomics.com/thebuckets"),
            # (u"The City", u"http://www.gocomics.com/thecity"),
            # (u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
            # (u"The Doozies", u"http://www.gocomics.com/thedoozies"),
            # (u"The Duplex", u"http://www.gocomics.com/duplex"),
            # (u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
            # (u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
            # (u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
            # (u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
            # (u"The Humble Stumble", u"http://www.gocomics.com/humble-stumble"),
            # (u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
            # (u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
            (u"The Middletons", u"http://www.gocomics.com/themiddletons"),
            # (u"The Norm", u"http://www.gocomics.com/thenorm"),
            # (u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
            # (u"The Sunshine Club", u"http://www.gocomics.com/the-sunshine-club"),
            # (u"Tiny Sepuku", u"http://www.gocomics.com/tinysepuku"),
            # (u"TOBY", u"http://www.gocomics.com/toby"),
            # (u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
            # (u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
            # (u"Unstrange Phenomena", u"http://www.gocomics.com/unstrange-phenomena"),
            # (u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
            # (u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
            # (u"Wee Pals", u"http://www.gocomics.com/weepals"),
            (u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
            # (u"Working Daze", u"http://www.gocomics.com/working-daze"),
            # (u"Working It Out", u"http://www.gocomics.com/workingitout"),
            # (u"Yenny Lopez", u"http://www.gocomics.com/yenny-lopez"),
            # (u"Zack Hill", u"http://www.gocomics.com/zackhill"),
            # (u"Ziggy", u"http://www.gocomics.com/ziggy"),
            (u"9 to 5", u"http://www.gocomics.com/9to5"),
            (u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
            (u"Herman", u"http://www.gocomics.com/herman"),
            (u"Loose Parts", u"http://www.gocomics.com/looseparts"),
            (u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
            (u"Ripley's Believe It or Not!",
             u"http://www.gocomics.com/ripleysbelieveitornot"),
            (u"Rubes", u"http://www.gocomics.com/rubes"),
            (u"Speed Bump", u"http://www.gocomics.com/speedbump"),
            (u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
            (u"The Argyle Sweater",
             u"http://www.gocomics.com/theargylesweater"),
            #
            # EDITORIAL CARTOONS #####################
            # (u"Bob Gorrell", u"http://www.gocomics.com/bobgorrell"),
            # (u"Chan Lowe", u"http://www.gocomics.com/chanlowe"),
            # (u"Chip Bok", u"http://www.gocomics.com/chipbok"),
            # (u"Chris Britt", u"http://www.gocomics.com/chrisbritt"),
            # (u"Clay Bennett", u"http://www.gocomics.com/claybennett"),
            # (u"Clay Jones", u"http://www.gocomics.com/clayjones"),
            # (u"Dan Wasserman", u"http://www.gocomics.com/danwasserman"),
            # (u"Dana Summers", u"http://www.gocomics.com/danasummers"),
            # (u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
            # (u"Drew Sheneman", u"http://www.gocomics.com/drewsheneman"),
            # (u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
            # (u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
            # (u"Glenn McCoy", u"http://www.gocomics.com/glennmccoy"),
            # (u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
            # (u"Jack Ohman", u"http://www.gocomics.com/jackohman"),
            # (u"Jeff Danziger", u"http://www.gocomics.com/jeffdanziger"),
            # (u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
            # (u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
            # (u"Jim Morin", u"http://www.gocomics.com/jimmorin"),
            # (u"Joel Pett", u"http://www.gocomics.com/joelpett"),
            # (u"John Deering", u"http://www.gocomics.com/johndeering"),
            # (u"Ken Catalino", u"http://www.gocomics.com/kencatalino"),
            # (u"Kerry Waghorn", u"http://www.gocomics.com/facesinthenews"),
            # (u"Kevin Kallaugher", u"http://www.gocomics.com/kevinkallaugher"),
            # (u"Lalo Alcaraz", u"http://www.gocomics.com/laloalcaraz"),
            # (u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
            # (u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
            # (u"Matt Bors", u"http://www.gocomics.com/matt-bors"),
            # (u"Matt Davies", u"http://www.gocomics.com/mattdavies"),
            # (u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
            # (u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
            # (u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
            # (u"Pat Oliphant", u"http://www.gocomics.com/patoliphant"),
            # (u"Paul Szep", u"http://www.gocomics.com/paulszep"),
            # (u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
            # (u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
            # (u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
            # (u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
            # (u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
            # (u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
            # (u"Stuart Carlson", u"http://www.gocomics.com/stuartcarlson"),
            # (u"Ted Rall", u"http://www.gocomics.com/ted-rall"),
            # (u"(Th)ink", u"http://www.gocomics.com/think"),
            # (u"Tim Eagan", u"http://www.gocomics.com/tim-eagan"),
            # (u"Tom Toles", u"http://www.gocomics.com/tomtoles"),
            # (u"Tony Auth", u"http://www.gocomics.com/tonyauth"),
            # (u"Views of the World", u"http://www.gocomics.com/viewsoftheworld"),
            # (u"ViewsAfrica", u"http://www.gocomics.com/viewsafrica"),
            # (u"ViewsAmerica", u"http://www.gocomics.com/viewsamerica"),
            # (u"ViewsAsia", u"http://www.gocomics.com/viewsasia"),
            # (u"ViewsBusiness", u"http://www.gocomics.com/viewsbusiness"),
            # (u"ViewsEurope", u"http://www.gocomics.com/viewseurope"),
            # (u"ViewsLatinAmerica", u"http://www.gocomics.com/viewslatinamerica"),
            # (u"ViewsMidEast", u"http://www.gocomics.com/viewsmideast"),
            # (u"Walt Handelsman", u"http://www.gocomics.com/walthandelsman"),
            # (u"Wit of the World", u"http://www.gocomics.com/witoftheworld"),
        ]):  # }}}
            self.log('Working on: ', title, url)
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
            # Test runs only need a small sample: stop after the second comic.
            if self.test and i > 0:
                break
        return feeds

    def _absolute_url(self, href):
        '''Prefix site-relative links (starting with '/') with the site root.'''
        if href.startswith('/'):
            return self._base_url + href
        return href

    def make_links(self, url):
        '''Collect article entries for one comic, walking back via "prev".

        Starting at ``url``, each page contributes one article dict
        (``title``, ``url``, ``description``, ``date``); the page's 'prev'
        link is then followed until ``num_comics_to_get`` strips have been
        gathered (2 in test mode) or an expected page element is missing.

        Returns the articles ordered oldest first; the list may be empty.
        '''
        current_articles = []
        # Test mode fetches two strips; use a local count so the user's
        # num_comics_to_get preference is not overwritten on the instance.
        num = 2 if self.test else self.num_comics_to_get
        while num > 0:
            num -= 1
            page_soup = self.index_to_soup(url)
            if not page_soup:
                break
            content = page_soup.find(id='content')
            if content is None:
                break
            feature = content.find(name='div', attrs={'class': 'feature'})
            feature_nav = content.find(
                name='ul', attrs={'class': 'feature-nav'})
            if feature is None or feature_nav is None:
                break

            # The <h1> link inside the feature block is the permalink of the
            # strip currently shown.
            try:
                a = feature.find('h1').find('a', href=True)
            except AttributeError:
                # No <h1> at all: find('h1') returned None.
                self.log.exception('Failed to find current page link')
                break
            if a is None:
                # <h1> exists but holds no link; nothing was raised, so this
                # must be checked explicitly before indexing a['href'].
                self.log.warn('Failed to find current page link')
                break
            page_url = self._absolute_url(a['href'])

            try:
                strip_title = self.tag_to_string(a)
            except Exception:
                strip_title = 'Error - no Title found'
            try:
                date_title = self.tag_to_string(feature_nav.find('li'))
            except Exception:
                date_title = 'Error - no Date found'
            title = strip_title + ' - ' + date_title
            current_articles.append(
                {'title': title, 'url': page_url, 'description': '', 'date': ''})

            # Step back to the previous strip, if the page offers one.
            a = feature_nav.find('a', href=True, attrs={'class': 'prev'})
            if a is None:
                break
            url = self._absolute_url(a['href'])
        # Pages were visited newest first; present them oldest first.
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        '''Drop every <h1> except the first, then adeify the images.'''
        headings = soup.findAll('h1')
        for h1 in headings[1:]:
            h1.extract()
        self.adeify_images(soup)
        return soup
|