#1
Junior Member
Posts: 7
Karma: 12
Join Date: Nov 2010
Location: Mexico
Device: Kindle
Animal Politico (@pajaropolitico on Twitter) recipe - Spanish, Mexico
Just hacked together this recipe to get the non-multimedia parts of the new journalistic project Animal Político, from the people behind the @pajaropolitico Twitter account. (In case you don't know them and the name isn't obvious enough, this is mostly political news and opinion, with a very interactive approach.)
Posting it here in case anyone finds it useful. You can find the latest version on my GitHub: https://github.com/leamsi/Animal-Pol...olitico.recipe
Code:
#!/usr/bin/python
# encoding: utf-8

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1290663986(BasicNewsRecipe):
    title = u'Animal Pol\u00EDtico'
    publisher = u'Animal Pol\u00EDtico'
    category = u'News, Mexico'
    description = u'Noticias Pol\u00EDticas'
    masthead_url = 'http://www.animalpolitico.com/wp-content/themes/animal_mu/images/logo.png'
    oldest_article = 1
    max_articles_per_feed = 100
    language = 'es'
    #feeds = [(u'Animal Politico', u'http://www.animalpolitico.com/feed/')]
    remove_tags_before = dict(name='div', id='main')
    remove_tags = [dict(name='div', attrs={'class':'fb-like-button'})]
    keep_only_tags = [dict(name='h1', attrs={'class':'entry-title'}),
                      dict(name='div', attrs={'class':'entry-content'})]
    remove_javascript = True

    INDEX = 'http://www.animalpolitico.com/'

    def generic_parse(self, soup):
        articles = []
        # Equivalent to soup.findAll('li', 'hentry')
        for entry in soup.findAll(lambda tag: tag.name == 'li' and
                                  tag.has_key('class') and
                                  tag['class'].find('hentry') != -1):
            article_url = entry.a['href'] + '?print=yes'
            article_title = entry.find('h3', 'entry-title')
            article_title = self.tag_to_string(article_title)
            article_date = entry.find('span', 'the-time')
            article_date = self.tag_to_string(article_date)
            article_desc = self.tag_to_string(entry.find('p'))
            #print 'Article:', article_title, article_date, article_url
            articles.append({'title': article_title,
                             'date': article_date,
                             'description': article_desc,
                             'url': article_url})
            # Avoid including the multimedia stuff.
            if entry['class'].find('last') != -1:
                break
        return articles

    def plumaje_parse(self, soup):
        articles = []
        blogs_soup = soup.find(lambda tag: tag.name == 'ul' and
                               tag.has_key('class') and
                               tag['class'].find('bloglist-fecha') != -1)
        for entry in blogs_soup.findAll('li'):
            article_title = entry.p
            article_url = article_title.a['href'] + '?print=yes'
            article_date = article_title.nextSibling
            article_title = self.tag_to_string(article_title)
            article_date = self.tag_to_string(article_date).replace(u'Last Updated: ', '')
            article_desc = self.tag_to_string(entry.find('h4'))
            #print 'Article:', article_title, article_date, article_url
            articles.append({'title': article_title,
                             'date': article_date,
                             'description': article_desc,
                             'url': article_url})
        return articles

    def boca_parse(self, soup):
        articles = []
        # Equivalent to soup.findAll('div', 'hentry')
        for entry in soup.findAll(lambda tag: tag.name == 'div' and
                                  tag.has_key('class') and
                                  tag['class'].find('hentry') != -1):
            article_title = entry.find('h2', 'entry-title')
            article_url = article_title.a['href'] + '?print=yes'
            article_title = self.tag_to_string(article_title)
            article_date = entry.find('span', 'entry-date')
            article_date = self.tag_to_string(article_date)
            article_desc = self.tag_to_string(entry.find('div', 'entry-content'))
            #print 'Article:', article_title, article_date, article_url
            articles.append({'title': article_title,
                             'date': article_date,
                             'description': article_desc,
                             'url': article_url})
            # Avoid including the multimedia stuff.
            if entry['class'].find('last') != -1:
                break
        return articles

    def parse_index(self):
        gobierno_soup = self.index_to_soup(self.INDEX + 'gobierno/')
        congreso_soup = self.index_to_soup(self.INDEX + 'congreso/')
        seguridad_soup = self.index_to_soup(self.INDEX + 'seguridad/')
        comunidad_soup = self.index_to_soup(self.INDEX + 'comunidad/')
        plumaje_soup = self.index_to_soup(self.INDEX + 'plumaje/')
        la_boca_del_lobo_soup = self.index_to_soup(self.INDEX + 'category/la-boca-del-lobo/')

        gobierno_articles = self.generic_parse(gobierno_soup)
        congreso_articles = self.generic_parse(congreso_soup)
        seguridad_articles = self.generic_parse(seguridad_soup)
        comunidad_articles = self.generic_parse(comunidad_soup)
        plumaje_articles = self.plumaje_parse(plumaje_soup)
        la_boca_del_lobo_articles = self.boca_parse(la_boca_del_lobo_soup)

        return [(u'Gobierno', gobierno_articles),
                (u'Congreso', congreso_articles),
                (u'Seguridad', seguridad_articles),
                (u'Comunidad', comunidad_articles),
                (u'Plumaje', plumaje_articles),
                (u'La Boca del Lobo', la_boca_del_lobo_articles)]
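In case it helps, here is a tiny sketch of the structure parse_index() is expected to hand back to calibre. The article values below are made up; only the shape matters (a list of (section title, article list) pairs, where each article is a dict with title, date, description and url keys):
Code:
# Sketch only: made-up article values showing the structure that
# parse_index() returns to calibre -- a list of (section, articles) pairs,
# where each article is a dict with 'title', 'date', 'description' and 'url'.
sample_index = [
    (u'Gobierno', [
        {'title': u'Nota de ejemplo',
         'date': u'28 Nov 2010',
         'description': u'Resumen corto de la nota',
         'url': 'http://www.animalpolitico.com/gobierno/nota-de-ejemplo/?print=yes'},
    ]),
    (u'Plumaje', [
        {'title': u'Entrada de blog de ejemplo',
         'date': u'28 Nov 2010',
         'description': u'Resumen corto de la entrada',
         'url': 'http://www.animalpolitico.com/plumaje/entrada-de-ejemplo/?print=yes'},
    ]),
]

for section, articles in sample_index:
    print('%s: %d article(s)' % (section, len(articles)))
To try the recipe itself, save it to a .recipe file and either add it through calibre's custom news source dialog or run it with something like ebook-convert animal_politico.recipe output.epub --test.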
#2
Newsbeamer dev
Posts: 123
Karma: 1000
Join Date: Dec 2011
Device: Kindle Voyage
Hi. I know this is a really old thread, but the Animal Político recipe is no longer working for me, and neither is the one on your GitHub. Error message below:
Thanks! Code:
File "/usr/bin/ebook-convert", line 20, in <module> sys.exit(main()) File "/usr/lib/calibre/calibre/ebooks/conversion/cli.py", line 362, in main parser, plumber = create_option_parser(args, log) File "/usr/lib/calibre/calibre/ebooks/conversion/cli.py", line 320, in create_option_parser plumber = Plumber(input, output, log, reporter) File "/usr/lib/calibre/calibre/ebooks/conversion/plumber.py", line 752, in __init__ raise ValueError('No plugin to handle input format: '+input_fmt) ValueError: No plugin to handle input format: 1 |
#3
Enthusiast
Posts: 36
Karma: 10
Join Date: Dec 2017
Location: Los Angeles, CA
Device: Smart Phone
A rewrite of Animal Politico
Hello there duluoz,
Here's a little something I came up with: Animal Politico:
Code:
#!/usr/bin/python2
# encoding: utf-8

import re

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    q = frozenset(classes.split(' '))
    return dict(
        attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}
    )


class AnimalPolitico(BasicNewsRecipe):
    title = u'Animal Político'
    description = u'Noticias Políticas'
    __author__ = 'Jose Ortiz'
    masthead_url = 'https://www.animalpolitico.com/wp-content/themes/animalpolitico-2019/static/assets/logo_black.svg'
    language = 'es_MX'
    ignore_duplicate_articles = {'title', 'url'}
    conversion_options = {
        'tags': 'News, Mexico',
        'publisher': 'Animal Politico',
        'comments': description
    }

    keep_only_tags = [classes('ap_single_first ap_single_content ax_single')]
    remove_tags = [classes('ap_single_sharers_head ap_single_sharers_share')]

    def parse_index(self):
        soup = self.index_to_soup('http://www.animalpolitico.com/')
        articles = []
        for a in soup(**{
            'name': 'a',
            'attrs': {
                'href': True, 'title': True,
                'data-author': True, 'data-type': True,
                'data-home-title': True
            }
        }):
            title = a['title']
            url = a['href']
            author = a['data-author']
            self.log('\t', title, ' at ', url)
            articles.append({'title': title, 'author': author, 'url': url})

        ans = {}
        for article in articles:
            if re.match(r'https?://www\.animalpolitico\.com/elsabueso/.', article['url'], re.I):
                ans.setdefault('El Sabueso', []).append(article)
            elif re.match(r'https?://www\.animalpolitico\.com/.', article['url'], re.I):
                ans.setdefault('Noticias', []).append(article)
            elif re.match(r'https?://www\.animalgourmet\.com/.', article['url'], re.I):
                ans.setdefault('Comida', []).append(article)
        return [(sec, ans[sec]) for sec in sorted(ans)]

    def populate_article_metadata(self, article, soup, first):
        if re.match(r'https?://www\.animalpolitico\.com/.', article.url, re.I):
            article.formatted_date = self.tag_to_string(
                soup.find(**classes('ap_single_first')).find(
                    **classes('ap_single_first_info_date')))
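And in case it helps to follow the section logic, here is a minimal standalone sketch of just the URL routing that parse_index() does above; the sample URLs are invented for illustration:
Code:
# Standalone sketch of the URL-to-section routing used in parse_index() above.
# The sample URLs are invented; only the regex logic mirrors the recipe.
import re

SAMPLE_URLS = [
    'https://www.animalpolitico.com/elsabueso/alguna-verificacion/',
    'https://www.animalpolitico.com/alguna-noticia/',
    'https://www.animalgourmet.com/alguna-receta/',
]


def section_for(url):
    if re.match(r'https?://www\.animalpolitico\.com/elsabueso/.', url, re.I):
        return 'El Sabueso'
    elif re.match(r'https?://www\.animalpolitico\.com/.', url, re.I):
        return 'Noticias'
    elif re.match(r'https?://www\.animalgourmet\.com/.', url, re.I):
        return 'Comida'
    return None


for url in SAMPLE_URLS:
    print('%s -> %s' % (url, section_for(url)))
The order of the checks matters: the more specific elsabueso pattern has to come before the general animalpolitico one, otherwise everything would land in Noticias.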
#4
Newsbeamer dev
Posts: 123
Karma: 1000
Join Date: Dec 2011
Device: Kindle Voyage
Quote: Originally posted by lui1
Hello there duluoz,
Here's a little something I came up with: Animal Politico:

This works beautifully, thanks. I hope you don't mind, I'm using the recipe in my Android app - Newsbeamer - https://play.google.com/store/apps/d...eamer&hl=en_AU
Thanks again
Tags: mexico, news, political commentary, politics, recipe
Similar Threads
Thread | Thread Starter | Forum | Replies | Last Post |
Recipe works when mocked up as Python file, fails when converted to Recipe | ode | Recipes | 7 | 09-04-2011 04:57 AM |
Recreational animal torturer nearly killed by animal. | ardeegee | Lounge | 41 | 08-20-2010 09:46 AM |
Unutterably Silly Next great animal? | pshrynk | Lounge | 307 | 04-21-2009 09:46 PM |
Could some kind soul make a recipe for politico.com | Dragoro | Calibre | 3 | 03-13-2009 11:48 PM |
Now Politico Available for the Kindle | bookwormfjl | Amazon Kindle | 1 | 01-17-2009 07:51 PM |