|
|
#1 |
|
Junior Member
![]() Posts: 1
Karma: 10
Join Date: Aug 2023
Device: none
|
Updated recipe for The Baffler
Hi there,
The existing recipe for The Baffler needed updating. Here is the updated version: Code:
from calibre.web.feeds.news import BasicNewsRecipe, classes
class TheBaffler(BasicNewsRecipe):
    """Calibre news recipe that downloads the latest issue of The Baffler.

    The issue listing at https://thebaffler.com/issues is read to find the
    most recent issue; that issue's page is then walked section by section
    to build the article index.
    """

    title = 'The Baffler'
    __author__ = 'flobotnik'  # based on unkn0wn's version
    description = ('This magazine contains left-wing criticism, cultural analysis, short'
                   ' stories, poems and art. They publish six print issues annually.')
    language = 'en'
    encoding = 'UTF-8'
    no_javascript = True
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']
    # NOTE(review): the ':' in '.lg:text-xs' below is unescaped, so a CSS
    # parser will probably read it as class 'lg' plus a pseudo-class rather
    # than the class name 'lg:text-xs' -- confirm and escape if this rule is
    # actually needed.
    extra_css = '''
        .entry-subtitle{color:#202020; font-style:italic; text-align:left;}
        blockquote{color:gray;}
        em{color:#404040;}
        .wp-caption-text{font-size:small; text-align:center;}
        .lg:text-xs{color:gray; font-size:small; text-align:center;}
        .author-meta{font-size:small; color:gray;}
    '''
    keep_only_tags = [
        dict(name='main', attrs={'id': 'main'})
    ]
    remove_tags = [
        classes('entry-date issue-number-segment single-article-vertical donation-footer story-footer ml-4 mt-14'),
        dict(name='footer'),
        dict(name='a', class_='ml-4 pr-px font-sans text-sm lg:text-xs whitespace-nowrap')
    ]

    def get_cover_url(self):
        """Return the cover image URL, or the existing ``cover_url`` value.

        The first ``<img>`` inside the first ``div.row`` of the Exact
        Editions shop page is used as the cover. If the page has no such
        element, ``self.cover_url`` is left untouched and returned as is.
        """
        soup = self.index_to_soup('https://shop.exacteditions.com/us/the-baffler')
        row = soup.find('div', attrs={'class': 'row'})
        img = row.find('img') if row is not None else None
        # Only overwrite cover_url when a usable src is actually present, so a
        # layout change on the shop page does not abort the whole download.
        if img is not None and img.get('src'):
            self.cover_url = img['src']
        return getattr(self, 'cover_url', None)

    def parse_index(self):
        """Build the article index for the most recent issue.

        Returns:
            A list of ``(section_title, articles)`` tuples, where each
            article is a dict with ``title``, ``url`` and ``description``
            keys. Sections without any linked article are omitted.

        Raises:
            ValueError: if the issue listing or the issue page does not
                contain the elements this recipe relies on.
        """
        soup = self.index_to_soup('https://thebaffler.com/issues')
        issue = soup.find('article')
        if issue is None:
            raise ValueError('The Baffler: no <article> found on the issues page')

        # The heading is expected to look like "<issue no.> — <edition name>";
        # partition() tolerates a heading without the em dash instead of
        # raising IndexError.
        heading = self.tag_to_string(issue.find('h2')).strip()
        edition = heading.partition('—')[2].strip()
        if edition:
            self.log('Downloading Issue: ', edition)
            self.title = 'The Baffler : ' + edition
        self.timefmt = ' [' + self.tag_to_string(issue.find('div', **classes('font-lion'))).strip() + ']'

        issue_link = issue.find('a')
        if issue_link is None or not issue_link.get('href'):
            raise ValueError('The Baffler: latest issue has no link to its contents page')
        self.description = (
            u'\n\n' + self.tag_to_string(issue_link).strip() +
            u'\n\n' + self.description
        )

        soup = self.index_to_soup(issue_link['href'])
        main = soup.find('main', attrs={'id': 'main'})
        if main is None:
            raise ValueError('The Baffler: issue page has no <main id="main"> element')

        ans = []
        for section in main.findAll('section'):
            section_heading = section.find('h3')
            current_section = (
                self.tag_to_string(section_heading).strip()
                if section_heading is not None else ''
            )
            self.log(current_section)
            articles = []
            for h4 in section.findAll('h4'):
                link = h4.find('a')
                # A heading without a link cannot be downloaded; skip it
                # rather than failing the whole issue.
                if link is None or not link.get('href'):
                    continue
                title = self.tag_to_string(h4)
                url = link['href']
                desc = ''
                # The two <span> elements following the heading are combined
                # as "<second span> | <first span>" to form the description.
                span = h4.findNext('span')
                if span:
                    desc = self.tag_to_string(span).strip()
                    span2 = span.findNext('span')
                    if span2:
                        desc = self.tag_to_string(span2).strip() + ' | ' + desc
                self.log('\t', title, '\n\t', desc, '\n\t\t', url)
                articles.append(
                    {'title': title, 'url': url, 'description': desc})
            if articles:
                ans.append((current_section, articles))
        return ans

    def preprocess_html(self, soup):
        """Promote title and section-heading markup to real heading tags.

        The first ``div.entry-title`` is renamed to ``h1`` and every
        ``p.parasectionhed`` is renamed to ``h4``. The same soup object is
        modified in place and returned.
        """
        div = soup.find('div', **classes('entry-title'))
        if div:
            div.name = 'h1'
        for p in soup.findAll('p', attrs={'class': 'parasectionhed'}):
            p.name = 'h4'
        return soup
|
|
|
|
|
|
#2 |
|
creator of calibre
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 45,618
Karma: 28549044
Join Date: Oct 2006
Location: Mumbai, India
Device: Various
|
|
|
|
|
| Advert | |
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Updated engadget recipe | epubli | Recipes | 1 | 09-18-2022 10:47 AM |
| the baffler update | unkn0wn | Recipes | 1 | 09-04-2022 07:17 AM |
| [Recipe Request] The Baffler | duluoz | Recipes | 2 | 05-23-2019 11:27 PM |
| Updated Recipe - O Globo | claviola | Recipes | 0 | 06-18-2012 07:30 PM |
| Updated RealClear*.com Recipe | TechnoCat | Recipes | 0 | 04-01-2012 09:28 PM |