|
|
#1 |
|
Member
![]() Posts: 21
Karma: 54
Join Date: Dec 2024
Device: Kindle Scribe
|
Courrier International
Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = ('2009, Mathieu Godlewski <mathieu at godlewski.fr>, '
'2015, Rémi Vanicat <vanicat at debian.org>, '
'2026, Kabonix')
'''
Courrier International Premium Unlocked
'''
import re
import json
from calibre.web.feeds.news import BasicNewsRecipe
class CourrierInternational(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Courrier International RSS feeds.

    Article links found in the feeds are rewritten to the publisher's mobile
    JSON endpoint (see ``get_article_url``); the JSON payload is then unwrapped
    back into HTML (see ``preprocess_raw_html``) before calibre's normal
    clean-up (``keep_only_tags`` / ``remove_tags`` / ``preprocess_html``) runs.
    """

    title = 'Courrier International'
    __author__ = 'Mathieu Godlewski, Kabonix'
    description = 'Global news in french - Edition intégrale via API Mobile Bypass'
    oldest_article = 7
    language = 'fr'
    encoding = 'utf-8'

    # User-Agent string sent with every request (identifies as the Android
    # application, version 3.5.4).
    browser_user_agent = 'CourrierInternational/3.5.4 (Android; 14)'

    # Prefix used to turn site-relative URLs into absolute ones.
    _SITE_ROOT = 'https://www.courrierinternational.com'

    max_articles_per_feed = 50
    ignore_duplicate_articles = {'title'}
    no_stylesheets = True

    keep_only_tags = [
        dict(name='article'),
        dict(name='span', attrs={'class': 'strapline'}),
        dict(name='h1', attrs={'class': 'article-title'}),
        dict(name='p', attrs={'class': 'article-lede'}),
        dict(name='div', attrs={'class': 'article-text'})
    ]

    remove_tags = [
        dict(attrs={'class': [
            'asset-read-more', 'article-secondary', 'article-aside', 'item',
            'source-logo', 'source-lang', 'info-time', 'article-paywall',
            'article-readmore', 'article-outbrain', 'ci-services', 'article-tools'
        ]})
    ]

    extra_css = '''
    .strapline { color: #cc0000; font-weight: bold; text-transform: uppercase; display: block; margin-bottom: 5px; }
    .article-title { font-size: 1.6em; font-weight: bold; }
    .article-lede { font-weight: bold; margin-bottom: 1em; color: #333; }
    .article-text { line-height: 1.5; }
    .caption, .credit { font-size: 0.8em; font-style: italic; color: #666; }
    p { margin-bottom: 1em; }
    img { display: block; margin: 1em auto; max-width: 100%; }
    '''

    feeds = [
        ('France', 'https://www.courrierinternational.com/feed/rubrique/france/rss.xml'),
        ('Geopolitique', 'https://www.courrierinternational.com/feed/rubrique/geopolitique/rss.xml'),
        ('Economie', 'https://www.courrierinternational.com/feed/rubrique/economie/rss.xml'),
        ('Société', 'https://www.courrierinternational.com/feed/rubrique/societe/rss.xml'),
        ('Politique', 'https://www.courrierinternational.com/feed/rubrique/politique/rss.xml'),
        ('Sciences & Environnement', 'https://www.courrierinternational.com/feed/rubrique/science-environnement/rss.xml'),
        ('Culture', 'https://www.courrierinternational.com/feed/rubrique/culture/rss.xml'),
        ('Expat', 'https://www.courrierinternational.com/feed/rubrique/expat/rss.xml'),
        ('Autres', 'https://www.courrierinternational.com/feed/all/rss.xml'),
    ]

    def get_browser(self, *args, **kwargs):
        """Return the base recipe browser with this recipe's request headers.

        The header list is replaced wholesale, so whatever default headers the
        base browser carried are dropped and every request made through it
        sends exactly these three headers.
        """
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        br.addheaders = [
            ('User-Agent', self.browser_user_agent),
            ('X-Lmd-Token', 'TWPLMOLMO'),
            ('Accept', 'application/json')
        ]
        return br

    def get_cover_url(self):
        """Return the cover image URL of the latest issue, or ``None``.

        Loads the magazine listing page, takes the first ``<img>`` inside the
        first ``article.item`` of ``div.magazines-list`` and swaps the
        ``/320x0/`` size segment of its URL for ``/1280x0/``. Any failure is
        logged and results in ``None`` so the download carries on without a
        cover.
        """
        cover_url = None
        try:
            self.log('🔍 Recherche de la couverture du dernier numéro...')
            soup = self.index_to_soup('https://www.courrierinternational.com/magazine')
            # Walk down one level at a time so that a missing container is
            # reported as "no cover found" rather than as an AttributeError.
            mag_list = soup.find('div', class_='magazines-list')
            first_mag = None
            if mag_list is not None:
                first_mag = mag_list.find('article', class_='item')
            img = first_mag.find('img') if first_mag is not None else None
            if img is not None and img.has_attr('src'):
                # Thumbnails are served at 320px wide; request 1280px instead.
                cover_url = img['src'].replace('/320x0/', '/1280x0/')
                self.log(f'✅ Couverture trouvée et améliorée : {cover_url}')
            if not cover_url:
                self.log('⚠️ Aucune couverture trouvée.')
        except Exception as e:
            # Best effort: a cover failure must never abort the whole recipe.
            self.log(f'❌ Erreur lors de la récupération de la couverture : {e}')
        return cover_url

    def get_article_url(self, article):
        """Map a feed entry to the URL calibre should download.

        When the feed link ends in ``_<digits>`` the digits are used as the
        article id for the mobile JSON endpoint; otherwise the link from the
        base implementation is returned unchanged (including ``None`` when
        the entry has no usable link).
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if not url:
            # No link on this entry: nothing to rewrite, and re.search would
            # raise TypeError on None.
            return url
        match = re.search(r'_(\d+)$', url)
        if match:
            article_id = match.group(1)
            return f"https://apps.courrierinternational.com/cri/v1/premium-android-phone/article?id={article_id}"
        return url

    def preprocess_raw_html(self, raw_html, url):
        """Unwrap the JSON endpoint response into a minimal HTML document.

        For URLs containing ``/cri/v1/`` the payload is parsed as JSON and the
        markup stored under ``templates -> raw_content -> content`` is wrapped
        in ``<html><body>``. Image size placeholders are filled with 1200x800,
        in both their literal and URL-encoded spellings. Any other URL, or a
        payload that cannot be decoded, is returned untouched.
        """
        if "/cri/v1/" not in url:
            return raw_html
        try:
            data = json.loads(raw_html)
            content = data['templates']['raw_content']['content']
            if not isinstance(content, str):
                raise TypeError(f'unexpected content type {type(content).__name__}')
        except (ValueError, KeyError, IndexError, TypeError) as e:
            self.log(f"Erreur de décodage pour {url}: {str(e)}")
            return raw_html
        # Placeholders appear both raw and percent-encoded inside attribute
        # values; substitute every spelling so no image URL is left templated.
        replacements = (
            ('{{width}}', '1200'),
            ('{{height}}', '800'),
            ('%7B%7Bwidth%7D%7D', '1200'),
            ('%7B%7Bheight%7D%7D', '800'),
        )
        for placeholder, value in replacements:
            content = content.replace(placeholder, value)
        return f'<html><body>{content}</body></html>'

    def _absolute_url(self, url):
        """Return *url* made absolute against the site root when it is relative."""
        if url.startswith('//'):
            # Protocol-relative URL: it already names a host, only add a scheme.
            return 'https:' + url
        if url.startswith('/'):
            return self._SITE_ROOT + url
        return url

    def preprocess_html(self, soup):
        """Pick one image source per ``<img>`` and absolutize relative URLs.

        For images carrying ``srcset`` the last candidate in the list is
        copied into ``src`` (assumed to be the largest rendition - the code
        does not compare descriptors). Site-relative and protocol-relative
        image sources and link targets are then rewritten as absolute
        ``https`` URLs.
        """
        for img in soup.find_all('img'):
            if img.has_attr('srcset'):
                # Drop empty entries (empty attribute, trailing comma) so a
                # valid existing src is never overwritten with ''.
                candidates = [c.strip() for c in img['srcset'].split(',') if c.strip()]
                if candidates:
                    img['src'] = candidates[-1].split()[0]
            if img.has_attr('src'):
                img['src'] = self._absolute_url(img['src'])
        for link in soup.find_all('a', href=re.compile(r'^/')):
            link['href'] = self._absolute_url(link['href'])
        return soup
|
|
|
|
|
|
#2 |
|
creator of calibre
![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() ![]() Posts: 46,333
Karma: 29630876
Join Date: Oct 2006
Location: Mumbai, India
Device: Various
|
|
|
|
|
| Advert | |
|
|
![]() |
|
Similar Threads
|
||||
| Thread | Thread Starter | Forum | Replies | Last Post |
| Courrier International recipe | darkl | Recipes | 1 | 05-07-2015 01:26 PM |
| Courrier International | josepinto | Recipes | 1 | 03-27-2015 10:18 AM |
| Courrier international recipe | darkl | Recipes | 4 | 03-25-2014 09:07 AM |
| Recipe for Courrier International Abonné | Mwandishi | Recipes | 0 | 11-17-2012 08:27 AM |
| Kindle 3 3G International Coverage US vs International model | fidjit | Amazon Kindle | 4 | 02-04-2012 07:19 AM |