Courrier International

alphonk · 03-17-2026, 07:01 PM

Code:

#!/usr/bin/env python
# vim:fileencoding=utf-8

__license__ = 'GPL v3'
__copyright__ = ('2009, Mathieu Godlewski <mathieu at godlewski.fr>, '
                 '2015, Rémi Vanicat <vanicat at debian.org>, '
                 '2026, Kabonix')
'''
Courrier International Premium Unlocked
'''

import re
import json
from calibre.web.feeds.news import BasicNewsRecipe


class CourrierInternational(BasicNewsRecipe):
    """Calibre recipe for Courrier International.

    Articles are discovered through the public RSS feeds.  Each article URL
    that ends in a numeric id is rewritten to the mobile JSON endpoint
    (see ``get_article_url``); the JSON payload is unwrapped back into HTML
    in ``preprocess_raw_html`` before calibre's normal clean-up runs.
    """

    # --- Recipe metadata and fetch limits ---
    title = 'Courrier International'
    __author__ = 'Mathieu Godlewski, Kabonix'
    description = 'Global news in french - Edition intégrale via API Mobile Bypass'
    oldest_article = 7
    language = 'fr'
    encoding = 'utf-8'
    max_articles_per_feed = 50
    ignore_duplicate_articles = {'title'}
    no_stylesheets = True

    # --- HTTP identity ---
    # User-Agent string sent with every request (mimics the Android app 3.5.4).
    browser_user_agent = 'CourrierInternational/3.5.4 (Android; 14)'

    # --- Internal constants (underscore-prefixed: not calibre recipe options) ---
    _SITE_ROOT = 'https://www.courrierinternational.com'
    _API_ARTICLE_URL = (
        'https://apps.courrierinternational.com/cri/v1/'
        'premium-android-phone/article?id={}'
    )
    # Numeric article id at the end of the URL path (ex: ..._240469), allowing
    # an optional trailing slash, query string or fragment after it.
    _ARTICLE_ID_RE = re.compile(r'_(\d+)/?(?:[?#].*)?$')
    # Image size placeholders found in the API content, in literal and
    # percent-encoded form, with the concrete value substituted for each.
    _IMAGE_PLACEHOLDERS = (
        ('{{width}}', '1200'),
        ('{{height}}', '800'),
        ('%7B%7Bwidth%7D%7D', '1200'),
        ('%7B%7Bheight%7D%7D', '800'),
    )

    keep_only_tags = [
        dict(name='article'),
        dict(name='span', attrs={'class': 'strapline'}),
        dict(name='h1', attrs={'class': 'article-title'}),
        dict(name='p', attrs={'class': 'article-lede'}),
        dict(name='div', attrs={'class': 'article-text'})
    ]

    remove_tags = [
        dict(attrs={'class': [
            'asset-read-more', 'article-secondary', 'article-aside', 'item',
            'source-logo', 'source-lang', 'info-time', 'article-paywall',
            'article-readmore', 'article-outbrain', 'ci-services', 'article-tools'
        ]})
    ]

    extra_css = '''
        .strapline { color: #cc0000; font-weight: bold; text-transform: uppercase; display: block; margin-bottom: 5px; }
        .article-title { font-size: 1.6em; font-weight: bold; }
        .article-lede { font-weight: bold; margin-bottom: 1em; color: #333; }
        .article-text { line-height: 1.5; }
        .caption, .credit { font-size: 0.8em; font-style: italic; color: #666; }
        p { margin-bottom: 1em; }
        img { display: block; margin: 1em auto; max-width: 100%; }
    '''

    feeds = [
        ('France', 'https://www.courrierinternational.com/feed/rubrique/france/rss.xml'),
        ('Geopolitique', 'https://www.courrierinternational.com/feed/rubrique/geopolitique/rss.xml'),
        ('Economie', 'https://www.courrierinternational.com/feed/rubrique/economie/rss.xml'),
        ('Société', 'https://www.courrierinternational.com/feed/rubrique/societe/rss.xml'),
        ('Politique', 'https://www.courrierinternational.com/feed/rubrique/politique/rss.xml'),
        ('Sciences & Environnement', 'https://www.courrierinternational.com/feed/rubrique/science-environnement/rss.xml'),
        ('Culture', 'https://www.courrierinternational.com/feed/rubrique/culture/rss.xml'),
        ('Expat', 'https://www.courrierinternational.com/feed/rubrique/expat/rss.xml'),
        ('Autres', 'https://www.courrierinternational.com/feed/all/rss.xml'),
    ]

    def get_browser(self, *args, **kwargs):
        """Return the base browser with this recipe's request headers set.

        The header list is replaced wholesale, so any headers the base
        browser carried are dropped and the same three headers are sent
        with every request made through this browser.
        """
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        # Token header expected by the mobile API (described by the original
        # author as shared across the publisher group).
        br.addheaders = [
            ('User-Agent', self.browser_user_agent),
            ('X-Lmd-Token', 'TWPLMOLMO'),
            ('Accept', 'application/json')
        ]
        return br

    def _absolute_url(self, url):
        """Return *url* made absolute against the site root.

        Protocol-relative URLs (``//host/path``) only get a scheme;
        site-relative URLs (``/path``) get the full site root; anything else
        is returned unchanged.
        """
        if url.startswith('//'):
            return 'https:' + url
        if url.startswith('/'):
            return self._SITE_ROOT + url
        return url

    def get_cover_url(self):
        """Return the cover image URL of the latest issue, or ``None``.

        The magazine listing page is fetched and the first ``<img>`` inside
        the first ``article.item`` of ``div.magazines-list`` is used, with
        its ``/320x0/`` size segment swapped for ``/1280x0/``.  Any failure
        is logged and results in ``None`` so the download can continue
        without a cover.
        """
        cover_url = None
        try:
            self.log('🔍 Recherche de la couverture du dernier numéro...')
            soup = self.index_to_soup('https://www.courrierinternational.com/magazine')

            # Walk down step by step so a missing element means "no cover"
            # rather than an AttributeError.
            listing = soup.find('div', class_='magazines-list')
            first_mag = None
            if listing is not None:
                first_mag = listing.find('article', class_='item')
            img = first_mag.find('img') if first_mag is not None else None

            if img is not None and img.has_attr('src') and img['src']:
                # The listing serves a 320px-wide thumbnail; request the
                # 1280px-wide rendition instead.
                hi_res = img['src'].replace('/320x0/', '/1280x0/')
                cover_url = self._absolute_url(hi_res)
                self.log(f'✅ Couverture trouvée et améliorée : {cover_url}')

            if not cover_url:
                self.log('⚠️ Aucune couverture trouvée.')

        except Exception as e:
            # Deliberately broad: the cover is optional and both the network
            # fetch and the parsing may fail in ways not worth enumerating.
            self.log(f'❌ Erreur lors de la récupération de la couverture : {e}')

        return cover_url

    def get_article_url(self, article):
        """Return the URL calibre should download for *article*.

        When the feed URL carries a numeric id at the end of its path, the
        mobile API URL for that id is returned; otherwise the value from the
        base implementation is returned unchanged (including a falsy value
        when the feed entry has no usable link).
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if not url:
            return url
        match = self._ARTICLE_ID_RE.search(url.strip())
        if match is None:
            return url
        return self._API_ARTICLE_URL.format(match.group(1))

    def preprocess_raw_html(self, raw_html, url):
        """Unwrap the article HTML from an API JSON response.

        For URLs that do not contain ``/cri/v1/`` the input is returned
        untouched.  For API URLs the HTML is read from
        ``templates -> raw_content -> content``, image size placeholders are
        filled in, and the result is wrapped in a minimal HTML document.  If
        the payload cannot be decoded the problem is logged and *raw_html*
        is returned as-is.
        """
        if '/cri/v1/' not in url:
            return raw_html

        try:
            data = json.loads(raw_html)
            content = data['templates']['raw_content']['content']
            if not isinstance(content, str):
                raise TypeError('content is not a string')
        except (ValueError, KeyError, TypeError) as e:
            # ValueError covers json.JSONDecodeError; KeyError/TypeError
            # cover a payload with an unexpected structure.
            self.log(f"Erreur de décodage pour {url}: {str(e)}")
            return raw_html

        for placeholder, value in self._IMAGE_PLACEHOLDERS:
            content = content.replace(placeholder, value)

        return f'<html><body>{content}</body></html>'

    def preprocess_html(self, soup):
        """Normalise image sources and make site-relative URLs absolute.

        For each ``<img>`` with a ``srcset``, the URL of the last non-empty
        candidate replaces ``src``.  Image sources and ``<a href>`` values
        starting with ``/`` are then made absolute.
        """
        for img in soup.find_all('img'):
            if img.has_attr('srcset'):
                # Drop empty entries (empty attribute, trailing comma) so a
                # valid src is never overwritten with an empty string.
                candidates = [c.strip() for c in img['srcset'].split(',')]
                candidates = [c for c in candidates if c]
                if candidates:
                    # A candidate is "URL [descriptor]"; keep only the URL.
                    img['src'] = candidates[-1].split()[0]
            if img.has_attr('src'):
                img['src'] = self._absolute_url(img['src'])

        for link in soup.find_all('a', href=re.compile(r'^/')):
            link['href'] = self._absolute_url(link['href'])
        return soup

kovidgoyal · 03-17-2026, 10:57 PM

https://github.com/kovidgoyal/calibr...aa8b76e7a73547

Similar Threads
Thread	Thread Starter	Forum	Replies	Last Post
Courrier International recipe	darkl	Recipes	1	05-07-2015 01:26 PM
Courrier International	josepinto	Recipes	1	03-27-2015 10:18 AM
Courrier international recipe	darkl	Recipes	4	03-25-2014 09:07 AM
Recipe for Courrier International Abonné	Mwandishi	Recipes	0	11-17-2012 08:27 AM
Kindle 3 3G International Coverage US vs International model	fidjit	Amazon Kindle	4	02-04-2012 07:19 AM

03-17-2026, 10:57 PM	#2
kovidgoyal creator of calibre Posts: 46,333 Karma: 29630876 Join Date: Oct 2006 Location: Mumbai, India Device: Various	https://github.com/kovidgoyal/calibr...aa8b76e7a73547

Advert