Register Guidelines E-Books Today's Posts Search

Go Back   MobileRead Forums > E-Book Software > Calibre > Recipes

Notices

Reply
 
Thread Tools Search this Thread
Old 03-17-2026, 07:01 PM   #1
alphonk
Member
alphonk is on a distinguished road
 
Posts: 21
Karma: 54
Join Date: Dec 2024
Device: kindle scribe
Courrier International

Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8

__license__ = 'GPL v3'
__copyright__ = ('2009, Mathieu Godlewski <mathieu at godlewski.fr>, '
                 '2015, Rémi Vanicat <vanicat at debian.org>, '
                 '2026, Kabonix')
'''
Courrier International Premium Unlocked
'''

import re
import json
from calibre.web.feeds.news import BasicNewsRecipe


class CourrierInternational(BasicNewsRecipe):
    """Calibre recipe for Courrier International.

    Article links taken from the site's RSS feeds are rewritten to point at
    the mobile application's JSON API; the HTML embedded in the JSON answer
    is then extracted and cleaned like a regular web page.
    """

    title = 'Courrier International'
    __author__ = 'Mathieu Godlewski, Kabonix'
    description = 'Global news in french - Edition intégrale via API Mobile Bypass'
    oldest_article = 7
    language = 'fr'
    encoding = 'utf-8'
    max_articles_per_feed = 50
    ignore_duplicate_articles = {'title'}
    no_stylesheets = True

    # --- Mobile API configuration ---
    # Requests identify themselves as the Android application, version 3.5.4.
    browser_user_agent = 'CourrierInternational/3.5.4 (Android; 14)'

    # Origin used to turn site-relative image and link URLs into absolute ones.
    _SITE_URL = 'https://www.courrierinternational.com'

    # Captures the numeric id that ends the URL path (e.g. ".../slug_240469"),
    # tolerating a trailing slash, a query string or a fragment after it.
    _ARTICLE_ID_RE = re.compile(r'[^?#]*_(\d+)/?(?:[?#]|$)')

    # Image-size placeholders found in API content (raw and URL-encoded forms)
    # and the concrete pixel value substituted for each of them.
    _IMAGE_PLACEHOLDERS = (
        ('{{width}}', '1200'),
        ('{{height}}', '800'),
        ('%7B%7Bwidth%7D%7D', '1200'),
        ('%7B%7Bheight%7D%7D', '800'),
    )

    keep_only_tags = [
        dict(name='article'),
        dict(name='span', attrs={'class': 'strapline'}),
        dict(name='h1', attrs={'class': 'article-title'}),
        dict(name='p', attrs={'class': 'article-lede'}),
        dict(name='div', attrs={'class': 'article-text'})
    ]

    remove_tags = [
        dict(attrs={'class': [
            'asset-read-more', 'article-secondary', 'article-aside', 'item',
            'source-logo', 'source-lang', 'info-time', 'article-paywall',
            'article-readmore', 'article-outbrain', 'ci-services', 'article-tools'
        ]})
    ]

    extra_css = '''
        .strapline { color: #cc0000; font-weight: bold; text-transform: uppercase; display: block; margin-bottom: 5px; }
        .article-title { font-size: 1.6em; font-weight: bold; }
        .article-lede { font-weight: bold; margin-bottom: 1em; color: #333; }
        .article-text { line-height: 1.5; }
        .caption, .credit { font-size: 0.8em; font-style: italic; color: #666; }
        p { margin-bottom: 1em; }
        img { display: block; margin: 1em auto; max-width: 100%; }
    '''

    feeds = [
        ('France', 'https://www.courrierinternational.com/feed/rubrique/france/rss.xml'),
        ('Geopolitique', 'https://www.courrierinternational.com/feed/rubrique/geopolitique/rss.xml'),
        ('Economie', 'https://www.courrierinternational.com/feed/rubrique/economie/rss.xml'),
        ('Société', 'https://www.courrierinternational.com/feed/rubrique/societe/rss.xml'),
        ('Politique', 'https://www.courrierinternational.com/feed/rubrique/politique/rss.xml'),
        ('Sciences & Environnement', 'https://www.courrierinternational.com/feed/rubrique/science-environnement/rss.xml'),
        ('Culture', 'https://www.courrierinternational.com/feed/rubrique/culture/rss.xml'),
        ('Expat', 'https://www.courrierinternational.com/feed/rubrique/expat/rss.xml'),
        ('Autres', 'https://www.courrierinternational.com/feed/all/rss.xml'),
    ]

    def _absolute_url(self, url):
        """Return *url* as an absolute URL.

        Protocol-relative URLs ("//host/path") only receive a scheme, and
        site-relative URLs ("/path") are prefixed with the site origin. Any
        other value is returned unchanged.
        """
        if url.startswith('//'):
            # Already names its own host: prefixing the site origin would
            # produce a broken "https://site//host/path" address.
            return 'https:' + url
        if url.startswith('/'):
            return self._SITE_URL + url
        return url

    def get_browser(self, *args, **kwargs):
        """Return the base browser with the mobile API headers installed.

        The header list is replaced (not extended), so every request made
        through this browser carries exactly the three headers set here.
        """
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        # Inject the token shared by the Le Monde group applications.
        br.addheaders = [
            ('User-Agent', self.browser_user_agent),
            ('X-Lmd-Token', 'TWPLMOLMO'),
            ('Accept', 'application/json')
        ]
        return br

    def get_cover_url(self):
        """Return the cover image URL of the latest issue, or None.

        The first ``article.item`` inside ``div.magazines-list`` on the
        magazine page is taken to be the latest issue. The lookup is
        best-effort: any failure is logged and None is returned so that the
        download proceeds without a cover.
        """
        cover_url = None
        try:
            self.log('🔍 Recherche de la couverture du dernier numéro...')
            soup = self.index_to_soup('https://www.courrierinternational.com/magazine')

            # Walk down step by step so that a missing element is reported as
            # "no cover found" rather than as an AttributeError on None.
            listing = soup.find('div', class_='magazines-list')
            first_mag = listing.find('article', class_='item') if listing is not None else None
            img = first_mag.find('img') if first_mag is not None else None
            src = img.get('src') if img is not None else None

            if src:
                # Thumbnails are served at 320x0 (e.g. .../320x0/2026/...);
                # request the 1280x0 rendition for the e-reader instead.
                cover_url = self._absolute_url(src).replace('/320x0/', '/1280x0/')
                self.log(f'✅ Couverture trouvée et améliorée : {cover_url}')
            else:
                self.log('⚠️ Aucune couverture trouvée.')

        except Exception as e:
            # Deliberately broad: a cover problem must never abort the recipe.
            self.log(f'❌ Erreur lors de la récupération de la couverture : {e}')

        return cover_url

    def get_article_url(self, article):
        """Return the mobile API URL for *article* when its id can be found.

        The id is the run of digits following the last underscore of the URL
        path. When the base class yields no URL, or the URL carries no such
        id, the base class result is returned unchanged.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if not url:
            return url
        match = self._ARTICLE_ID_RE.match(url)
        if match:
            article_id = match.group(1)
            # Send calibre to the mobile API instead of the public web page.
            return f"https://apps.courrierinternational.com/cri/v1/premium-android-phone/article?id={article_id}"
        return url

    def preprocess_raw_html(self, raw_html, url):
        """Unwrap the article HTML from a mobile API JSON response.

        Responses whose URL contains "/cri/v1/" are parsed as JSON and the
        markup stored under templates -> raw_content -> content is returned
        wrapped in a minimal HTML document, with image-size placeholders
        replaced by concrete values. Any other input, or a response that
        cannot be decoded, is returned unchanged.
        """
        if "/cri/v1/" not in url:
            return raw_html
        try:
            data = json.loads(raw_html)
            content = data['templates']['raw_content']['content']
            # Fill in the image-size placeholders, in both raw and
            # URL-encoded spelling, for width as well as height.
            for placeholder, value in self._IMAGE_PLACEHOLDERS:
                content = content.replace(placeholder, value)
        except (ValueError, KeyError, TypeError, AttributeError) as e:
            # Invalid JSON or an unexpected payload shape: keep the raw data.
            self.log(f"Erreur de décodage pour {url}: {str(e)}")
            return raw_html
        return f'<html><body>{content}</body></html>'

    def preprocess_html(self, soup):
        """Pick an image from each srcset and make relative URLs absolute.

        For every image with a non-empty ``srcset`` the last candidate is
        copied into ``src``. Relative image sources and link targets are
        then resolved against the main site. The modified soup is returned.
        """
        for img in soup.find_all('img'):
            if img.has_attr('srcset'):
                # Ignore empty entries (empty attribute, trailing comma) so a
                # usable src is never overwritten with an empty string.
                candidates = [c.strip() for c in img['srcset'].split(',') if c.strip()]
                if candidates:
                    img['src'] = candidates[-1].split()[0]
            if img.has_attr('src'):
                img['src'] = self._absolute_url(img['src'])

        for link in soup.find_all('a', href=re.compile(r'^/')):
            link['href'] = self._absolute_url(link['href'])
        return soup
alphonk is offline   Reply With Quote
Old 03-17-2026, 10:57 PM   #2
kovidgoyal
creator of calibre
kovidgoyal ought to be getting tired of karma fortunes by now.
 
kovidgoyal's Avatar
 
Posts: 46,333
Karma: 29630876
Join Date: Oct 2006
Location: Mumbai, India
Device: Various
https://github.com/kovidgoyal/calibr...aa8b76e7a73547
kovidgoyal is online now   Reply With Quote
Advert
Reply


Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
Courrier International recipe darkl Recipes 1 05-07-2015 01:26 PM
Courrier International josepinto Recipes 1 03-27-2015 10:18 AM
Courrier international recipe darkl Recipes 4 03-25-2014 09:07 AM
Recipe for Courrier International Abonné Mwandishi Recipes 0 11-17-2012 08:27 AM
Kindle 3 3G International Coverage US vs International model fidjit Amazon Kindle 4 02-04-2012 07:19 AM


All times are GMT -4. The time now is 09:51 AM.


MobileRead.com is a privately owned, operated and funded community.