Register Guidelines E-Books Today's Posts Search

Go Back   MobileRead Forums > E-Book Software > Calibre > Recipes

Notices

Reply
 
Thread Tools Search this Thread
Old 03-17-2026, 06:55 PM   #1
alphonk
Member
alphonk is on a distinguished road
 
Posts: 20
Karma: 54
Join Date: Dec 2024
Device: kindle scribe
Le Monde (new recipe)

Code:
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

import re
import json
from datetime import date
from calibre.web.feeds.news import BasicNewsRecipe

class LeMondePremium(BasicNewsRecipe):
    """Calibre recipe that builds a daily edition of Le Monde.

    Article links found in the RSS feeds are rewritten to point at the
    mobile application endpoint (``/aec/v1/...``). That endpoint answers
    with JSON, so ``preprocess_raw_html`` unwraps the embedded HTML before
    calibre parses it, and ``preprocess_html`` repairs lazily-loaded images.
    """

    title = 'Le Monde'
    __author__ = 'veezh, Martin Villard, Kabonix'
    description = 'Édition complète sans pub ni bloc "Lire aussi" (Bypass Mobile API + Fix Images)'
    publisher = 'Société Editrice du Monde'
    publication_type = 'newspaper'
    language = 'fr'
    encoding = 'utf-8'

    oldest_article = 1
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    reverse_article_order = True
    remove_empty_feeds = True

    # Automatic clean-up is disabled; the explicit keep_only_tags /
    # remove_tags rules below decide what stays (including <figure> images).
    auto_cleanup = False
    delay = 1

    # --- ACCESS LOGIC ---
    # User agent sent with every request made by the recipe's browser.
    browser_user_agent = 'LeMonde/9.20.1 (Android; 14)'

    # Matches the numeric article id in web URLs ending in "_<id>_<n>.html".
    _ARTICLE_ID_RE = re.compile(r'_(\d+)_\d+\.html')

    def get_browser(self, *args, **kwargs):
        """Return the calibre browser with its headers replaced.

        The default header list is overwritten (not extended) with the
        user agent above, the ``X-Lmd-Token`` header and a JSON ``Accept``
        header.
        """
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        br.addheaders = [
            ('User-Agent', self.browser_user_agent),
            ('X-Lmd-Token', 'TWPLMOLMO'),
            ('Accept', 'application/json')
        ]
        return br

    def get_article_url(self, article):
        """Map a web article URL to the mobile API URL.

        Returns the base-class URL unchanged when it is empty or when no
        article id can be extracted from it.
        """
        url = BasicNewsRecipe.get_article_url(self, article)
        if not url:
            # The base class may find no link in the feed entry; running the
            # regex on a non-string would raise TypeError.
            return url
        match = self._ARTICLE_ID_RE.search(url)
        if match:
            article_id = match.group(1)
            return f"https://apps.lemonde.fr/aec/v1/premium-android-phone/article/{article_id}"
        return url

    def preprocess_raw_html(self, raw_html, url):
        """Unwrap the HTML embedded in an API JSON response.

        For URLs containing ``/aec/v1/`` the payload is parsed as JSON and
        ``template_vars.content`` is wrapped in a minimal HTML document.
        Any other URL, or a payload that does not have the expected shape,
        is returned untouched (best effort).
        """
        if "/aec/v1/" not in url:
            return raw_html

        try:
            data = json.loads(raw_html)
            template_vars = data['template_vars']
            content = template_vars['content']
            # `or` also covers a key that is present but null/empty.
            title = template_vars.get('seo_title') or 'Le Monde'

            # Image URLs are delivered as templates; substitute fixed
            # dimensions for the (possibly percent-encoded) placeholders.
            content = content.replace('%7B%7Bwidth%7D%7D', '1000').replace('{{width}}', '1000')
            content = content.replace('%7B%7Bheight%7D%7D', '600').replace('{{height}}', '600')
        except (ValueError, KeyError, TypeError, AttributeError) as err:
            # ValueError: not JSON; the others: JSON with an unexpected shape.
            self.log.warn('Could not extract article HTML from %s: %r' % (url, err))
            return raw_html

        return f'<html><head><title>{title}</title></head><body>{content}</body></html>'

    # --- COVER ---
    def get_cover_url(self):
        """Return the thumbnail URL of today's printed front page."""
        cover_id = date.today().strftime('%Y%m%d')
        return 'https://www.lemonde.fr/thumbnail/journal/' + cover_id + '/1000/1490'

    # --- CLEAN-UP ---
    keep_only_tags = [
        dict(name='h1', attrs={'class': ['heading', 'article__title']}),
        dict(name='div', attrs={'class': ['kicker', 'article__desc']}),
        # Keep the main article container and the figures (images).
        dict(name='div', attrs={'class': ['article_content', 'article__content']}),
        dict(name='figure')
    ]

    remove_tags = [
        dict(name='div', attrs={'class': [
            'see-also-container', 'inread-container', 'premium-container',
            'restricted-reading', 'offer-container', 'authors-container',
            'js-init-line-clamp', 'bloc-reactions', 'meta__publisher'
        ]}),
        dict(name=['aside', 'footer', 'button', 'svg', 'script', 'style', 'video'])
    ]

    extra_css = '''
        h1 { font-size: 1.6em; font-weight: bold; font-family: serif; margin-bottom: 0.5em; }
        .kicker { font-size: 1.1em; font-style: italic; color: #444; margin-bottom: 1.5em; }
        p { margin-bottom: 1em; text-align: justify; line-height: 1.4; }
        figure { margin: 1em 0; padding: 0; text-align: center; }
        img { display: block; margin: 0 auto; max-width: 100%; height: auto; }
        figcaption, .caption { font-size: 0.8em; color: #666; font-family: sans-serif; margin-top: 0.5em; }
    '''

    @staticmethod
    def _last_srcset_url(srcset):
        """Return the URL of the last non-empty ``srcset`` candidate.

        Returns ``None`` when ``srcset`` is empty, has no candidates, or the
        chosen URL is not absolute (does not start with ``http``).
        """
        if not srcset:
            return None
        # Each candidate is "<url> [descriptor]"; split() tolerates tabs and
        # newlines, and the filter drops empties left by trailing commas.
        candidates = [part.split() for part in srcset.split(',')]
        candidates = [fields for fields in candidates if fields]
        if not candidates:
            return None
        # The last candidate is often (not always) the widest one.
        url = candidates[-1][0]
        return url if url.startswith('http') else None

    def preprocess_html(self, soup):
        """Give every ``<img>`` a usable ``src`` and drop lazy-load attributes."""
        for img in soup.find_all('img'):
            # 1. Lazy loading: when 'data-src' exists it holds the real image.
            if img.has_attr('data-src'):
                img['src'] = img['data-src']

            # 2. srcset: prefer the last listed candidate when it is absolute.
            best = self._last_srcset_url(img.get('srcset'))
            if best:
                img['src'] = best

            # Remove attributes that could compete with the chosen 'src'.
            for attr in ('srcset', 'data-srcset', 'data-src', 'loading'):
                if img.has_attr(attr):
                    del img[attr]

        return soup

    feeds = [
        ('À la une', 'https://www.lemonde.fr/rss/une.xml'),
        ('Économie', 'https://www.lemonde.fr/economie/rss_full.xml'),
        ('International', 'https://www.lemonde.fr/international/rss_full.xml'),
        ('Planète', 'https://www.lemonde.fr/planete/rss_full.xml'),
        ('M le Mag', 'https://www.lemonde.fr/m-le-mag/rss_full.xml')
    ]
alphonk is offline   Reply With Quote
Old 03-17-2026, 11:06 PM   #2
kovidgoyal
creator of calibre
kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.kovidgoyal ought to be getting tired of karma fortunes by now.
 
kovidgoyal's Avatar
 
Posts: 46,167
Karma: 29626604
Join Date: Oct 2006
Location: Mumbai, India
Device: Various
https://github.com/kovidgoyal/calibr...0fbe313cd5c3c9
kovidgoyal is offline   Reply With Quote
Advert
Reply


Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
Le monde recipe's issue Acryde Recipes 1 09-14-2017 08:53 AM
Le Monde: updated recipe veezh Recipes 0 03-27-2012 12:49 PM
Recipe for Le Monde subscribers? Thomas92 Recipes 0 12-27-2011 04:50 AM
Improved recipe for Le Monde veezh Recipes 0 02-25-2011 04:14 AM
Updated recipe for Le Monde? veezh Recipes 5 01-20-2011 09:06 PM


All times are GMT -4. The time now is 10:43 PM.


MobileRead.com is a privately owned, operated and funded community.