Oops, sorry, I made an update to the Mediapart one some time ago, but forgot to share.
I've switched to the print version, which they have greatly improved on the site (most of the code already existed but was commented out).
Spoiler:
Code:
__license__ = 'GPL v3'
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>; 2010, 2011, Louis Gesbert <meta at antislash dot info>'
'''
Mediapart
'''
import re
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe
class Mediapart(BasicNewsRecipe):
    """Calibre news recipe for the French news site Mediapart.

    Articles listed in the site's feed are downloaded through their print
    version, and the recipe logs in with the configured credentials before
    fetching anything.
    """

    title = 'Mediapart'
    __author__ = 'Mathieu Godlewski, Louis Gesbert'
    description = 'Global news in french from news site Mediapart'
    oldest_article = 7
    language = 'fr'
    needs_subscription = True
    max_articles_per_feed = 50
    no_stylesheets = True

    cover_url = 'http://static.mediapart.fr/files/pave_mediapart.jpg'

    feeds = [
        ('Les articles', 'http://www.mediapart.fr/articles/feed'),
    ]

    # -- print-version

    # Each (pattern, replacement) pair is compiled case-insensitively and with
    # DOTALL so that '.' would also match newlines.
    preprocess_regexps = [
        (re.compile(i[0], re.IGNORECASE|re.DOTALL), i[1]) for i in
        [
            # Turn the print page's title <div> into a real heading.
            (r'<div class="print-title">([^>]+)</div>', lambda match : '<h2>'+match.group(1)+'</h2>'),
            # Replace straight apostrophes with typographic ones.
            # NOTE(review): this runs on the raw page text, so apostrophes
            # inside markup are rewritten as well -- confirm that is harmless.
            (r'\'', lambda match: '’')
        ]
    ]

    # Drop the block that repeats the article's source URL on the print page.
    remove_tags = [ dict(name='div', attrs={'class':'print-source_url'}) ]

    def print_version(self, url):
        """Return the URL of the print version of the article at ``url``.

        The article page is downloaded and searched for the link whose
        ``title`` attribute is ``'Imprimer'``; that link's ``href`` is
        returned. ``None`` is returned when the page has no such link.
        """
        raw = self.browser.open(url).read()
        # Undecodable bytes are replaced rather than raising.
        soup = BeautifulSoup(raw.decode('utf8', 'replace'))
        link = soup.find('a', {'title':'Imprimer'})
        if link is None:
            # NOTE(review): presumably calibre skips an article whose print
            # URL is None -- confirm this is the intended fallback.
            return None
        return link['href']

    # -- Handle login

    def get_browser(self):
        """Return a browser, logged in to Mediapart when credentials are set.

        When both ``self.username`` and ``self.password`` are set, the home
        page is opened and its first form is submitted with the ``name`` and
        ``pass`` fields filled in.
        """
        # get_browser is an instance method of BasicNewsRecipe, so the
        # recipe instance has to be passed explicitly when calling it
        # through the base class.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.mediapart.fr/')
            # The login form is taken to be the first form on the page.
            br.select_form(nr=0)
            br['name'] = self.username
            br['pass'] = self.password
            br.submit()
        return br
I don't have a fix for rue89 right now, though; I'll try to find the time to look into it.