# coding: ISO-8859-1
##    Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License along
##    with this program; if not, write to the Free Software Foundation, Inc.,
##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
'''
Profile to download LE MONDE - French newspaper
'''
import re
from libprs500.ebooks.lrf.web.profiles import DefaultProfile

class MONDE(DefaultProfile):
    '''
    Download profile for Le Monde (French newspaper).

    Declares the download settings, the regular expressions used to reduce
    each fetched page to its title and article text, and the RSS feeds to
    fetch.
    '''

    title = 'Le Monde'
    max_recursions = 2
    max_articles_per_feed = 15    # Maximum number of articles to download from each feed
    timefmt = ' [%d %b %Y]'
    html_description = True
    no_stylesheets = True
    oldest_article = 10           # Only articles up to 10 days old

    # HTML markers delimiting the parts of a page that are kept.
    # They are concatenated unescaped into the regular expressions below.
    DEBUT = '<head>'
    TITRE = '<title>'
    FIN_TITRE = '</title>'
    ARTICLE = '<div class="articleText">'
    FIN_ARTICLE = '</div><!-- end class="articleText" -->'
    DEPECHE = '<div class="ar-txt">'
    FIN_DEPECHE = '<!-- /gab-pv-depeches_accueil.php -->'

    # Strip scripts. The opening tag may carry attributes
    # (e.g. <script type="text/javascript">), so allow anything up to '>'.
    REG0 = r'<script\b[^>]*>.*?</script>'
    # Article type 1: group 1 is the page title, group 2 the article text.
    REG1 = r'' + DEBUT + '.*?' + TITRE + '(.*?)' + FIN_TITRE + '.*?' + ARTICLE + '(.*?)' + FIN_ARTICLE + '.*?</html>'
    # Article type 2 ("Depeches"): group 1 is the page title, group 2 the text.
    REG2 = r'' + DEBUT + '.*?' + TITRE + '(.*?)' + FIN_TITRE + '.*?' + DEPECHE + '(.*?)' + FIN_DEPECHE + '.*?</html>'
    # Article that can't be displayed: group 1 is the text of the "type-gr" div.
    REG3 = r'<head>.*?<div class=.?type-gr.?>([\sa-zA-Z][\snohiar][\sfladntrs)](?!oni).*?)</div>.*?</html>'

    # (compiled pattern, replacement function) pairs, listed in the order
    # scripts / article type 1 / article type 2 / unavailable article.
    # Every pattern is compiled case-insensitively with '.' matching newlines.
    # The inner list is the outermost iterable of the comprehension so that
    # the class-level REG* names are resolved in the class scope.
    preprocess_regexps = [
        (re.compile(regex, re.IGNORECASE | re.DOTALL), repl)
        for regex, repl in [
            (REG0, lambda match: ' '),
            # Rebuild a minimal document: title as heading, then the text.
            (REG1, lambda match: '<html><h2>' + match.group(1) + '</h2>' + '<br>' + match.group(2) + '<br></html>'),
            # Same as above, with a trailing '*** ' marker after the text.
            (REG2, lambda match: '<html><h2>' + match.group(1) + '</h2>' + '<br>' + match.group(2) + '<br>*** </html>'),
            # Well-formed placeholder page ("Article inexistant" = article
            # does not exist) followed by the captured message.
            (REG3, lambda match: '<html>Article inexistant- ' + match.group(1) + '</html>'),
        ]
    ]

    def get_feeds(self):
        '''
        Return the feeds to download as a list of (section title, RSS URL)
        tuples. Commented-out entries are additional sections that are
        currently disabled.
        '''
        return [
            ('International', 'http://www.lemonde.fr/rss/sequence/0,2-3210,1-0,0.xml'),
            ('Opinions', 'http://www.lemonde.fr/rss/sequence/0,2-3232,1-0,0.xml'),
            #('Societe','http://www.lemonde.fr/rss/sequence/0,2-3224,1-0,0.xml'),
            #('Livres','http://www.lemonde.fr/rss/sequence/0,2-3260,1-0,0.xml'),
            #('Cinema','http://www.lemonde.fr/rss/sequence/0,2-3476,1-0,0.xml'),
            #('Politique','http://www.lemonde.fr/rss/fil/0,57-0,64-823353,0.xml'),
            #('Elections Americaines','http://www.lemonde.fr/rss/sequence/0,2-829254,1-0,0.xml'),
            #('Economie','http://www.lemonde.fr/rss/sequence/0,2-3234,1-0,0.xml'),
            ('A la Une', 'http://www.lemonde.fr/rss/sequence/0,2-3208,1-0,0.xml'),
            #('Rendez vous','http://www.lemonde.fr/rss/sequence/0,2-3238,1-0,0.xml'),
            #('Europe','http://www.lemonde.fr/rss/sequence/0,2-3214,1-0,0.xml'),
            #('Media','http://www.lemonde.fr/rss/sequence/0,2-3236,1-0,0.xml'),
            #('Sports','http://www.lemonde.fr/rss/sequence/0,2-3242,1-0,0.xml'),
            #('Environnement Sciences','http://www.lemonde.fr/rss/sequence/0,2-3244,1-0,0.xml'),
            #('Culture','http://www.lemonde.fr/rss/sequence/0,2-3246,1-0,0.xml'),
            #('Technologies','http://www.lemonde.fr/rss/sequence/0,2-651865,1-0,0.xml'),
            #('Voyages','http://www.lemonde.fr/rss/sequence/0,2-3546,1-0,0.xml'),
            #('Examens','http://www.lemonde.fr/rss/sequence/0,2-3404,1-0,0.xml'),
            #('Municipales','http://www.lemonde.fr/rss/fil/0,57-0,64-987718,0.xml')
        ]

    
