View Single Post
Old 09-25-2010, 12:17 PM   #6
TonytheBookworm
Addict
TonytheBookworm is on a distinguished road
 
TonytheBookworm's Avatar
 
Posts: 264
Karma: 62
Join Date: May 2010
Device: kindle 2, kindle 3, Kindle fire
Try this code. and let me know if this is what you wanted.
Spoiler:

Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
class JerusalemPost(BasicNewsRecipe):
    title = 'Jerusalem post'
    language = 'fr'
    __author__ = 'TonytheBookworm'
    description = 'news'
    publisher = 'jpost'
    category = 'news'
    oldest_article = 30
    max_articles_per_feed = 100
    linearize_tables = True
    no_stylesheets = True
    remove_javascript   = True
    
    masthead_url = 'http://static.jpost.com/JPSITES/images/JFrench/2008/site/jplogo.JFrench.gif'
   
    remove_tags = [
                   dict(name='a', attrs={'href':['javascript:window.print()']}),
                   dict(name='div', attrs={'class':['bot']}),
       
                   ]
    
    feeds          = [
                      ('NEWS', 'http://fr.jpost.com/servlet/Satellite?collId=1216805762036&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench En route vers la paix', 'http://fr.jpost.com/servlet/Satellite?collId=1216805762201&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Politique', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737334&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Securite', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737338&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Moyen Orient', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737342&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Diplomatie / Monde', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737346&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Economie / Sciences', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737358&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Societe', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737354&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Opinions', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737350&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Monde juif', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737366&pagename=JFrench%2FPage%2FRSS'),
                      ('JFrench Culture / Sport', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737362&pagename=JFrench%2FPage%2FRSS')
                    ]
    def print_version(self, url):
        split1 = url.split("cid=")
        #for testing only -------
        #print 'SPLIT IS: ', split1
        #print 'ORG URL IS: ', url
        #---------------------------
        idnum = split1[1] # get the actual value of the id article
        #for testing only --------------------
        #print 'the idnum is: ', idnum
        #-------------------------------------- 
        print_url = 'http://fr.jpost.com/servlet/Satellite?cid=' + idnum + '&pagename=JFrench%2FJPArticle%2FPrinter'
        #for testing only -------------------------
        #print 'PRINT URL IS: ', print_url
        #------------------------------------------
        return print_url
      
    #example of how links should be formatted
    #--------------------------------------------------------------------------------------------------------------              
    #org   version =  http://fr.jpost.com/servlet/Satellite?pagename=JFrench/JPArticle/ShowFull&cid=1282804806075
    #print version =  http://fr.jpost.com/servlet/Satellite?cid=1282804806075&pagename=JFrench%2FJPArticle%2FPrinter
    #------------------------------------------------------------------------------------------------------------------
TonytheBookworm is offline   Reply With Quote