View Single Post
Old 11-18-2010, 12:56 PM   #3
marbs
Zealot
marbs began at the beginning.
 
Posts: 122
Karma: 10
Join Date: Jul 2010
Device: nook
got it

thanks kovid
ready to be built in:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import re

class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    description   = 'This is a recipe of Globs.co.il.'
    cover_url      = 'http://www.the7eye.org.il/SiteCollectionImages/BAKTANA/arye_avnery_010709_377.jpg'
    title          = u'Globes'
    language              = 'he'
    __author__ = 'marbs'
    extra_css='img {max-width:100%;} body{direction: rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; }, a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }'
    simultaneous_downloads = 5
    remove_javascript     = True
    timefmt        = '[%a, %d %b, %Y]'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_attributes = ['width','style']


    feeds          = [(u'שוק ההון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585'),
                           (u'נדל"ן', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607'),
                           (u'וול סטריט ושווקי העולם', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225'),
                           (u'ניתוח טכני', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1294'),
                           (u'היי טק', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594'),
                           (u'נתח שוק וצרכנות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821'),
                           (u'דין וחשבון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829'),
                           (u'רכב', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220'),
                           (u'דעות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845'),
                           (u'קניון המניות - טור שבועי', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3175'),
                           (u'סביבה', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3221')]

    def print_version(self, url):
        split1 = url.split("=")
        print_url = 'http://www.globes.co.il/serve/globes/printwindow.asp?did=' + split1[1]
        return print_url


    def preprocess_html(self, soup):
        soup.find('tr',attrs={'bgcolor':'black'}).findPrevious('tr').extract()
        soup.find('tr',attrs={'bgcolor':'black'}).extract()
        print 'soup is',soup,'end of soup'
        return soup

    def fixChars(self,string):
        # Replace lsquo (\x91)
        fixed = re.sub("■","■",string)
        return fixed

Last edited by kovidgoyal; 11-18-2010 at 01:08 PM.
marbs is offline   Reply With Quote