View Single Post
Old 11-02-2011, 02:39 PM   #12
scissors
Addict
scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.scissors ought to be getting tired of karma fortunes by now.
 
Posts: 241
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
Thanks for the replies guys

both
colorspace='GRAYColorspace'
img.colorspace='GRAYColorspace'

compile. I added starson's code to the end of the Sun via google recipe.
Images rotate, but are still colour. :-(

I put the command in both IF statements to try and cover all bases, but nothing seems to happen.

Spoiler:
Code:
import urllib, re, mechanize
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre import __appname__
from calibre.utils.magick import Image, PixelWand

class GoogleReader(BasicNewsRecipe):
    title   = 'The Sun UK Via Google Reader2'
    
    #last updated 30/10/11 tidyied some stray classes

    cover_url = 'http://www.thesun.co.uk/img/global/new-masthead-logo.png'
     
    description = 'A Recipe for The Sun tabloid UK using the google reader recipe. You need to set up a gmail account solely for the reader, then subscribe to the suns RSS feeds at http://www.thesun.co.uk/sol/homepage...icle247949.ece'
    needs_subscription = True
    __author__ = ' Dave Asbury, davec, rollercoaster, Starson17'
    base_url = 'http://www.google.com/reader/atom/'
    oldest_article = 1
    max_articles_per_feed = 20
    get_options = '?n=%d&xt=user/-/state/com.google/read' % max_articles_per_feed
   # use_embedded_content = True

    masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
    #encoding = 'iso-8859-1'
    
    encoding = 'cp1252'
    remove_empty_feeds = True
    remove_javascript     = True
    no_stylesheets = True
    
    extra_css  = '''
	body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
                	 '''
    
    preprocess_regexps = [
    (re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]
    
      
   
    keep_only_tags = [
                               dict(name='h1'),dict(name='h2',attrs={'class' : 'medium centered'}),
	           dict(name='div',attrs={'class' : 'text-center'}),
	           dict(name='div',attrs={'id' : 'bodyText'})
	           # dict(name='p')
	           ]
    remove_tags=[
	       #dict(name='head'),
	       dict(attrs={'class' : ['mystery-meat-link','ltbx-container','ltbx-var ltbx-hbxpn','ltbx-var ltbx-nav-loop','ltbx-var ltbx-url']}),
                           dict(name='div',attrs={'class' : 'cf'}),
	       dict(attrs={'title' : 'download flash'}),
                           dict(attrs={'style' : 'padding: 5px'})
	      
	       ]
    
   

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            request = urllib.urlencode([('Email', self.username), ('Passwd', self.password),
                                        ('service', 'reader'), ('accountType', 'HOSTED_OR_GOOGLE'), ('source', __appname__)])
            response = br.open('https://www.google.com/accounts/ClientLogin', request)
            auth = re.search('Auth=(\S*)', response.read()).group(1)
            cookies = mechanize.CookieJar()
            br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
            br.addheaders = [('Authorization', 'GoogleLogin auth='+auth)]
        return br
    

    def get_feeds(self):
        feeds = []
        soup = self.index_to_soup('http://www.google.com/reader/api/0/tag/list')
        for id in soup.findAll(True, attrs={'name':['id']}):
            url = id.contents[0]
            feeds.append((re.search('/([^/]*)$', url).group(1),
                          self.base_url + urllib.quote(url.encode('utf-8')) + self.get_options))
        return feeds

    def postprocess_html(self, soup, first):
        #process all the images. assumes that the new html has the correct path
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            width, height = img.size
            print 'img is: ', iurl, 'width is: ', width, 'height is: ', height 
            colorspace='GRAYColorspace'
            img.colorspace='GRAYColorspace'
            if img < 0:
                raise RuntimeError('Out of memory')
            pw = PixelWand()
            if( width > height ) :
                print 'Rotate image'
                colorspace='GRAYColorspace'
                img.rotate(pw, -90)
                img.colorspace='GRAYColorspace'
                img.save(iurl)
        return soup
        #auto_cleanup = True

Last edited by scissors; 11-02-2011 at 03:00 PM.
scissors is offline   Reply With Quote