View Single Post
Old 06-09-2012, 07:53 AM   #1
scissors
Addict
scissors ought to be getting tired of karma fortunes by now.
 
Posts: 241
Karma: 1001369
Join Date: Sep 2010
Device: prs300, kindle keyboard 3g
New Musical Express update 9/6/12

Added a cover image.
Reviews and blogs are now fetched via feed43 (the original feeds do not seem to be working?).
Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
from calibre.utils.magick import Image
import re
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    """Calibre news recipe for New Musical Express (NME).

    Articles come from the NME news feed plus two feed43-generated feeds
    (reviews and blogs). The cover is looked up on the magazinesdirect.com
    listing page, falling back to a static NME logo when it cannot be used.
    """

    title = u'New Musical Express Magazine'
    description = 'Author D.Asbury. UK Rock & Pop Mag. '
    __author__ = 'Dave Asbury'
    # last updated 9/6/12
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True
    oldest_article = 7
    max_articles_per_feed = 20
    language = 'en_GB'

    # NOTE: masthead_url and remove_tags used to sit after the `return` in
    # get_cover_url(), where they were unreachable and never applied. They
    # are class attributes so that they actually take effect.
    masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'

    # Page furniture stripped from every article.
    remove_tags = [
        dict(attrs={'class': 'clear_icons'}),
        dict(attrs={'class': 'share_links'}),
        dict(attrs={'id': 'right_panel'}),
        dict(attrs={'class': 'today box'}),
    ]

    # Only these elements are kept from each article page.
    keep_only_tags = [
        dict(name='h1'),
        dict(attrs={'class': 'BText'}),
        dict(attrs={'class': 'Bmore'}),
        dict(attrs={'class': 'bPosts'}),
        dict(attrs={'class': 'text'}),
        dict(attrs={'id': 'article_gallery'}),
        dict(attrs={'class': 'article_text'}),
    ]

    feeds = [
        (u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'),
        # The feedburner reviews feed
        # (http://feeds2.feedburner.com/nme/SdML) was replaced by the
        # feed43 feed below.
        (u'Reviews', u'http://feed43.com/4138608576351646.xml'),
        (u'Bloggs', u'http://feed43.com/3326754333186048.xml'),
    ]

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
		'''

    def get_cover_url(self):
        """Return the URL of the cover image to use.

        Looks up the image titled 'NME magazine subscriptions' on the
        magazinesdirect.com listing page and probes its URL with redirects
        disabled. If the image is missing from the page, or the probe
        fails, the static NME logo (``masthead_url``) is returned instead.
        """
        fallback_url = self.masthead_url
        site = 'http://www.magazinesdirect.com'

        soup = self.index_to_soup(site + '/categories/mens/tv-and-music/')
        cov = soup.find(attrs={'title': 'NME magazine subscriptions'})
        # The listing page layout may change; without this guard a missing
        # image would raise TypeError on the subscript below.
        if cov is None or not cov.get('src'):
            return fallback_url

        cover_url = site + cov['src']
        self.log('***cov =  ', cover_url, ' ***')

        # Probe the image with redirects disabled so that a redirected
        # (e.g. moved or placeholder) image counts as a failure.
        br = browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cover_url)
        except Exception:
            # Best effort: any failure to fetch the cover means we use the
            # logo. Unlike a bare `except:`, this lets KeyboardInterrupt
            # and SystemExit propagate.
            return fallback_url
        return cover_url

Last edited by scissors; 06-09-2012 at 01:13 PM. Reason: extra css for fonts added
scissors is offline   Reply With Quote