MobileRead Forums - View Single Post

mufc · 11-27-2010, 11:01 AM

OK, I wrote my own recipe for Macleans and this works.

Code:

class AdvancedUserRecipe1289709253(BasicNewsRecipe):
    """Calibre recipe that fetches the Macleans 'Canada' feed via print pages."""

    title = u'Macleans Magazine'

    # Feed selection limits.
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    # Presentation: drop the site's own styling and scripts, add our own CSS.
    no_stylesheets = True
    remove_javascript = True
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'

    # Page clean-up rules; the empty-string entries are kept exactly as the
    # recipe was written.
    remove_tags_before = dict(id='')
    remove_tags = [
        dict(name='div', attrs={'class': ['wp-caption', '']}),
        dict(name='div', attrs={'id': ['headerimg', 'footer']}),
        dict(name='ul', attrs={'class': ['']}),
        dict(name='ul', attrs={'id': ['']}),
        dict(name='ol', attrs={'id': ['']}),
        dict(name='span', attrs={'class': ['']}),
        dict(name='p', attrs={'class': 'postmetadata'}),
        dict(name='img'),
    ]

    feeds = [
        (u'Canada', u'http://www2.macleans.ca/category/canada/feed/'),
    ]

    def preprocess_html(self, soup):
        """Replace every text-only <a> element with its plain text.

        Anchors whose ``.string`` is None are left untouched. Returns the
        same soup object after modification.
        """
        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                continue
            anchor.replaceWith(link_text)
        return soup

    def print_version(self, url):
        """Return the printer-friendly URL: the article URL plus 'print/'."""
        return url + "print/"

But the Men's Fitness recipe would not work:

Code:

class AdvancedUserRecipe1289709253(BasicNewsRecipe):
    """Calibre recipe for the Men's Fitness global RSS feed (print pages)."""

    title = u'test'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    remove_javascript = True
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'

    # feeds must be a *list* of feeds, each either a URL or a (title, url)
    # pair. A bare (title, url) tuple is iterated element by element, so the
    # title string itself would be handled as if it were a feed.
    feeds = [(u'News', u'http://www.mensfitness.com/rss_global/')]

    def preprocess_html(self, soup):
        """Replace every text-only <a> element with its plain text.

        Anchors whose ``.string`` is None are left untouched. Returns the
        same soup object after modification.
        """
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup

    def print_version(self, url):
        """Return the printer-friendly URL: the article URL plus '?print=1'.

        NOTE(review): the '?print=1' suffix is unverified against the site;
        confirm it actually yields the print layout.
        """
        return url + "?print=1"

I have since redone Men's Fitness using remove_tags etc., but being OCD
I want to get the print option correct.

Code:

class AdvancedUserRecipe1289709253(BasicNewsRecipe):
    """Calibre recipe for Men's Fitness that cleans pages with tag filters."""

    title = u'Mens Fitness'
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True

    remove_javascript = True
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'

    # Keep only the main article container, then strip inline styling.
    keep_only_tags = [dict(name='div', attrs={'id': 'content_items'})]
    remove_attributes = ['style', '']

    # NOTE(review): the empty-string matchers below look like unfilled
    # template placeholders; they are kept unchanged so filtering behaviour
    # stays the same.
    remove_tags_before = dict(id='')
    remove_tags = [
        dict(name='div', attrs={'class': ['', '']}),
        dict(name='div', attrs={'id': ['', '']}),
        dict(name='ul', attrs={'class': ['']}),
        dict(name='ul', attrs={'id': ['']}),
        dict(name='ol', attrs={'id': ['']}),
        dict(name='span', attrs={'class': ['']}),
        dict(name='p', attrs={'id': ''}),
        dict(name='img'),
    ]

    # feeds must be a *list* of feeds, each either a URL or a (title, url)
    # pair. A bare (title, url) tuple is iterated element by element, so the
    # title string itself would be handled as if it were a feed.
    feeds = [(u'News', u'http://www.mensfitness.com/rss_global/')]

    def preprocess_html(self, soup):
        """Replace every text-only <a> element with its plain text.

        Anchors whose ``.string`` is None are left untouched. Returns the
        same soup object after modification.
        """
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup