View Single Post
Old 05-27-2013, 04:09 PM   #15
Camper65
Enthusiast
Camper65 began at the beginning.
 
Posts: 32
Karma: 10
Join Date: Apr 2011
Device: Kindle wifi; Dell 2in1
Smile

Kovid,

Thank you!!!! That did it. The articles now download like normal and do not have that extra page in there.

Here is the updated recipe again so you can use it next time you do updates to Calibre.

Also, thank you for creating such a great ebook organizer/news download program.

Code:
import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
from calibre.web.feeds import feed_from_xml, templates, feeds_from_index, Feed
from calibre.ptempfile import PersistentTemporaryFile

class dotnetMagazine(BasicNewsRecipe):
    """calibre news recipe for the .net magazine "top stories" feed.

    The class only declares configuration attributes consumed by
    ``BasicNewsRecipe`` plus one hook, :meth:`skip_ad_pages`, which follows
    the "click here to continue to article" link on interstitial pages.

    NOTE: ``remove_tags`` uses ``re.compile``, so the module must
    ``import re`` at the top of the file.
    """

    __author__ = u'Bonni Salles - post in forum if questions for me'
    __version__ = '1.1'
    __license__ = 'GPL v3'
    __copyright__ = u'2013, Bonni Salles'

    title = '.net magazine'
    oldest_article = 7
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    #recursion = 1
    language = 'en'
    remove_empty_feeds = True
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
    cover_url = u'http://media.netmagazine.futurecdn.net/sites/all/themes/netmag/logo.png'

    # Both matchers select a <footer>/<header> tag whose ``id`` is empty or
    # absent: the lambda returns True only for a falsy id value.
    remove_tags_after = dict(name='footer', id=lambda x: not x)
    remove_tags_before = dict(name='header', id=lambda x: not x)

    remove_tags = [
        dict(name='div', attrs={'class': 'item-list'}),
        dict(name='h4', attrs={'class': 'std-hdr'}),
        # Share links.
        dict(name='div', attrs={'class': 'item-list share-links'}),
        dict(name=['script', 'noscript']),
        # Comment out the two "comments" entries below to keep reader
        # comments in the output.
        dict(name='div', attrs={'id': 'comments-form'}),
        # NOTE(review): the group contains an empty alternative ("$||"), so
        # it always matches and the pattern is effectively just the
        # "advertorial_block_" prefix. Kept as written; confirm the intent
        # before tightening it.
        dict(name='div', attrs={'id': re.compile(r'advertorial_block_($|| )')}),
        dict(name='div', attrs={'id': 'right-col'}),
        dict(name='div', attrs={'id': 'comments'}),
        dict(name='div', attrs={'class': 'item-list related-content'}),
    ]

    feeds = [
        (u'net', u'http://feeds.feedburner.com/net/topstories?format=xml'),
    ]

    def skip_ad_pages(self, soup):
        """Return the real article when ``soup`` is an interstitial ad page.

        Searches ``soup`` for a text node equal to
        ``'click here to continue to article'``. If one is found and its
        parent tag carries an ``href``, that URL is fetched with
        ``self.index_to_soup(url, raw=True)`` and the result is returned.

        Returns ``None`` when the marker text is not present or its parent
        has no ``href``.
        """
        marker = soup.find(text='click here to continue to article')
        if not marker:
            return None

        # The marker is the link's text node; the href lives on its parent.
        url = marker.parent.get('href')
        if not url:
            return None

        return self.index_to_soup(url, raw=True)
Camper65 is offline   Reply With Quote