Kovid, I modified it to use recursions and is_link_wanted, but I can't get is_link_wanted right. Can you help with what I should be using to find the URL that corresponds to li class=pager-next, or to use the href that corresponds to the "next" entry, so that I can get the second page (or more) properly? I am posting the recipe again so you can see the updates.
Spoiler:
__license__ = 'GPL v3'
Code:
__copyright__ = '2014, Bonni Salles - post in forum for help'
'''
Creative Blog (formerly .net magazine)
'''
from calibre.web.feeds.news import BasicNewsRecipe
class creativeblog(BasicNewsRecipe):
    """Calibre news recipe for Creative Blog (formerly .net magazine).

    Downloads the articles listed in ``feeds`` and, through
    ``is_link_wanted``, follows links one level deep when they look like
    continuation pages of a multi-page article.
    """

    title = u'Creative Blog (formerly .Net magazine)'
    __author__ = 'Bonni Salles'
    oldest_article = 7
    publication_type = 'blog'
    max_articles_per_feed = 100
    description = 'Web Design and Tutorials from Creative Blog (part of .Net Magazine and others)'
    publisher = 'Creative Blog'
    category = 'internet, web design'
    language = 'en'
    encoding = 'utf-8'
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True
    auto_cleanup = True
    # Follow links found in an article one level deep; which links are
    # actually followed is decided by is_link_wanted() below.
    recursions = 1

    # As configured, this downloads the combined feed for the whole blog.
    # To limit the download to specific sections, comment out the main feed
    # and uncomment the section feeds you want.
    feeds = [
        (u'Creative Blog', u'http://www.creativebloq.com/feed/'),
        # (u'3D', u'http://www.creativebloq.com/feed/3d'),
        # (u'Adobe', u'http://www.creativebloq.com/feed/adobe'),
        # (u'Animation', u'http://www.creativebloq.com/feed/animation'),
        # (u'Apple', u'http://www.creativebloq.com/feed/apple'),
        # (u'Branding', u'http://www.creativebloq.com/feed/branding'),
        # (u'Graphic Design', u'http://www.creativebloq.com/feed/graphic-design'),
        # (u'Illustration', u'http://www.creativebloq.com/feed/illustration'),
        # (u'News', u'http://www.creativebloq.com/feed/news'),
        # (u'Opinion', u'http://www.creativebloq.com/feed/opinion'),
        # (u'Tutorials', u'http://www.creativebloq.com/feed/tutorial'),
        # (u'Typography', u'http://www.creativebloq.com/feed/typography'),
        # (u'Video', u'http://www.creativebloq.com/feed/video'),
        # (u'web design', u'http://www.creativebloq.com/feed/web-design'),
    ]

    def is_link_wanted(self, url, tag):
        """Decide whether a link found inside an article should be followed.

        :param url: The URL of the candidate link (a plain URL string, not
            HTML markup).
        :param tag: The tag the URL was taken from; not inspected here.
        :return: True when ``url`` contains a ``/page-<number>`` segment,
            False otherwise.
        """
        # Imported here because the file's import block does not bring in re.
        import re

        if not url:
            return False
        # The argument is a bare URL, so search it with a regular expression
        # rather than an HTML/glob fragment.
        # NOTE(review): the '/page-' segment comes from the original attempt;
        # confirm it against the href of the site's "next" pager link.
        wanted = re.search(r'/page-\d+', url) is not None
        if wanted:
            self.log('Following multipage link: %s' % url)
        return wanted