Python uses indentation to "nest" code. Be consistent and use spaces rather than tabs.
I'm not an expert, but I think you want something close to this:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString
class WSWS(BasicNewsRecipe):
    """Calibre recipe that builds its index from the WSWS mobile page."""

    title = 'World Socialist Web Site'
    __author__ = 'International Committee of The Fourth International'
    description = 'WSWS'
    no_stylesheets = True
    remove_javascript = True

    def parse_index(self):
        """Return the feeds found on the mobile index page.

        Every ``<li>`` element is treated as a section whose title is the
        text of its first ``<b>`` tag, and every link with an ``href``
        inside it becomes an article.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of ``{'title': ..., 'url': ...}`` dicts.
            Sections that contain no links are left out.
        """
        soup = self.index_to_soup('http://wsws.org/mobile/')
        feeds = []
        for section in soup.findAll('li'):
            section_title = self.tag_to_string(section.find('b'))
            # Start a fresh list per section so articles are not carried
            # over from one section to the next.
            articles = []
            for post in section.findAll('a', href=True):
                url = post['href']
                if url.startswith('/'):
                    # Site-relative link: prefix the host to make it absolute.
                    url = 'http://www.wsws.org' + url
                title = self.tag_to_string(post)
                # Ask the tag for the attribute instead of searching its
                # serialized text for 'class=': the text search also matches
                # hrefs or link text containing 'class=', after which
                # post['class'] would raise KeyError.
                klass = post.get('class')
                if klass:
                    # Debug output for links that carry a class attribute.
                    self.log()
                    self.log('--> post: ', post)
                    self.log('--> url: ', url)
                    self.log('--> title: ', title)
                    self.log('--> class: ', klass)
                articles.append({'title': title, 'url': url})
            if articles:
                feeds.append((section_title, articles))
        return feeds
So the idea is to loop through all the sections, which are identified by "li" entries, and then, for each section found, use the loop
Code:
# Collect every link in the current section as an article.
for post in section.findAll('a', href=True):
    url = post['href']
    if url.startswith('/'):
        # Site-relative link: prefix the host to make it absolute.
        url = 'http://www.wsws.org' + url
    title = self.tag_to_string(post)
    # Ask the tag for the attribute instead of searching its serialized
    # text for 'class=': the text search also matches hrefs or link text
    # containing 'class=', after which post['class'] would raise KeyError.
    klass = post.get('class')
    if klass:
        # Debug output for links that carry a class attribute.
        self.log()
        self.log('--> post: ', post)
        self.log('--> url: ', url)
        self.log('--> title: ', title)
        self.log('--> class: ', klass)
    articles.append({'title': title, 'url': url})
to append each article to the list of articles.