from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag, NavigableString


class WSWS(BasicNewsRecipe):
    """Recipe that builds its feeds from the WSWS mobile index page."""

    title = 'World Socialist Web Site'
    __author__ = 'International Committee of The Fourth International'
    description = 'WSWS'
    no_stylesheets = True
    remove_javascript = True

    def parse_index(self):
        """Scrape the mobile index page and group its links by section.

        Every ``<div class="content">`` on the index page is treated as one
        section whose title is the text of its first ``<b>`` element.  Within
        a section, each ``<a href=...>`` that carries a non-empty ``class``
        attribute becomes an article; site-relative URLs are made absolute.

        Returns:
            A list of ``(section_title, articles)`` tuples, where ``articles``
            is a list of ``{'title': ..., 'url': ...}`` dicts.  Sections with
            no collected articles are omitted.
        """
        soup = self.index_to_soup('http://wsws.org/mobile/')
        feeds = []
        for section in soup.findAll('div', attrs={'class': 'content'}):
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print('A section was found!')
            section_title = self.tag_to_string(section.find('b'))
            articles = []
            for post in section.findAll('a', href=True):
                print('A post was found!')
                url = post['href']
                if url.startswith('/'):
                    url = 'http://www.wsws.org' + url
                title = self.tag_to_string(post)

                # Look the attribute up directly instead of searching the
                # serialized tag for 'class=': the text search also matched
                # child tags / link text and then crashed on post['class'].
                klass = post.get('class')
                if klass is None:
                    continue
                print('A class was found in the post!')

                # NOTE(review): the original indentation was lost; collecting
                # only links with a non-empty class is the presumed intent
                # (the log line below needs `klass`) -- confirm.
                if klass != "":
                    print('A klass was found!')
                    self.log()
                    self.log('--> post: ', post)
                    self.log('--> url: ', url)
                    self.log('--> title: ', title)
                    self.log('--> class: ', klass)
                    articles.append({'title': title, 'url': url})
            if articles:
                feeds.append((section_title, articles))
        return feeds