I changed it to fit. My 2nd call to soup is not opening the URL (temp2) and souping it (the HTML file that the URL leads to). It is just souping the URL itself. What am I doing wrong?
Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
class AlisonB(BasicNewsRecipe):
    """Recipe that builds its feed by scraping a maya.tase.co.il search page.

    The index page is scanned for ``<a class="A3">`` links; each linked page
    is then fetched and the ``src`` of every ``<iframe>`` on it is used as
    the article URL.
    """

    title = 'blah'
    __author__ = 'Tonythebookworm'
    description = 'blah'
    language = 'en'
    no_stylesheets = True
    publisher = 'Tonythebookworm'
    category = 'column'
    use_embedded_content = False
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10
    # Base URL that the hrefs found on the index page are appended to.
    # The original value had a stray leading double quote inside the string,
    # which made every URL built from it invalid.
    INDEX = 'http://maya.tase.co.il/'

    def parse_index(self):
        """Return a list of ``(feed_title, articles)`` tuples.

        Feeds for which ``make_links`` finds no articles are left out.
        """
        feeds = []
        for title, url in [
            (u"Feed", u"http://maya.tase.co.il/bursa/index.asp?view=search&company_group=3000&arg_comp=&srh_comp_lb=1007&srh_from=2010-01-01&srh_until=2010-09-28&srh_anaf=-1&srh_event=9999&is_urgent=0&srh_company_press="),
        ]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        """Collect article dicts reachable from the index page at ``url``.

        Returns a list of dicts with the keys ``title``, ``url``,
        ``description`` and ``date``; the last two are always empty strings.
        """
        current_articles = []
        soup = self.index_to_soup(url)
        print('The soup is: %s' % (soup,))
        for item in soup.findAll('a', attrs={'class': 'A3'}):
            print('item is: %s' % (item,))
            href = item.get('href')
            # Skip anchors without an href and javascript pseudo-links.
            if not href or 'javascript' in href:
                continue
            page_url = self.INDEX + href
            print('url1 is %s' % (page_url,))
            soup1 = self.index_to_soup(page_url)
            # Show the parsed page; the original printed the URL again here,
            # which made it look as if only the URL had been "souped".
            print('the new soup is %s' % (soup1,))
            print('6714')
            for item1 in soup1.findAll('iframe'):
                print('item1 is: %s' % (item1,))
                article_url = item1.get('src')
                if not article_url:
                    # An iframe without a src gives nothing to download.
                    continue
                print('FOUND GOOD URL')
                print('url is: %s' % (article_url,))
                # The article title is the text of the link on the index page.
                title = self.tag_to_string(item)
                print('title is: %s' % (title,))
                current_articles.append({'title': title, 'url': article_url, 'description': '', 'date': ''})
        return current_articles