and you are right, i works.
so i wanted to take it to the next step. on the urls that you found, there is the clean version of the reports i am trying to get.
it is the "src" attr from the iframe tag (in some cases, i want to do this step by step).
so i added a sub function. i gave it all the information in needs to do what you did.
Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
class AlisonB(BasicNewsRecipe):
title = 'blah'
__author__ = 'Tonythebookworm'
description = 'blah'
language = 'en'
no_stylesheets = True
publisher = 'Tonythebookworm'
category = 'column'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
max_articles_per_feed = 10
INDEX = '"http://maya.tase.co.il/'
def make_links1(self, url, title, description, date):
title = 'Temp1'
current_articles1 = []
soup = self.index_to_soup(url)
for item in soup.findAll('iframe'):
print 'FOUND GOOD URL'
url = item['src']
print 'url is: ', url
current_articles1.append({'title': title, 'url': url, 'description':'', 'date':''}) # append all this
return current_articles1
def parse_index(self):
feeds = []
for title, url in [
(u"Feed", u"http://maya.tase.co.il/bursa/index.asp?view=search&company_group=3000&arg_comp=&srh_comp_lb=1007&srh_from=2010-01-01&srh_until=2010-09-28&srh_anaf=-1&srh_event=9999&is_urgent=0&srh_company_press="),
]:
articles = self.make_links(url)
if articles:
feeds.append((title, articles))
return feeds
def make_links(self, url):
title = 'Temp'
current_articles = []
soup = self.index_to_soup(url)
print 'The soup is: ', soup
for item in soup.findAll('a',attrs={'class':'A3'}):
print 'item is: ',item
#link = item.find('a')
#titlecheck = self.tag_to_string(link)
#url_test = re.search('javascript', item['href'])
if not re.search('javascript', item['href']):
title = self.tag_to_string(item)
print 'title is: ', title
current_articles=make_links1(url, title, description, date)
return current_articles
when i run it, i get "NameError: global name 'make_links1' is not defined"
it looks right to me, i have no idea what i did wrong.