but i need some help fixing it.
i marked it HERE in the code. i want to format the url as u"www....com" i am giving it a simple string. i tried ' and " and []. still cant get the syntax right. can i get some help with that?
Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
class AlisonB(BasicNewsRecipe):
title = 'blah'
__author__ = 'Tonythebookworm'
description = 'blah'
language = 'en'
no_stylesheets = True
publisher = 'Tonythebookworm'
category = 'column'
use_embedded_content= False
no_stylesheets = True
oldest_article = 24
remove_javascript = True
remove_empty_feeds = True
max_articles_per_feed = 10
INDEX = '"http://maya.tase.co.il/'
def parse_index(self):
feeds = []
for title, url in [
(u"Feed", u"http://maya.tase.co.il/bursa/index.asp?view=search&company_group=3000&arg_comp=&srh_comp_lb=1007&srh_from=2010-01-01&srh_until=2010-09-28&srh_anaf=-1&srh_event=9999&is_urgent=0&srh_company_press="),
]:
articles = self.make_links(url)
if articles:
feeds.append((title, articles))
return feeds
def make_links(self, url):
title = 'Temp'
current_articles = []
soup = self.index_to_soup(url)
print 'url is', url
print 'The soup is: ', soup
for item in soup.findAll('a',attrs={'class':'A3'}):
print 'item is: ',item
#link = item.find('a')
#titlecheck = self.tag_to_string(link)
#url_test = re.search('javascript', item['href'])
if not re.search('javascript', item['href']):
temp3= self.INDEX + item['href'] #HERE
# temp2=[temp3]
print 'url1 is', temp2
soup1 = self.index_to_soup(temp3) #AND HERE
print 'the new soup is', temp2
print '6714'
for item1 in soup1.findAll('iframe'):
print 'item1 is:' , item1
print 'FOUND GOOD URL'
url = item1['src']
print 'url is: ', url
title = self.tag_to_string(item)
print 'title is: ', title
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''}) # append all this
return current_articles