Thread: maya recipe
View Single Post
Old 10-01-2010, 03:12 AM   #5
marbs
Zealot
marbs began at the beginning.
 
Posts: 122
Karma: 10
Join Date: Jul 2010
Device: nook
i went over it again

and you are right, i works.
so i wanted to take it to the next step. on the urls that you found, there is the clean version of the reports i am trying to get.
it is the "src" attr from the iframe tag (in some cases, i want to do this step by step).
so i added a sub function. i gave it all the information in needs to do what you did.
Spoiler:
Code:
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
class AlisonB(BasicNewsRecipe):
    title      = 'blah'
    __author__ = 'Tonythebookworm'
    description = 'blah'
    language = 'en'
    no_stylesheets = True
    publisher           = 'Tonythebookworm'
    category            = 'column'
    use_embedded_content= False
    no_stylesheets      = True
    oldest_article      = 24
    remove_javascript   = True
    remove_empty_feeds  = True
    
    max_articles_per_feed = 10
    INDEX = '"http://maya.tase.co.il/'

    def make_links1(self, url, title, description, date):
        title = 'Temp1'
        current_articles1 = []
        soup = self.index_to_soup(url)
        for item in soup.findAll('iframe'):
             print 'FOUND GOOD URL'
             url         =  item['src']
             print 'url is: ', url

        current_articles1.append({'title': title, 'url': url, 'description':'', 'date':''}) # append all this           
        return current_articles1
    

    def parse_index(self):
        feeds = []
        for title, url in [
                            (u"Feed", u"http://maya.tase.co.il/bursa/index.asp?view=search&company_group=3000&arg_comp=&srh_comp_lb=1007&srh_from=2010-01-01&srh_until=2010-09-28&srh_anaf=-1&srh_event=9999&is_urgent=0&srh_company_press="),
                            
                            
                             ]:
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds
        
    def make_links(self, url):
        title = 'Temp'
        current_articles = []
        soup = self.index_to_soup(url)
        print 'The soup is: ', soup
        for item in soup.findAll('a',attrs={'class':'A3'}):
            print 'item is: ',item
            #link = item.find('a')
            #titlecheck = self.tag_to_string(link)
            #url_test = re.search('javascript', item['href'])
           
            if not re.search('javascript', item['href']):
               title       = self.tag_to_string(item)
               print 'title is: ', title
               current_articles=make_links1(url, title, description, date)
  
        return current_articles

when i run it, i get "NameError: global name 'make_links1' is not defined"
it looks right to me, i have no idea what i did wrong.
marbs is offline   Reply With Quote