02-29-2012, 05:25 PM   #46
ubieubie
I went ahead and posted the whole recipe; maybe you can show me where I'm supposed to be adding it, since I'm sure that's what the issue is. Bold is the line I added (the 'Genesis Reviews' feed line). I have other folders to add, but that's the only one I've tried. Thanks, and sorry for the numerous posts.
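In case the forum mangles the indentation again, here's a stripped-down sketch of where I put my line, with everything else collapsed. Indentation is 4 spaces, the same as the rest of the class body, and the folder URL is the real one from my account:

from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    title = u'Instapaper'
    feeds = []

    # ...all the other settings from the full paste below...

    # the line I added, at the same indentation as the other class attributes
    feeds += [(u'Genesis Reviews', u'http://www.instapaper.com/u/folder/1538365/genesis-reviews')]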



from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    title = u'Instapaper'
    __author__ = 'Darko Miletic, Stanislav Khromov'
    publisher = 'Instapaper.com'
    category = 'info, custom, Instapaper'
    feeds = []

    # if all feeds are empty, no periodical will be generated
    remove_empty_feeds = True

    # - Recipe configuration - #

    # Set archive to True if you want to auto-archive items
    archive = False
    # Set include_liked to True if you want to include liked items in your feed.
    # Warning: please avoid enabling this in conjunction with auto-archive.
    include_liked = False
    # Set to True if you want to reverse the article order
    reverse_article_order = False
    # Set the maximum number of articles you'd like to receive here
    max_articles_per_feed = 30

    # Set your own custom folders here; simply re-paste the example line below for each new folder you need
    # Example:
    # feeds += [(u'My custom folder', u'http://www.instapaper.com/u/folder/1321400/my-custom-folder')]
    # feeds += [(u'Long-term reading', u'http://www.instapaper.com/u/folder/1232089/long-term-reading')]
    feeds += [(u'Genesis Reviews', u'http://www.instapaper.com/u/folder/1538365/genesis-reviews')]  # <-- the line I added
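    # Additional folders would follow the same pattern, one line per folder at
    # this same indentation level; the folder ID and name below are made up:
    # feeds += [(u'Another folder', u'http://www.instapaper.com/u/folder/0000000/another-folder')]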
    # - End of recipe configuration - #

    oldest_article = 0
    no_stylesheets = True
    remove_javascript = True
    remove_tags = [
        dict(name='div', attrs={'id':'text_controls_toggle'})
        ,dict(name='script')
        ,dict(name='div', attrs={'id':'text_controls'})
        ,dict(name='div', attrs={'id':'editing_controls'})
        ,dict(name='div', attrs={'class':'bar bottom'})
        ,dict(name='div', attrs={'id':'controlbar_container'})
        ,dict(name='div', attrs={'id':'footer'})
    ]
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://www.instapaper.com'
    LOGIN = INDEX + u'/user/login'

    if include_liked:
        feeds = feeds + [(u'Instapaper Liked', u'http://www.instapaper.com/starred')]
    feeds = feeds + [(u'Instapaper Unread', u'http://www.instapaper.com/u')]
    feeds.reverse()
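    # (presumably reversed so the built-in Unread feed ends up first in the
    # generated periodical, ahead of any custom folders)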

    cleanup_items = []

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None:
            br.open(self.LOGIN)
            br.select_form(nr=0)
            br['username'] = self.username
            if self.password is not None:
                br['password'] = self.password
            br.submit()
        return br
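    # To test the login from the command line, something like this should work
    # (file names here are placeholders, assuming a standard calibre install):
    #   ebook-convert instapaper.recipe out.epub --username you@example.com --password secret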

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()

        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.report_progress(0, _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl))
            articles = []

            # folder page
            current_page = 1

            # so we run the loop at least once
            articles_on_page = 1

            while articles_on_page > 0:
                soup = self.index_to_soup(feedurl + u'/' + str(current_page))
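                # Instapaper paginates folders as <folder-url>/1, /2, ...;
                # the loop keeps walking pages until one comes back empty.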

                # get and count the number of items on the current page
                items = soup.findAll('a', attrs={'class':'actionButton textButton'})
                cleanup = soup.findAll('a', attrs={'class':'actionButton archiveButton'})

                # remember the archive links for cleanup()
                for clean_item in cleanup:
                    self.cleanup_items.append(clean_item)

                articles_on_page = len(items)

                # add each item
                for item in items:
                    description = self.tag_to_string(item.div)
                    if item and item.has_key('href'):
                        url = item['href']
                        articles.append({
                            'url': url
                        })

                current_page += 1

            # don't append empty feeds
            if len(articles) != 0:
                totalfeeds.append((feedtitle, articles))

        if len(totalfeeds) == 0:
            return None
        else:
            return totalfeeds

    def print_version(self, url):
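        # links scraped from the folder pages are relative, so prefix the site root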
        return 'http://www.instapaper.com' + url

    def populate_article_metadata(self, article, soup, first):
        # adds the title to the metadata
        article.title = soup.find('title').contents[0].strip()

    def postprocess_html(self, soup, first_fetch):
        # adds the title to each story, as it is not always included
        for link_tag in soup.findAll(attrs={"id": "story"}):
            link_tag.insert(0, '<h1>' + soup.find('title').contents[0].strip() + '</h1>')

        return soup

    def cleanup(self):
        if self.archive:
            self.report_progress(0, "Archiving articles...")
            # clean up each item
            for cleanup_item in self.cleanup_items:
                self.report_progress(0, "Cleaning up... : " + self.INDEX + cleanup_item['href'])
                self.browser.open(self.INDEX + cleanup_item['href'])
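    # note: calibre calls cleanup() only after the whole download has finished,
    # so items are archived only once they have actually been fetched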