I managed to create a recipe to NLR(based on
oneilpt example), it works fairly well although it has a minor problem with some symbols, (they are replaced others). But that is not a big deal for me. I know the code is far from perfect.
At the end tried to had the code to insert login/password, but when fetching keep receiving an annoying message.
Here is the CODE:
Code:
'''
http://newleftreview.org
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NewLeftReview(BasicNewsRecipe):
title = 'New Left Review2'
description = ''
publisher = 'NLR'
category = 'analisys, politics, left'
needs_subscription = True
no_stylesheets = True
encoding = 'cp1252'
use_embedded_content = False
language = 'en'
publication_type = 'magazine'
masthead_url = 'http://newleftreview.org/images/logo.gif'
index = 'http://newleftreview.org/'
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_attributes=['width','height']
remove_tags_before = dict(name='h4', attrs={'id':'publisher'})
remove_tags=[dict(name='ul', attrs={'id':'articlelogolist'}),
dict(name='div', attrs={'id':'bottom'})
]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.newleftreview.org/?page=login')
br.select_form(name='login')
br['USERID'] = self.username
br['PASSWORD'] = self.password
br.submit()
return br
def parse_index(self):
articles = []
soup = self.index_to_soup(self.index)
cover = None
feeds = []
for section in soup.findAll('h4', attrs={'class':'center'}):
section_title = self.tag_to_string(section.find('title'))
articles = []
for post in section.findAll('a', href=True):
url = post['href']
if url.startswith('?'):
url = 'http://newleftreview.org/'+url
title = self.tag_to_string(post)
self.log()
self.log('--> post: ', post) self.log('--> url: ', url)
self.log('--> title: ', title)
articles.append({'title':title, 'url':url})
if articles:
feeds.append((section_title, articles))
return feeds
and here the ERROR message
Code:
Fetch news from New Left Review2
Resolved conversion options
calibre version: 0.7.47
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_compress': False,
'dont_download_recipe': False,
'enable_heuristics': False,
'extra_css': None,
'fix_indents': True,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x05AA49F0>,
'insert_blank_line': False,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'mobi_ignore_margins': False,
'no_chapters_in_toc': False,
'no_inline_navbars': True,
'no_inline_toc': False,
'output_profile': <calibre.customize.profiles.KindleOutput object at 0x05AA4D10>,
'page_breaks_before': None,
'password': 'riotejo',
'personal_doc': '[PDOC]',
'prefer_author_sort': False,
'prefer_metadata_cover': False,
'pretty_print': False,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'rescale_images': False,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'unwrap_lines': True,
'use_auto_toc': False,
'username': 'luiscc',
'verbose': 2}
InputFormatPlugin: Recipe Input running
Python function terminated unexpectedly
no form matching name 'login' (Error Code: 1)
Traceback (most recent call last):
File "site.py", line 103, in main
File "site.py", line 85, in run_entry_point
File "site-packages\calibre\utils\ipc\worker.py", line 110, in main
File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert
File "site-packages\calibre\ebooks\conversion\plumber.py", line 904, in run
File "site-packages\calibre\customize\conversion.py", line 204, in __call__
File "site-packages\calibre\web\feeds\input.py", line 101, in convert
File "site-packages\calibre\web\feeds\news.py", line 631, in __init__
File "c:\users\lus~1.pro\appdata\local\temp\calibre_0.7.47_tmp_nxbsa9\calibre_0.7.47_gz04zi_recipes\recipe0.py", line 43, in get_browser
br.select_form(name='login')
File "site-packages\mechanize-0.2.4-py2.7.egg\mechanize\_mechanize.py", line 524, in select_form
mechanize._mechanize.FormNotFoundError: no form matching name 'login'
Can anyone help, please?