Register Guidelines E-Books Search Today's Posts Mark Forums Read

Go Back   MobileRead Forums > E-Book Software > Calibre > Recipes

Notices

Reply
 
Thread Tools Search this Thread
Old 11-27-2014, 08:40 PM   #1
beemanfunk
Junior Member
beemanfunk began at the beginning.
 
Posts: 2
Karma: 10
Join Date: Nov 2014
Device: iPad 1
NHK Easy News (Japanese News site)

Hi everyone,

I need some help. I'm not really a programmer and this whole recipe thing is a little difficult for me. I'd like to be able to create epubs for this website:

http://www3.nhk.or.jp/news/easy/

Unfortunately, there's no RSS feed. I know it's possible to make a recipe without one, but I found this hard going.

Here's the other thing. This website is designed for children and foreigners learning Japanese by using a simpler form of the language. It also uses ruby text which is basically text above the Kanji (chinese characters) to show the reader how to pronounce a given word. It also has popup links that gives the definition of certain words in Japanese.

Anyone know of any recipes that might be a good base to facilitate ruby text? I've tried tinkering around with some of the other Japanese news recipes with little success (as I said, I'm not really proficient in programming)

Any help would be appreciated.
beemanfunk is offline   Reply With Quote
Old 12-25-2014, 03:44 AM   #2
beemanfunk
Junior Member
beemanfunk began at the beginning.
 
Posts: 2
Karma: 10
Join Date: Nov 2014
Device: iPad 1
Ok, here's recipe in progress.

Quote:
__license__ = 'GPL v3'
__copyright__ = '2014, <>'
'''
www3.nhk.or.jp/news/easy/index.html
'''

import re
from calibre.web.feeds.news import BasicNewsRecipe

class NHKEasyNews(BasicNewsRecipe):
title = 'NHK Easy News'
__author__ = 'Brendan Dalton'
oldest_article = 2
max_articles_per_feed = 20
description = 'NHK Easy News'
publisher = 'NHK'
category = 'news, japan, simple'
language = 'ja'
index = 'Brendan Dalton'
remove_javascript = True
masthead_title = u'NHK Easy News'

remove_tags_before = {'div ID':"newstitle"}
remove_tags_after = {'div ID':"newsarticle"}

def parse_index(self):
soup = self.index_to_soup('http://www3.nhk.or.jp/news/easy/')

def feed_title(div):
return ''.join(div.findAll(text=True, recursive=False)).strip()

articles = {}
key = None
ans = []
for div in soup.findAll(True,
attrs={'id':['newstitle'],'class':["hightLine-tn1", "heightLine-tn1", "heightline-tn3"]}):

if div['id'] in ['newstitle']:
a = div.find('a', href=True)
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
url += '?pagewanted=all'
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)

feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
articles[feed] = []
if not 'podcasts' in url:
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description,
content=''))

elif div['class'] in ["hightLine-tn1", "heightLine-tn1", "heightline-tn3"]:
a = div.find('a', href=True)
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
url += '?pagewanted=all'
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
summary = div.find(True, attrs={'class':'summary'})
if summary:
description = self.tag_to_string(summary, use_alt=False)

feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
articles[feed] = []
if not 'podcasts' in url:
articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description,
content=''))
ans = self.sort_index_by(ans, {'The Front Page':-1, 'Dining In, Dining Out':1, 'Obituaries':2})
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

def preprocess_html(self, soup):
refresh = soup.find('meta', {'http-equiv':'refresh'})
if refresh is None:
return soup
content = refresh.get('content').partition('=')[2]
raw = self.browser.open('http://www.nytimes.com'+content).read()
return BeautifulSoup(raw.decode('cp1252', 'replace'))
return feeds
But I get the following error.

Quote:
Fetch news from NHK Easy News
Resolved conversion options
calibre version: 2.11.0
{'asciiize': False,
'author_sort': None,
'authors': None,
'base_font_size': 0,
'book_producer': None,
'change_justification': 'original',
'chapter': None,
'chapter_mark': 'pagebreak',
'comments': None,
'cover': None,
'debug_pipeline': None,
'dehyphenate': True,
'delete_blank_paragraphs': True,
'disable_font_rescaling': False,
'dont_download_recipe': False,
'dont_split_on_page_breaks': True,
'duplicate_links_in_toc': False,
'embed_all_fonts': False,
'embed_font_family': None,
'enable_heuristics': False,
'epub_flatten': False,
'epub_inline_toc': False,
'epub_toc_at_end': False,
'expand_css': False,
'extra_css': None,
'extract_to': None,
'filter_css': None,
'fix_indents': True,
'flow_size': 260,
'font_size_mapping': None,
'format_scene_breaks': True,
'html_unwrap_factor': 0.4,
'input_encoding': None,
'input_profile': <calibre.customize.profiles.InputProfile object at 0x000000000234BA20>,
'insert_blank_line': False,
'insert_blank_line_size': 0.5,
'insert_metadata': False,
'isbn': None,
'italicize_common_cases': True,
'keep_ligatures': False,
'language': None,
'level1_toc': None,
'level2_toc': None,
'level3_toc': None,
'line_height': 0,
'linearize_tables': False,
'lrf': False,
'margin_bottom': 5.0,
'margin_left': 5.0,
'margin_right': 5.0,
'margin_top': 5.0,
'markup_chapter_headings': True,
'max_toc_links': 50,
'minimum_line_height': 120.0,
'no_chapters_in_toc': False,
'no_default_epub_cover': False,
'no_inline_navbars': False,
'no_svg_cover': False,
'output_profile': <calibre.customize.profiles.iPadOutput object at 0x000000000234BEB8>,
'page_breaks_before': None,
'prefer_metadata_cover': False,
'preserve_cover_aspect_ratio': False,
'pretty_print': True,
'pubdate': None,
'publisher': None,
'rating': None,
'read_metadata_from_opf': None,
'remove_fake_margins': True,
'remove_first_image': False,
'remove_paragraph_spacing': False,
'remove_paragraph_spacing_indent_size': 1.5,
'renumber_headings': True,
'replace_scene_breaks': '',
'search_replace': None,
'series': None,
'series_index': None,
'smarten_punctuation': False,
'sr1_replace': '',
'sr1_search': '',
'sr2_replace': '',
'sr2_search': '',
'sr3_replace': '',
'sr3_search': '',
'start_reading_at': None,
'subset_embedded_fonts': False,
'tags': None,
'test': False,
'timestamp': None,
'title': None,
'title_sort': None,
'toc_filter': None,
'toc_threshold': 6,
'toc_title': None,
'unsmarten_punctuation': False,
'unwrap_lines': True,
'use_auto_toc': False,
'verbose': 2}
InputFormatPlugin: Recipe Input running
Using custom recipe
Python function terminated unexpectedly
No articles found, aborting (Error Code: 1)
Traceback (most recent call last):
File "site.py", line 132, in main
File "site.py", line 109, in run_entry_point
File "site-packages\calibre\utils\ipc\worker.py", line 193, in main
File "site-packages\calibre\gui2\convert\gui_conversion.py", line 25, in gui_convert
File "site-packages\calibre\ebooks\conversion\plumber.py", line 1041, in run
File "site-packages\calibre\customize\conversion.py", line 241, in __call__
File "site-packages\calibre\ebooks\conversion\plugins\recipe_ input.py", line 117, in convert
File "site-packages\calibre\web\feeds\news.py", line 998, in download
File "site-packages\calibre\web\feeds\news.py", line 1171, in build_index
ValueError: No articles found, aborting
Any feedback would be helpful.
beemanfunk is offline   Reply With Quote
Advert
Reply

Tags
japanese, language learning, news, ruby text

Thread Tools Search this Thread
Search this Thread:

Advanced Search

Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
Fetching news with a site according to specific search terms? talizh Recipes 0 04-13-2014 06:15 AM
IDG.se - Recipe for swedish news site khromov Recipes 3 09-18-2011 09:40 PM
Site for pocket pc freeware and news pocketfree Lounge 0 07-11-2007 06:13 AM


All times are GMT -4. The time now is 07:13 PM.


MobileRead.com is a privately owned, operated and funded community.