View Single Post
Old 09-26-2010, 05:37 PM   #4
weasal
Junior Member
weasal began at the beginning.
 
Posts: 7
Karma: 10
Join Date: Sep 2010
Device: Kindle 3
I have the same problem, but at TOC generation time.
Calibre version 0.6.13
command:
ebook-convert myrecipe.recipe output.mobi -vv --test

here is the end of the output:

Creating MOBI Output...
67% Creating MOBI Output
Generating in-line TOC...
Applying case-transforming CSS...
Parsing manglecase.css ...
Parsing tocstyle.css ...
: cannot connect to X server


The DISPLAY variable is not set. I'm using a local image for the cover; the image is available, and I can see that it is copied over to the HTML output.

here is the recipe:

import string, re
from calibre import strftime
from time import strptime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup

class Standartnewa(BasicNewsRecipe):
    """calibre news recipe for the Bulgarian site paper.standartnews.com.

    The class attributes configure how each downloaded article page is
    cleaned up and styled; ``parse_index`` walks the site's category pages
    to build the list of feeds and their articles.
    """

    title = 'StandartNews'

    # Page clean-up: boundaries of the article markup and tags/attributes
    # to strip from it.
    remove_tags_before = dict(name=['h2', 'h3'], attrs={'class':['article_title', 'article']})
    remove_tags_after = dict(name='div', attrs={'id':'article_content'})
    recursions = 0
    no_stylesheets = True

    # Metadata (also reused in conversion_options below, so these must be
    # defined before that dict).
    language = 'bg'
    description = 'News from Bulgaria'
    category = 'news, Bulgaria, BG, world'
    publisher = 'Standartnews'

    extra_css = '.kare {BORDER-BOTTOM: #8d93a5 1px solid; PADDING-BOTTOM: 5px; BACKGROUND-COLOR: #eeeff2; PADDING-LEFT: 5px; PADDING-RIGHT: 5px; MARGIN-BOTTOM: 10px; BORDER-TOP: #8d93a5 1px solid; PADDING-TOP: 5px} .cl { OVERFLOW: hidden } .cl:after { DISPLAY: block; HEIGHT: 0px; VISIBILITY: hidden; CLEAR: both} .img_article { BORDER-BOTTOM: #e3e4e9 1px solid; BORDER-LEFT: #e3e4e9 1px solid; PADDING-BOTTOM: 3px; MARGIN: 0px 10px 10px 0px; PADDING-LEFT: 5px; PADDING-RIGHT: 5px; COLOR: #8d93a5; FONT-SIZE: 11px; BORDER-TOP: #e3e4e9 1px solid; BORDER-RIGHT: #e3e4e9 1px solid; PADDING-TOP: 5px; WIDTH: 300px } .img_article img { MARGIN: 0px 0px 3px } .fl { FLOAT: left } '
    remove_attributes = ['font', 'style']
    remove_tags = [dict(name='hr')]

    # Append a <br> after each matched "img_article" div in the raw HTML.
    preprocess_regexps = [
        (re.compile(r'(<div class="img_article(?<!</div>).*</div>)', re.IGNORECASE),
         lambda match: match.group(0) + '<br>'),
    ]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    def get_cover_url(self):
        """Return the URL of the cover image (a fixed local file)."""
        return 'file:///home/weasal/news/cover.jpg'

    def parse_index(self):
        """Build the feed list from the site's category pages.

        Every ``category.php`` link found inside the element with id
        ``left`` on the index page becomes one feed. For each category the
        link with class ``read`` is followed, and the articles are taken
        from the ``ul.addonnews`` list inside ``div.right`` of that page.

        Returns:
            A list of ``(feed title, articles)`` tuples in the order the
            category links were found. Each article is a dict with the keys
            ``title``, ``url``, ``date``, ``description`` and ``content``.
            A category whose pages lack the expected markup is kept with an
            empty article list instead of aborting the whole download.
        """
        base_url = 'http://paper.standartnews.com/bg/'
        # Article links embed the issue date:
        # article.php?d=YYYY-MM-DD&article=<number>
        date_in_href = re.compile(r'article\.php\?d=(\d{4}-\d{2}-\d{2})&article=\d+')

        def feed_title(tag):
            """Return the tag's own text, ignoring text of nested tags."""
            return ''.join(tag.findAll(text=True, recursive=False)).strip()

        def pubdate_from(href):
            """Return the date embedded in ``href`` formatted with
            '%a, %d %b', or '' when the link carries no valid date."""
            found = date_in_href.search(href)
            if found is None:
                return ''
            try:
                parsed = strptime(found.group(1), '%Y-%m-%d')
            except ValueError:
                # Digits matched the pattern but are not a real date.
                return ''
            return strftime('%a, %d %b', parsed)

        soup = self.index_to_soup(base_url + 'index.php')

        articles = {}
        ans = []
        for div in soup.findAll(True, attrs={'id':'left'}):
            menu = div.find('div')
            if menu is None:
                continue
            for link in menu.findAll('a', attrs={'href' : re.compile('^category.php.*')}):
                key = string.capwords(feed_title(link))
                # Links carrying a class attribute get a '--' prefix
                # (presumably these are sub-categories -- TODO confirm).
                if link.get('class') is not None:
                    key = '--' + key

                articles[key] = []
                ans.append(key)

                cat = self.index_to_soup(base_url + link['href'])
                read_link = cat.find('a', { "class" : "read" })
                if read_link is None:
                    continue

                cat_main = self.index_to_soup(base_url + read_link['href'])
                right = cat_main.find('div', { "class" : "right" })
                if right is None:
                    continue
                news_list = right.find('ul', { "class" : "addonnews" })
                if news_list is None:
                    continue

                for article in news_list.findAll('a', href=True):
                    href = article['href']
                    articles[key].append(
                        dict(title=self.tag_to_string(article, use_alt=True).strip(),
                             url=base_url + href,
                             date=pubdate_from(href),
                             description='',
                             content=''))

        return [(key, articles[key]) for key in ans]

Last edited by weasal; 09-26-2010 at 05:49 PM.
weasal is offline   Reply With Quote