View Single Post
Old 10-26-2010, 03:10 PM   #1
Daermond
Member
Daermond has a complete set of Star Wars action figures.Daermond has a complete set of Star Wars action figures.Daermond has a complete set of Star Wars action figures.Daermond has a complete set of Star Wars action figures.
 
Posts: 22
Karma: 300
Join Date: Oct 2010
Device: none
Metadata downlad plugin trouble

Hi.

I try to create a metadata download plugin for a Hungarian online shop called libri.hu.
First I have made a python script that downloads the metadata and print it in the console.
The I try to convert it to a plugin for Calibre, but I stucked.
In debug mode I can see it prints out the data for the book but at the end of the run of the plugin I get an error message:
Code:
Traceback (most recent call last):
  File "threading.py", line 525, in __bootstrap_inner
  File "D:\Calibre Libary\calibre\src\calibre\gui2\dialogs\fetch_metadata.py", l
ine 38, in run
  File "D:\Calibre Libary\calibre\src\calibre\ebooks\metadata\fetch.py", line 32
0, in search
  File "D:\Calibre Libary\calibre\src\calibre\ebooks\metadata\fetch.py", line 28
7, in filter_metadata_results
AttributeError: 'str' object has no attribute 'publisher'
I'm new in python and I have just basic programing skills.
My code is now just a basic scratch version, and I try to do fine tuning and make some error handling in the future.
The metadata it scrapes mostly contains special Hungarian characters.
Here is my plugins code:
Code:
# -*- coding: utf-8 -*-

import lxml, sys
import lxml.html as lh
#import traceback

from calibre import browser
from calibre.customize import Plugin
from calibre.ebooks.metadata.fetch import MetadataSource
from calibre.ebooks.metadata import MetaInformation

def get_social_metadata(title, authors, publisher, isbn):
	print '4'
	mi = MetaInformation(title, authors)
	SEARCH_URL = "http://www.libri.hu/reszletes_kereso"
	print '5'
	br = browser()
	br.open(SEARCH_URL)
	br.select_form(name = "detailed_search_form")
	br['cim'] = title
	br['szerzo'] = authors
#	br['isbn'] = isbn
	search_page = br.submit()
	
	book_page = br.follow_link(url_regex='konyv', nr=0)
	
	print search_page.geturl()
	print book_page.geturl()
	
	doc = lh.parse(book_page)
	
	book_title = ''.join(doc.xpath('//*[@id="book"]/div/h1/text()')).replace("\t",'').replace("\n",'')
	print book_title
	mi.title = book_title
	
	book_author = ''.join(doc.xpath('//*[@id="book"]//*[@class="authors"]//text()')).replace("\t",'').replace("\n",'')
	print book_author
	mi.authors = book_author
	
	book_property = doc.xpath('//*[@class="props"]/text()')
	book_property = [text for text in book_property if text != '\r\n\t\t\t\t\t\r\n\t\t\t' and text != '\r\n\t\t\t\t' and text != '\r\n\t\t\t\t\r\n\t\t\t\t\t\r\n\t\t\t\t' and text != ' ' and text != ': ']
	
	book_publish_date = ''.join(book_property[6]).replace(" ",'').replace("\t",'').replace("\n",'')
	print book_publish_date
	mi.pubdate =  book_publish_date	
	
	book_publisher = ''.join(doc.xpath('//*[@class="props"]/a/text()'))
	print book_publisher
	mi.publisher = book_publisher
	
	#mi.series = series[0].strip()
	
	book_comments = ''.join(doc.xpath('//*[@id="tab_content_lead"]//text()')).replace("\t",'').replace("\n",'')
	print book_comments
	mi.comments = book_comments	
	
	book_isbn = ''.join(book_property[5]).replace(" ",'').replace("\t",'').replace("\n",'')
	print book_isbn
	mi.isbn=book_isbn
		
	return mi

class LibraryThing(MetadataSource):

	author = 'Csaba Hoffer'
	metadata_type = 'basic'
	name = 'Libri_Hu'
	description = _('Downloads metadata from Libri.hu')
	version             = (1, 0, 0)
	supported_platforms = ['windows', 'osx', 'linux']
	
	print '1'
	
	def fetch(self):
		print '2'
		try:
			self.results = get_social_metadata(self.title, self.book_author, self.publisher, self.isbn)
			print '8'
			#print self.results
			print '9'
		
		except Exception, e:
			print '6'
			self.exception = e
			self.tb = traceback.format_exc()

def main(args=sys.argv):
	print get_social_metadata(None, None, None, None)
	return 0

if __name__ == '__main__':
	sys.exit(main())
Please, if anyone have some hint for me what could be the problem share it with me.
Thanks!
Daermond is offline   Reply With Quote