View Single Post
Old 08-18-2022, 09:09 AM   #2
Shohreh
Addict
Shohreh ought to be getting tired of karma fortunes by now.
 
Posts: 207
Karma: 304158
Join Date: Jan 2016
Location: France
Device: none
It works, but www.isbn.nu is missing a lot of non-English books. I'll have to find a better source (Amazon probably).

Code:
#pip install beautifulsoup4
#pip install lxml
from bs4 import BeautifulSoup
import requests
import sqlite3
import re

"""
First, scan the ISBNs with a barcode scanner into a plain text file (input.txt, one ISBN per line), then create the database with the sqlite3 shell:

sqlite3.exe books.sqlite
CREATE TEMP TABLE temp_books(isbn TEXT);
.import input.txt temp_books
CREATE TABLE IF NOT EXISTS books(isbn TEXT,title TEXT, year TEXT);
INSERT INTO books(isbn) SELECT isbn FROM temp_books;
CREATE TABLE IF NOT EXISTS authors(isbn TEXT,author TEXT);
DROP TABLE temp_books;
.quit
"""

# First run of four consecutive digits; used to pull the year out of a
# free-form publication date string.
pattern_year = re.compile(r'(\d{4})')


def extract_year(text):
    """Return the first four-digit run found in *text*, or None.

    None is also returned when *text* is empty or None.
    """
    if not text:
        return None
    m = pattern_year.search(text)
    # The whole pattern is one capture group, so group(1) == group(0).
    return m.group(1) if m else None


def parse_book_page(soup):
    """Extract ``(title, year, authors)`` from a parsed isbn.nu book page.

    *soup* is the BeautifulSoup tree of the page.  ``title`` and ``year`` are
    strings or None when the page does not provide them; ``authors`` is a
    (possibly empty) list of strings.
    """
    #title
    title_tag = soup.title
    title = title_tag.string if title_tag is not None else None
    if title is not None:
        title = str(title)
    if title == "No Title Found":
        title = None

    #Year published
    # Start from None for every page so a book without a publication date
    # never inherits the year of the previously processed book.
    year = None
    for row in soup.find_all("div", {"class": "bi_row"}):
        label = row.find("span", {"class": "bi_col_title"})
        if label is None or label.text != "Publication date":
            continue
        value = row.find("span", {"class": "bi_col_value"})
        if value is not None:
            year = extract_year(value.text)
        break

    #author(s)
    # .string is None when the link wraps nested markup; skip those.
    authors = [
        str(link.string)
        for link in soup.select("a[href*=authorx]")
        if link.string
    ]
    return title, year, authors


def main():
    """Look up every ISBN listed in input.txt and store the results.

    Updates the title and year of the matching ``books`` row and replaces the
    ``authors`` rows for that ISBN.  All changes are committed together at the
    end; if an unexpected error occurs nothing is committed.
    """
    db = sqlite3.connect('books.sqlite')
    try:
        cursor = db.cursor()
        with open('input.txt') as reader:
            for line in reader:
                isbn = line.strip()
                if not isbn:
                    # Blank line (e.g. trailing newline in the scan file).
                    continue
                print(f"Handling {isbn}")

                url = f"https://isbn.nu/{isbn}"
                try:
                    page = requests.get(url, timeout=30)
                except requests.RequestException as exc:
                    # One unreachable page should not abort the whole batch.
                    print(f"Request failed for {isbn}: {exc}")
                    continue
                soup = BeautifulSoup(page.content, 'html.parser')

                title, year, authors = parse_book_page(soup)
                print("Title=", title)
                if year:
                    print("Date=", year)
                else:
                    print("Date not found")
                cursor.execute(
                    "UPDATE books SET title= ? , year=? WHERE isbn=?",
                    (title, year, isbn),
                )

                # The authors table starts out empty, so rows have to be
                # inserted (an UPDATE would match nothing).  Clearing first
                # keeps a re-run from duplicating authors.
                cursor.execute("DELETE FROM authors WHERE isbn=?", (isbn,))
                cursor.executemany(
                    "INSERT INTO authors(isbn, author) VALUES (?, ?)",
                    [(isbn, author) for author in authors],
                )

        db.commit()
    finally:
        db.close()

    print("Done.")


if __name__ == "__main__":
    main()
Shohreh is offline   Reply With Quote