It works, but www.isbn.nu is missing a lot of non-English books. I'll have to find a better source (probably Amazon).
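In the meantime, the Open Library API looks like a workable fallback for titles isbn.nu doesn't know. Rough sketch of that idea below (my own substitution, not the Amazon route; the URL and JSON field names are how I read the Open Library docs, so treat them as assumptions):

import requests

def openlibrary_lookup(isbn):
    # Edition record lives at https://openlibrary.org/isbn/<isbn>.json
    r = requests.get(f"https://openlibrary.org/isbn/{isbn}.json")
    if r.status_code != 200:
        return None
    data = r.json()
    title = data.get("title")
    publish_date = data.get("publish_date")  # free-form text, e.g. "March 2004"
    authors = []
    for ref in data.get("authors", []):
        # Each entry is {"key": "/authors/OL...A"}; fetch that record for the name
        a = requests.get(f"https://openlibrary.org{ref['key']}.json")
        if a.ok:
            authors.append(a.json().get("name"))
    return title, publish_date, authors

print(openlibrary_lookup("9780140328721"))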
Code:
#pip install beautifulsoup4
#pip install lxml
from bs4 import BeautifulSoup
import requests
import sqlite3
import re
"""
First, scan the ISBNs with a barcode scanner into a plain text file (input.txt), then bootstrap the database:
sqlite3.exe books.sqlite
CREATE TEMP TABLE temp_books(isbn TEXT);
.import input.txt temp_books
CREATE TABLE IF NOT EXISTS books(isbn TEXT,title TEXT, year TEXT);
INSERT INTO books(isbn) SELECT isbn FROM temp_books;
CREATE TABLE IF NOT EXISTS authors(isbn TEXT,author TEXT);
DROP TABLE temp_books;
.quit
"""
pattern_year = re.compile(r'(\d{4})')  # raw string so \d isn't flagged as an invalid escape
db = sqlite3.connect('books.sqlite')
cursor = db.cursor()
cursor.execute('BEGIN')  # wrap all the updates in a single transaction
with open('input.txt') as reader:
    for line in reader:
        isbn = line.strip()
        print(f"Handling {isbn}")
        url = f"https://isbn.nu/{isbn}"
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')

        # Title: isbn.nu puts the book title in the page <title>
        title = soup.title.string
        if title == "No Title Found":
            title = None
        print("Title=", title)

        # Year published: scan the "bi_row" info rows for "Publication date"
        year = None  # reset per ISBN so a miss doesn't reuse the previous book's year
        for col in soup.find_all("div", {"class": "bi_row"}):
            if col.find("span", {"class": "bi_col_title"}).text == "Publication date":
                date = col.find("span", {"class": "bi_col_value"}).text
                # Extract the four-digit year
                m = pattern_year.search(date)
                if m:
                    year = m.group(1)  # group(1) == group(0) here, since the whole pattern is the group
                    print("Date=", year)
                else:
                    print("Date not found")
                break
        cursor.execute("UPDATE books SET title=?, year=? WHERE isbn=?", (title, year, isbn))

        # Author(s): author links point at /authorx/ pages; one row per author.
        # The authors table starts empty, so INSERT rather than UPDATE.
        for author in soup.select("a[href*=authorx]"):
            name = author.string
            cursor.execute("INSERT INTO authors(isbn, author) VALUES (?, ?)", (isbn, name))
db.commit()  # one commit for the whole BEGIN...UPDATE batch
db.close()
print("Done.")