In case anyone else wants to add this feature to their own code, here is the sample code:
Code:
import sys
from bs4 import BeautifulSoup
from PIL import Image
def baselang(lang):
    """Return the base (primary) language of a regional language code.

    ``"en-US"`` and ``"pt_BR"`` yield ``"en"`` and ``"pt"``. Three-letter
    primary subtags are recognised too, so ``"fil-PH"`` yields ``"fil"``.

    Args:
        lang: Language code using ``-`` or ``_`` as the subtag separator.
            May be ``None`` or empty.

    Returns:
        The two- or three-letter prefix before the first separator, or
        ``None`` when *lang* is empty, has nothing after the separator,
        or does not start with an alphabetic two/three-letter prefix
        (for example ``"en"`` or ``"x-default"``).
    """
    if not lang:
        return None
    for width in (2, 3):
        # Require at least one character after the separator so that a
        # dangling "en-" is not treated as a regional code.
        if len(lang) > width + 1 and lang[width] in "-_":
            prefix = lang[:width]
            # Reject prefixes such as "a-" (from "a--b") that merely
            # happen to have a separator in the right column.
            return prefix if prefix.isalpha() else None
    return None
def parse_xmpxml_for_alttext(xmpxml):
    """Collect the per-language alt texts stored in an XMP packet.

    The packet is parsed with ``BeautifulSoup(xmpxml, 'xml')``; the first
    ``AltTextAccessibility`` element is located and every ``li`` element
    below it contributes one entry.

    Args:
        xmpxml: The XMP packet, passed unchanged to BeautifulSoup.

    Returns:
        A dict mapping language code to alt text. Items without an
        ``xml:lang`` attribute are stored under ``'x-default'``. For a
        regional code (as recognised by ``baselang``) the base language is
        added as an extra key unless that key is already present. The dict
        is empty when no ``AltTextAccessibility`` element is found.
    """
    alt_dict = {}
    xmpmeta = BeautifulSoup(xmpxml, 'xml')
    node = xmpmeta.find('AltTextAccessibility')
    if node is None:
        return alt_dict
    for element in node.find_all('li'):
        lang = element.get('xml:lang', 'x-default')
        text = element.text
        alt_dict[lang] = text
        lg = baselang(lang)
        if lg:
            # Use .text here: attribute access such as `element.txt` is a
            # child-tag lookup in BeautifulSoup and would store None.
            # setdefault keeps an explicit base-language entry (or the
            # first regional variant) instead of overwriting it.
            alt_dict.setdefault(lg, text)
    return alt_dict
def _read_xmp_packet(im):
    """Return the raw XMP packet of an opened Pillow image, or None."""
    # Pillow reports WebP files as "WEBP"; compare upper-cased so the
    # spelling of the format name cannot silently disable a branch.
    fmt = (im.format or "").upper()
    if fmt == 'WEBP':
        return im.info.get("xmp")
    if fmt == 'PNG':
        return im.info.get("XML:com.adobe.xmp")
    if fmt == 'TIFF':
        # Tag 700 is read as the XMP packet.
        return im.tag_v2[700] if 700 in im.tag_v2 else None
    if fmt in ('JPEG', 'MPO'):
        # MPO images are JPEG-based in Pillow; getattr keeps this safe if
        # an image object has no applist.
        for segment, content in getattr(im, "applist", ()):
            if segment != "APP1":
                continue
            # Split on the first NUL only: a segment without any NUL must
            # not raise, and the payload must not be cut at a later NUL.
            marker, _, payload = content.partition(b"\x00")
            if marker == b"http://ns.adobe.com/xap/1.0/":
                return payload
    return None


def get_image_metadata_alttext(imgpath, tgtlang):
    """Return the best available alt text embedded in an image file.

    The XMP packet is read from WebP, PNG, TIFF and JPEG/MPO files and
    parsed with ``parse_xmpxml_for_alttext``. Candidates are tried in
    this order:

    1. the exact *tgtlang* code,
    2. the base language of *tgtlang* (see ``baselang``),
    3. the ``'x-default'`` entry,
    4. the EXIF ImageDescription (tag 270), or ``""`` if absent.

    Args:
        imgpath: Path (or file object) accepted by ``Image.open``.
        tgtlang: Wanted language code such as ``'en-US'``; may be ``None``.

    Returns:
        The selected alt text, falling back to the EXIF description or an
        empty string.
    """
    description = ""
    with Image.open(imgpath) as im:
        xmpxml = _read_xmp_packet(im)
        exif = im.getexif()
        # 270 = ImageDescription
        if exif and 270 in exif:
            description = exif[270]
    if not xmpxml:
        return description
    alt_dict = parse_xmpxml_for_alttext(xmpxml)
    # Full code first, then base language, then the default entry.
    lg = baselang(tgtlang) if tgtlang else None
    for key in (tgtlang, lg, 'x-default'):
        if key is None:
            continue
        text = alt_dict.get(key)
        # A None value means no usable text was stored for this key.
        if text is not None:
            return text
    # otherwise fall back to exif image description
    return description
if __name__ == "__main__":
    # Demo entry point: print the alt text found in an image.
    # Usage: script.py [image-path [language-code]]
    # Without arguments the sample defaults below are used. The guard
    # keeps the demo from opening a file and printing on import.
    imgpath = sys.argv[1] if len(sys.argv) > 1 else "test.jpg"
    lang = sys.argv[2] if len(sys.argv) > 2 else 'en-US'
    print(get_image_metadata_alttext(imgpath, lang))