View Single Post
Old 05-23-2024, 02:08 PM   #25
KevinH
Sigil Developer
KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.KevinH ought to be getting tired of karma fortunes by now.
 
Posts: 8,914
Karma: 6120478
Join Date: Nov 2009
Device: many
In case anyone else wants to add this feature to their own code, here is the sample code:

Code:
import sys
from bs4 import BeautifulSoup
from PIL import Image

# extract base language from language code
def baselang(lang):
    if len(lang) > 3:
        if lang[2:3] in "-_":
            return lang[0:2]
    return None

def parse_xmpxml_for_alttext(xmpxml):
    xmpmeta = BeautifulSoup(xmpxml, 'xml')
    alt_dict = {}
    if xmpmeta:
        node = xmpmeta.find('AltTextAccessibility')
        if node:
            for element in node.find_all('li'):
                lang = element.get('xml:lang', 'x-default')
                alt_dict[lang] = element.text
                lg = baselang(lang)
                if lg:
                    alt_dict[lg] = element.txt
    return alt_dict


def get_image_metadata_alttext(imgpath, tgtlang):
    xmpxml = None
    description = ""
    with Image.open(imgpath) as im:
        if im.format == 'WebP':
            if "xmp" in im.info:
                xmpxml = im.info["xmp"]
        if im.format == 'PNG':
            if "XML:com.adobe.xmp" in im.info:
                xmpxml = im.info["XML:com.adobe.xmp"]
        if im.format == 'TIFF':
            if 700 in im.tag_v2:
                xmpxml = im.tag_v2[700]
        if im.format == 'JPEG':
            for segment, content in im.applist:
                if segment == "APP1":
                    marker, xmp_tags = content.split(b"\x00")[:2]
                    if marker == b"http://ns.adobe.com/xap/1.0/":
                        xmpxml = xmp_tags
                        break
        exif = im.getexif()
        # 270 = ImageDescription
        if exif and 270 in exif:
            description = exif[270]
    if not xmpxml:
        return description
    alt_dict = parse_xmpxml_for_alttext(xmpxml)
    # first try full language code match
    if tgtlang in alt_dict:
        return alt_dict[tgtlang]
     # next try base language code match
    lg = baselang(tgtlang)
    if lg and lg in alt_dict:
        return alt_dict[lg]
    # use default
    if 'x-default' in alt_dict:
        return alt_dict['x-default']
    # otherwise fall back to exif image description
    return description



imgpath = "test.jpg"
lang = 'en-US'
print(get_image_metadata_alttext(imgpath, lang))

Last edited by KevinH; 05-24-2024 at 11:57 AM.
KevinH is offline   Reply With Quote