This regex function will replace numbers in words with digits. But it can't deal with numbers like twenty-one. They must be in the format twenty one.
Can anyone suggest a solution?
Code:
def text2int(textnum, numwords={}):
if not numwords:
units = [
"zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
"sixteen", "seventeen", "eighteen", "nineteen",
]
tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
scales = ["hundred", "thousand", "million", "billion", "trillion"]
numwords["and"] = (1, 0)
for idx, word in enumerate(units): numwords[word] = (1, idx)
for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)
current = result = 0
for word in textnum.split():
word = word.lower()
if word not in numwords:
raise Exception("Illegal word: " + word)
scale, increment = numwords[word]
current = current * scale + increment
if scale > 100:
result += current
current = 0
return result + current
def replace(match, number, file_name, metadata, dictionaries, data, functions, *args, **kwargs):
return match.group(1)+str(text2int(match.group(2)))+match.group(3)