View Single Post
Old 09-23-2021, 06:48 PM   #1
Ted Friesen
Nameless Being
 
Question Regex Function words to integers

This regex function replaces numbers written as words with digits, but it can't handle hyphenated numbers such as "twenty-one"; they have to be written with a space, as in "twenty one".

Can anyone suggest a solution?

Code:
def text2int(textnum, numwords={}):
    """Convert a number written in English words to an integer.

    Words may be separated by whitespace and/or hyphens, so both
    "twenty one" and "twenty-one" yield 21. Matching is case-insensitive
    and the filler word "and" is accepted and ignored.

    Args:
        textnum: The number spelled out in words, e.g.
            "one hundred and twenty-three".
        numwords: Mapping of word -> (scale, increment). When empty it is
            filled in place with the built-in English table. The mutable
            default is deliberate: it acts as a cache so the table is
            built only on the first call.

    Returns:
        The integer value of the phrase (0 for an empty string).

    Raises:
        ValueError: If a word is not present in ``numwords``.
    """
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
            "nineteen",
        ]

        # The two leading blanks keep each word's index equal to its tens digit.
        tens = [
            "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
            "eighty", "ninety",
        ]

        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            if word:  # skip the blank placeholders
                numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # idx 0 ("hundred") -> 10**2; the rest -> 10**3, 10**6, ...
            numwords[word] = (10 ** (idx * 3 or 2), 0)

    current = result = 0
    # Treat hyphens as separators so "twenty-one" tokenizes like "twenty one".
    for word in textnum.lower().replace("-", " ").split():
        if word not in numwords:
            raise ValueError("Illegal word: " + word)

        scale, increment = numwords[word]
        current = current * scale + increment
        if scale > 100:
            # A thousand-or-larger scale closes the current group.
            result += current
            current = 0

    return result + current

def replace(match, number, file_name, metadata, dictionaries, data, functions, *args, **kwargs):
    """Return the matched text with its spelled-out number converted to digits.

    Group 1 and group 3 of ``match`` are kept verbatim; group 2 is passed
    to ``text2int`` and the resulting integer is inserted between them.
    The remaining parameters are accepted but unused here.
    NOTE(review): the signature looks like a search-and-replace callback
    hook (e.g. an editor "function mode") -- confirm against the host tool.
    """
    prefix = match.group(1)
    digits = str(text2int(match.group(2)))
    return prefix + digits + match.group(3)

Last edited by Ted Friesen; 09-23-2021 at 06:54 PM.
  Reply With Quote