Skip to content

Instantly share code, notes, and snippets.

@dizys
Created December 13, 2022 20:07
Show Gist options
  • Save dizys/dd3c942d8f2d0ad31bb7814b6fbeeb61 to your computer and use it in GitHub Desktop.
Save dizys/dd3c942d8f2d0ad31bb7814b6fbeeb61 to your computer and use it in GitHub Desktop.
import re
# Scan a text corpus for dollar amounts written out in English words
# (e.g. "five million dollars", "sixty five dollars and ten cents") and print
# every match, one per line.

# --- Earlier drafts -------------------------------------------------------
# Nothing below reads these four patterns; the scan uses `pattern_test`.
# They are kept unchanged so the names remain importable.
pattern1 = r'(\$[0-9]+(\.[0-9]+)?)'
pattern2 = r'((one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)(?:(y|ty|teen)))(\s(million|billion|trillion))*\sdolla(rs|r)(\sand.((one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)(?:y|ty|teen))|(?:one|two|three|four|five|six|seven|eight|nine)\scents)*'
pattern3 = r'(\$[0-9]+(.hundred)*(.((million|billion|trillion))))'
# NOTE(review): this draft contains `/scents` where `pattern2` uses `\scents`.
pattern = r'(\$[0-9]+(\.[0-9]+)?)|((one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve(?:y|ty|teen))(\s(million|billion|trillion))*\sdolla(r|rs)(\sand.(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve(?:y|ty|teen))|(?:one|two|three|four|five|six|seven|eight|nine)/scents)*)|(\$[0-9]+(.hundred)*(.((million|billion|trillion))))'

# --- Building blocks for the spelled-out number grammar -------------------
# Base words. Besides one..twelve this lists the irregular "-teen" words that
# the suffix rule in `teens_in_english` cannot build from a base word
# ("thir", "fif" and "eigh" are not base words).
digit_in_english = r'(?:one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fifteen|eighteen)'

# Base word + suffix ("six" + "ty", "four" + "teen", "eight" + "y"), plus the
# tens that the suffix rule cannot build, plus the quantifiers "half a" / "a".
teens_in_english = rf'(?:(?:{digit_in_english}(?:y|ty|teen))|twenty|thirty|forty|fifty|half +a|a)'

# A number below one hundred: "sixty five", "five" or "sixty".
number1_in_english = rf'(?:(?:(?:{teens_in_english} +)?{digit_in_english})|{teens_in_english})'

big_numbers = r'(?:million|billion|trillion|hundred|thousand)'

# A small number followed by one or more magnitude words
# ("five million", "five hundred thousand").
_scaled = rf'{number1_in_english}(?: +{big_numbers})+'

# Either a chain of scaled groups with an optional trailing small number
# ("two hundred and five"), or just a small number.  Every separator is
# attached to the element that FOLLOWS it, so a round amount such as
# "five million" does not swallow the space needed before "dollars".
number_in_english = rf'(?:{_scaled}(?: +(?:and +)?{_scaled})*(?: +(?:and +)?{number1_in_english})?|{number1_in_english})'

# `(?<!\S)` anchors the match after whitespace or at the start of the text,
# which keeps "a"/"one"/... from matching inside a longer word.
pattern_test = rf'(?<!\S){number_in_english} +dolla(rs|r)( +and +{number_in_english} +cents)?'


def find_dollar_amounts(text):
    """Return every spelled-out dollar amount in *text*, in order of appearance.

    Each item is the full matched string, e.g. ``"five dollars and ten cents"``.
    Matching is case-sensitive and only recognises words separated by spaces.
    """
    return [match.group() for match in re.finditer(pattern_test, text)]


def main(corpus_path="all-OANC.txt"):
    """Print the number sub-pattern, then each dollar amount found in the corpus."""
    # Debug aid carried over from the original script: show the expanded regex.
    print(number_in_english)
    # The context manager closes the file even if reading fails.
    with open(corpus_path, 'r') as file1:
        lines = file1.read()
    for amount in find_dollar_amounts(lines):
        print(amount)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment