Created
December 26, 2017 05:15
-
-
Save ttpro1995/f6c5963723075a9ea975bf0670f48e2f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import dateparser | |
# Use re and dateparser to normalize the dates found in a sentence.
# :author: Thai Thien
def p1():
    """Demo: contrast ``re.search`` with ``re.findall`` on a sample sentence.

    Prints, in order, the first match object, the text of that first match,
    and the list of every match found in the sample.
    """
    sample = " today is 14/2/2012 which is meowing full day. Tomorrow is 15/2/2012, which is an meowingfull day again"
    date_pattern = re.compile(r"\d+/\d+/\d+")

    # search() stops at the first occurrence and returns a match object.
    first_hit = date_pattern.search(sample)
    print(first_hit)
    print(first_hit.group())

    # findall() returns every non-overlapping match as a plain string.
    print(date_pattern.findall(sample))  # ok
def parse_day(sentences):
    """Replace each slash-separated numeric date in *sentences* with its parsed form.

    Every run of ``digits/digits/digits`` is passed to ``dateparser.parse``
    and substituted by ``str()`` of the parsed result. Text that dateparser
    cannot interpret is left unchanged rather than being overwritten with the
    literal string ``"None"``.

    Args:
        sentences: Text that may contain dates such as ``14/2/2012``.

    Returns:
        The text with every recognised date replaced.
    """

    def _normalize(match):
        raw = match.group()
        parsed = dateparser.parse(raw)
        # dateparser.parse returns None when it cannot interpret the text;
        # keep the original characters in that case.
        return raw if parsed is None else str(parsed)

    # Substituting match-by-match (instead of str.replace on each found
    # string) rewrites only the exact spans the regex matched, so a short
    # date such as "1/2/2012" can never clobber the tail of "11/2/2012".
    return re.sub(r"\d+/\d+/\d+", _normalize, sentences)
if __name__ == "__main__":
    # Script entry point: normalize the dates in a sample sentence and show
    # the rewritten text.
    token = " today is 14/2/2012 which is meowing full day. Tomorrow is 15/2/2012, which is an meowingfull day again"
    print(parse_day(token))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment