Created
October 11, 2019 02:56
-
-
Save dzakyputra/9f1bba1e6c89a4e4d6edf8811e3a33ee to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search the date of birth | |
def search_dob(text, min_year=1900, max_year=2000):
    """Return the earliest plausible date found in *text* as ``DD/MM/YYYY``.

    The text is scanned for day-month-year sequences whose parts may be
    separated by ``-``, ``—``, ``.``, ``/`` or whitespace, and whose digits
    may themselves be split by stray spaces. Each match is reduced to its
    digits and parsed; candidates with a year outside ``min_year`` to
    ``max_year`` or that are not real calendar dates are discarded.

    Args:
        text: The string to search.
        min_year: Smallest year accepted as a candidate (inclusive).
        max_year: Largest year accepted as a candidate (inclusive).

    Returns:
        The smallest remaining date, formatted as ``'%d/%m/%Y'``.

    Raises:
        ValueError: If no valid date is found in *text*.
    """
    # Day/month combinations are validated per month length (31, 30, or up to
    # 29 days for February); every digit may be followed by stray spaces.
    # Raw string: the pattern is unchanged, but '\ ', '\d' and '\s' are no
    # longer invalid string escapes.
    reg_dob = r'((((0[\ ]*[1-9][\ ]*|([1-2][\ ]*[0-9][\ ]*)|3[\ ]*[01][\ ]*)[-.—/ \s]*((0[\ ]*[13578][\ ]*)|(1[\ ]*0[\ ]*|1[\ ]*2[\ ]*)))|((0[\ ]*[1-9][\ ]*|([1-2][\ ]*[0-9][\ ]*)|3[\ ]*[0][\ ]*)[-./ —\s]*((0[\ ]*[469][\ ]*)|1[\ ]*1[\ ]*))|((0[\ ]*[1-9][\ ]*|[1-2][\ ]*[0-9][\ ]*)[-./ —\s]*(0[\ ]*2[\ ]*)))[-./ —\s]*([\d][\ ]*){2,4})'

    candidates = []
    for match in re.finditer(reg_dob, text):
        # Drop every separator the pattern accepts (not only '-', '—' and
        # space), leaving DDMM followed by the 2-4 year digits.
        digits = re.sub(r'\D', '', match.group())

        # The pattern guarantees two day digits and two month digits, so the
        # year starts at index 4.
        year = int(digits[4:])
        if year < min_year or year > max_year:
            continue

        try:
            candidates.append(datetime.strptime(digits, '%d%m%Y'))
        except ValueError:
            # The pattern allows 29 February in any year; skip dates that do
            # not exist instead of failing the whole search.
            continue

    if not candidates:
        raise ValueError('no valid date of birth found in text')

    # The earliest date is taken to be the birth date, because any other date
    # on the card must be later.
    return min(candidates).strftime('%d/%m/%Y')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment