Skip to content

Instantly share code, notes, and snippets.

@dzakyputra
Created October 11, 2019 02:56
Show Gist options
  • Save dzakyputra/9f1bba1e6c89a4e4d6edf8811e3a33ee to your computer and use it in GitHub Desktop.
Save dzakyputra/9f1bba1e6c89a4e4d6edf8811e3a33ee to your computer and use it in GitHub Desktop.
# Search the date of birth
def search_dob(text, *, min_year=1900, max_year=2000):
    """Return the earliest plausible date found in *text* as ``DD/MM/YYYY``.

    Every day-month-year candidate in *text* is collected; the digits may be
    separated (or internally split) by spaces, ``-``, ``—``, ``.``, ``/`` or
    other whitespace. Candidates whose year falls outside
    ``[min_year, max_year]`` or that are not real calendar dates
    (e.g. 29-02-1999) are skipped. The smallest remaining date is returned,
    on the assumption that the earliest date on a KTP is the birth date.

    Args:
        text: The text to scan for dates.
        min_year: Lowest accepted year (inclusive).
        max_year: Highest accepted year (inclusive).

    Returns:
        The earliest accepted date formatted as ``'%d/%m/%Y'``.

    Raises:
        ValueError: If *text* contains no acceptable date.
    """
    # The pattern pairs each day range with the months that can hold it, and
    # tolerates stray spaces between digits.
    reg_dob = (
        # Days 01-31 with the 31-day months (01, 03, 05, 07, 08, 10, 12).
        r'((((0[\ ]*[1-9][\ ]*|([1-2][\ ]*[0-9][\ ]*)|3[\ ]*[01][\ ]*)'
        r'[-.—/ \s]*((0[\ ]*[13578][\ ]*)|(1[\ ]*0[\ ]*|1[\ ]*2[\ ]*)))'
        # Days 01-30 with the 30-day months (04, 06, 09, 11).
        r'|((0[\ ]*[1-9][\ ]*|([1-2][\ ]*[0-9][\ ]*)|3[\ ]*[0][\ ]*)'
        r'[-./ —\s]*((0[\ ]*[469][\ ]*)|1[\ ]*1[\ ]*))'
        # Days 01-29 with February.
        r'|((0[\ ]*[1-9][\ ]*|[1-2][\ ]*[0-9][\ ]*)'
        r'[-./ —\s]*(0[\ ]*2[\ ]*)))'
        # Separator, then two to four year digits.
        r'[-./ —\s]*([\d][\ ]*){2,4})'
    )

    candidates = []
    for match in re.finditer(reg_dob, text):
        # Drop every separator the pattern accepts, not just '-', '—' and
        # ' ', so that the fixed slices below always land on digits.
        digits = re.sub(r'\D', '', match.group())
        day = int(digits[:2])
        month = int(digits[2:4])
        # Years are taken literally; two- or three-digit years are not
        # expanded, so they only pass if the caller widens the range.
        year = int(digits[4:])
        if not min_year <= year <= max_year:
            continue
        try:
            candidates.append(datetime(year, month, day))
        except ValueError:
            # The pattern admits impossible dates such as 29 February in a
            # non-leap year; skip them instead of aborting the search.
            continue

    if not candidates:
        raise ValueError('no valid date of birth found in text')

    # We take the smallest date in a KTP, because it must be the birth date.
    return min(candidates).strftime('%d/%m/%Y')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment