Skip to content

Instantly share code, notes, and snippets.

@bluenex
Last active May 5, 2018 04:54
Show Gist options
  • Save bluenex/2347ae70f421a1b514831937cb7ff296 to your computer and use it in GitHub Desktop.
Save bluenex/2347ae70f421a1b514831937cb7ff296 to your computer and use it in GitHub Desktop.
Regex patterns for kk's date 😂
Display the source blob
Display the rendered blob
Raw
{
"cells": [{
"cell_type": "code",
"execution_count": 0,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5/1/1017 : 5/1/2017\n",
"a-001 : ---- REALLY NO IDEA, GO WITH 5/1/2017? ----\n",
"10/12017 : 10/1/2017\n",
"131/2017 : 13/1/2017\n",
"19/1//2017 : 19/1/2017\n",
"25/1/217 : 25/1/2017\n",
"27/12017 : 27/1/2017\n",
"30/1/201 : ---- THE 201X ----\n",
"29/1/201 : ---- THE 201X ----\n",
"30/1/25017 : 30/1/2017\n",
"22/2/017 : 22/2/2017\n",
"23/2/201 : ---- THE 201X ----\n",
"5/2/018 : 5/2/2018\n",
"7/2/20108 : 7/2/2018\n",
"10/2/20188 : 10/2/2018\n",
"12/2/018 : 12/2/2018\n",
"14/2/22018 : 14/2/2018\n",
"f : ---- REALLY NO IDEA, GO WITH 14/2/2018? ----\n",
"22/2/20188 : 22/2/2018\n",
"6/3/22018 : 6/3/2018\n",
"12/3/20118 : 12/3/2018\n",
"12/3/118 : 12/3/2018\n",
"25/3/20181 : 25/3/2018\n",
"27/3/20018 : 27/3/2018\n",
"29/3/20188 : 29/3/2018\n",
"3/3/2/2017 : 3/2/2017\n",
"9/3//2017 : 9/3/2017\n",
"13/22/2017 : ---- OVER 12 -> 13/22/2017 ----\n",
"ฟ-001 : ---- REALLY NO IDEA, GO WITH 9/3/2017? ----\n",
"36/3/17 : ---- OVER 31 -> 36/3/2017 ----\n",
"2/13/17 : ---- OVER 12 -> 2/13/2017 ----\n",
"7/4/217 : 7/4/2017\n",
"10/4/20107 : 10/4/2017\n",
"21/4/20117 : 21/4/2017\n",
"23/4/20174 : 23/4/2017\n",
"13/4/20107 : 13/4/2017\n",
"254/4/17 : ---- OVER 31 -> 254/4/17 ----\n",
"27/4/20117 : 27/4/2017\n",
"14/15/2017 : ---- OVER 12 -> 14/15/2017 ----\n",
"18/05/2017 : 18/05/2017\n",
"19/5/2017000 : 19/5/2017\n",
"00:00:00 : ---- REALLY NO IDEA, GO WITH 19/5/2017? ----\n",
"25/05/2017 : 25/05/2017\n",
"29/05/2017 : 29/05/2017\n",
"31/05/2017 : 31/05/2017\n",
"1/6/22017 : 1/6/2017\n",
"12/6/201 : ---- THE 201X ----\n",
"7/72017 : 7/7/2017\n",
"87/72017 : ---- OVER 31 -> 87/7/2017 ----\n",
"11/7//2017 : 11/7/2017\n",
"12/72017 : 12/7/2017\n",
"12/7/22017 : 12/7/2017\n",
"21/7/20147 : 21/7/2017\n",
"26/72017 : 26/7/2017\n",
"29/7/20147 : 29/7/2017\n",
"1/8/20170 : 1/8/2017\n",
"12/8/20017 : 12/8/2017\n",
"3/9/20170 : 3/9/2017\n",
"10/9/25017 : 10/9/2017\n",
"19/9/201 : ---- THE 201X ----\n",
"22/9/20147 : 22/9/2017\n",
"274/9/2017 : ---- OVER 31 -> 274/9/2017 ----\n",
"27/9/20107 : 27/9/2017\n",
"2/10/201 : ---- THE 201X ----\n",
"อ : ---- REALLY NO IDEA, GO WITH 27/9/2017? ----\n",
"8/101/2017 : ---- REALLY NO IDEA, GO WITH 27/9/2017? ----\n",
"12/10/017 : 12/10/2017\n",
"16/10/20147 : 16/10/2017\n",
" 17/10/2017 : 17/10/2017\n",
"1/11/117 : 1/11/2017\n",
"11/11/22017 : 11/11/2017\n",
" 17/11/2017 : 17/11/2017\n",
"11/12/201 : ---- THE 201X ----\n",
"14/12/201 : ---- THE 201X ----\n",
"19/12/201 : ---- THE 201X ----\n",
"21/12/20147 : 21/12/2017\n",
"22/12/22017 : 22/12/2017\n",
"27/12/22017 : 27/12/2017\n",
"29/17/2017 : ---- OVER 12 -> 29/17/2017 ----\n",
"11/12018 : 11/1/2018\n",
"16/1/22018 : 16/12/2018\n",
"18/1/018 : 18/10/2018\n",
"19/1/22018 : 19/12/2018\n",
"26/1/201/8 : 26/1/2018\n"
]
}
],
"source": [
"import datetime\n",
"import re\n",
"\n",
"# Collect the unique values of the date column that are not datetime.date\n",
"# instances; these are the raw entries the patterns below try to repair.\n",
"# NOTE: df_cleaned is not defined in this cell and must already exist in the kernel.\n",
"tmpdate = [i for i in df_cleaned['วันที่'].unique() if not isinstance(i, datetime.date)]\n",
"\n",
"# multiple patterns as a list\n",
"# https://stackoverflow.com/a/46328646/4010864\n",
"\n",
"# most recent value that ended up well-formed; printed as a fallback suggestion\n",
"last = ''\n",
"\n",
"# (pattern, replacement) pairs, applied in list order to every raw value\n",
"dateRegexPatterns = [\n",
"    # remove all leading whitespace (so the ^\\d-anchored patterns below can match)\n",
"    (r'^\\s+', r''),\n",
"    # missing slash between month and year: 30/12017 > 30/1/2017\n",
"    # ([2017]{4} is a character class: any four characters out of 2, 0, 1, 7)\n",
"    (r'(^\\d{1,2}/)(\\d{1})([2017]{4})$', r'\\1\\2/\\3'),\n",
"    # missing slash between day and month: 131/2017 > 13/1/2017\n",
"    (r'(^[1-3][0-9])([0-9])/([20178]{4})$', r'\\1/\\2/\\3'),\n",
"    # year field containing a 7 becomes 2017:\n",
"    # 30/1/25017 > 30/1/2017, 19/1//2017 > 19/1/2017, 5/1/1017 > 5/1/2017\n",
"    (r'(^\\d{1,2}/\\d+/)(/?\\d*7\\d*)$', r'\\g<1>2017'),\n",
"    # month '1' followed by a year field starting with 0-2: 16/1/22018 > 16/12/2018\n",
"    # NOTE(review): this reads the extra digit as part of the month (December/October);\n",
"    # confirm that is intended rather than 16/1/2018.\n",
"    (r'(^\\d{1,2}/)([1])/([0-2])(\\d*8\\d*)$', r'\\1\\2\\3/\\4'),\n",
"    # year field containing an 8 becomes 2018: 30/1/25018 > 30/1/2018\n",
"    (r'(^\\d{1,2}/\\d+/)(/?\\d*8\\d*)$', r'\\g<1>2018'),\n",
"    # missing slash between month and a year ending in 8:\n",
"    # 10/12018 > 10/1/2018, 27/12018 > 27/1/2018\n",
"    (r'^(\\d+\\/)(\\d+)(\\d{3}8)$', r'\\1\\2/\\3'),\n",
"    # four fields with a 7 in the last one; drop the first field: 3/3/2/2017 > 3/2/2017\n",
"    (r'^(\\d+)/(\\d+)/(\\d+)/(\\d*7\\d*)$', r'\\2/\\3/\\4'),\n",
"    # stray slash inside the year: 26/1/201/8 > 26/1/2018\n",
"    (r'^(\\d+/)(\\d+/)(\\d*)/(\\d*)', r'\\1\\2\\3\\4'),\n",
"    # remove every remaining whitespace character, wherever it is\n",
"    # (the '$' alternative only replaces an empty match, so it changes nothing)\n",
"    (r'$|\\s', r''),\n",
"]\n",
"\n",
"# apply all patterns\n",
"def regexDate(dateString):\n",
"    \"\"\"Return dateString after applying every pair in dateRegexPatterns, in list order.\"\"\"\n",
"    cleaned = dateString\n",
"    for pattern, replacement in dateRegexPatterns:\n",
"        # each substitution works on the result of the previous one\n",
"        cleaned = re.sub(pattern, replacement, cleaned)\n",
"    return cleaned\n",
"\n",
"# loop through each date: clean it, classify the result, print 'raw : cleaned'\n",
"for date in tmpdate:\n",
"    passRegex = regexDate(str(date))\n",
"\n",
"    # filter normies and non-normies\n",
"    # normies: d/m/yyyy with day 1-31 and month 1-12 (leading zero allowed), year 2017 or 2018\n",
"    normies = re.match(r'(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[0-2])/(201[78])$', passRegex)\n",
"    # the201: year field cut off after '201'\n",
"    the201 = re.match(r'.*/201$', passRegex)\n",
"    # over31: day field is 32-99 or has three or more digits\n",
"    over31 = re.match(r'(3[2-9]|[4-9][0-9]|[0-9]{3,})/', passRegex)\n",
"    # over12: a two-digit field between slashes in the range 13-99\n",
"    over12 = re.match(r'.*/(1[3-9]|[2-9][0-9])/', passRegex)\n",
"\n",
"    # debugging -- to see what is not in pattern yet\n",
"    # (compare by value: 'is not' on a string literal tests object identity)\n",
"    if passRegex != '' and not normies:\n",
"        if over12:\n",
"            passRegex = '---- OVER 12 -> {0} ----'.format(passRegex)\n",
"\n",
"        if over31:\n",
"            passRegex = '---- OVER 31 -> {0} ----'.format(passRegex)\n",
"\n",
"        if the201:\n",
"            passRegex = '---- THE 201X ----'\n",
"\n",
"    if normies:\n",
"        # remember the latest well-formed date as a fallback suggestion\n",
"        last = passRegex\n",
"    elif passRegex == str(date):\n",
"        # no pattern and no filter changed the value: suggest the last good date\n",
"        passRegex = '---- REALLY NO IDEA, GO WITH {0}? ----'.format(last)\n",
"\n",
"    print(\"{0} : {1}\".format(str(date), passRegex))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment