Skip to content

Instantly share code, notes, and snippets.

@skylander86
Created December 25, 2018 05:52
Show Gist options
  • Save skylander86/4613a1f3ff936bd4e6ad3cab2684fd2d to your computer and use it in GitHub Desktop.
Save skylander86/4613a1f3ff936bd4e6ad3cab2684fd2d to your computer and use it in GitHub Desktop.
Regular expressions for capturing dates
def _build_date_regexes():
    """Assemble the ordered table of date-capturing regular expressions.

    Every pattern exposes its captures through the named groups ``year``,
    ``month`` and/or ``day`` (the textual-month patterns additionally contain
    one unnamed group for the optional ordinal suffix, and the ``'17`` /
    ``YA17`` patterns one unnamed group for the prefix).

    The table is built from a handful of shared fragments instead of
    spelling each alternation out in every pattern, so that the accepted
    year range, separators, etc. are defined in exactly one place.  The
    resulting pattern strings, flags and ordering are identical to the
    hand-written literals this replaces.

    Returns:
        list: 51 entries -- compiled patterns, except for a single ``None``
        at index 27.
    """
    def named(group, alternatives):
        # Wrap an alternation in a named capturing group.
        return '(?P<%s>%s)' % (group, alternatives)

    def plain(*parts):
        # Case-sensitive pattern made by concatenating the fragments.
        return re.compile(''.join(parts))

    def nocase(*parts):
        # Same, but compiled with re.I (month names, "YA"/"AY" prefixes).
        return re.compile(''.join(parts), flags=re.I)

    single = [str(n) for n in range(1, 10)]    # 1 .. 9   (no leading zero)
    mm = ['%02d' % n for n in range(1, 13)]    # 01 .. 12
    dd = ['%02d' % n for n in range(1, 32)]    # 01 .. 31

    # --- captured fields -------------------------------------------------
    # Alternation in `re` is ordered (first alternative that lets the whole
    # pattern succeed wins), so the order of alternatives is kept exactly.
    y4 = named('year', r'199\d|200\d|201\d')   # 1990 .. 2019
    y2 = named('year', r'9\d|0\d|1\d')         # 90 .. 99, 00 .. 19
    # Explicit two-digit list 90 .. 99, 00 .. 18 (used by the compact
    # six-digit forms in the first part of the table only).
    y2x = named('year', '|'.join('%02d' % (y % 100) for y in range(1990, 2019)))
    m = named('month', '|'.join(single + mm))  # 1 .. 9 or 01 .. 12
    m2 = named('month', '|'.join(mm))          # 01 .. 12 only
    d = named('day', '|'.join(single + dd))    # 1 .. 9 or 01 .. 31
    d2 = named('day', '|'.join(dd))            # 01 .. 31 only
    mon = named('month',
                'jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|'
                'january|february|march|april|may|june|july|august|'
                'september|october|november|december|sept')
    ordinal = '(st|nd|rd|th)?'                 # optional "28th"-style suffix

    # --- separators and delimiters ---------------------------------------
    # One separator character: hyphen, U+2010/U+2014/U+2013 dashes, dot,
    # underscore, slash or whitespace.
    s = r'[\-\u2010\u2014\u2013\.\_\/\s]'
    s12 = s + '{1,2}'
    s08 = s + '{0,8}'
    s18 = s + '{1,8}'
    dash_us = r'[\-\_\u2010\u2014\u2013]'      # dashes or underscore
    dash = r'[\-\u2010\u2014\u2013]'           # dashes only
    b = r'\b'
    # NOTE: [^\d] consumes one real character on each side, so patterns
    # delimited with it cannot match a date that starts at the very
    # beginning or ends at the very end of the searched string, and the
    # delimiter characters are part of group(0).
    nd = r'[^\d]'
    apos = r"(\'|ya \')"
    ya = r'(ya|ay)\s*'

    return [
        # Numeric dates with separators, delimited by word boundaries.
        plain(b, y4, s12, m, s12, d, b),                 # 2017-09-28
        plain(b, d, s12, m, s12, y4, b),                 # 28-9-2017
        plain(b, m, s12, d, s12, y4, b),                 # 9-28-2017
        plain(b, y4, s12, m, b),                         # 2017-09
        plain(b, m, s12, y4, b),                         # 9-2017
        plain(b, m, s12, d, s12, y2, b),                 # 9-28-17
        plain(b, y2, s12, m, s12, d, b),                 # 17-9-28
        plain(b, d, s12, m, s12, y2, b),                 # 28-9-17
        # Compact numeric dates (no separators), word-boundary delimited.
        plain(b, y4, m, d2, b),                          # 20170928, 2017928
        plain(b, y4, d2, m2, b),                         # 20172809
        plain(b, d2, m2, y4, b),                         # 28092017
        plain(b, m, d2, y4, b),                          # 09282017, 9282017
        plain(b, d2, m2, y2x, b),                        # 280917
        plain(b, m2, d2, y2x, b),                        # 092817
        plain(b, y2x, m2, d2, b),                        # 170928
        plain(b, y2x, d2, m2, b),                        # 172809
        plain(b, y4, dash_us, m, d2, b),                 # 2017-928
        # Textual month names (case-insensitive), non-digit delimited.
        nocase(nd, y4, s08, mon, s08, d, nd),            # 2017 Sep 28
        nocase(nd, d, ordinal, s08, mon, s08, y4, nd),   # 28 Sep 2017
        nocase(nd, d, ordinal, s08, mon, s08, y2, nd),   # 28 Sep 17
        nocase(nd, mon, s08, d, ordinal, s18, y4, nd),   # Sep 28 2017
        nocase(nd, mon, s08, d, ordinal, s18, y2, nd),   # Sep 28 17
        nocase(nd, mon, s08, y4, nd),                    # Sep 2017
        nocase(nd, mon, s08, y2, nd),                    # Sep 17
        nocase(nd, y4, s08, mon, nd),                    # 2017 Sep
        plain(b, m2, y2, b),                             # 0817
        plain(b, y4, b),                                 # 2017
        # NOTE(review): non-pattern sentinel.  It presumably separates the
        # patterns above from a second, fallback tier below -- confirm
        # against the code that iterates this list (it must skip None).
        None,
        # Numeric dates delimited by any non-digit instead of \b.
        # NOTE(review): presumably for text such as "x_2017-09-28_y", where
        # \b does not fire between "_" and a digit -- confirm intent.
        plain(nd, y4, s, m, s, d, nd),                   # 2017-09-28
        plain(nd, d, s, m, s, y4, nd),                   # 28-9-2017
        plain(nd, m, s, d, s, y4, nd),                   # 9-28-2017
        plain(nd, y4, s12, m, nd),                       # 2017-09
        plain(nd, m, s12, y4, nd),                       # 9-2017
        plain(nd, y2, s, m, s, d, nd),                   # 17-9-28
        plain(nd, d, s, m, s, y2, nd),                   # 28-9-17
        plain(nd, m, s, d, s, y2, nd),                   # 9-28-17
        plain(nd, d2, m2, y4, nd),                       # 28092017
        plain(nd, m, d2, y4, nd),                        # 09282017, 9282017
        plain(nd, y4, m, d2, nd),                        # 20170928, 2017928
        plain(nd, y4, d2, m2, nd),                       # 20172809
        plain(nd, d2, m2, y2, nd),                       # 280917
        plain(nd, m2, d2, y2, nd),                       # 092817
        plain(nd, y2, m2, d2, nd),                       # 170928
        plain(nd, y2, d2, m2, nd),                       # 172809
        plain(nd, y4, dash, m, d2, nd),                  # 2017-928
        # Year-only forms with an apostrophe or "YA"/"AY" prefix.
        nocase(apos, y4, b),                             # '2017
        nocase(apos, y2, b),                             # '17
        nocase(ya, y4, b),                               # YA2017
        nocase(ya, y2, b),                               # YA17, YA 17
        plain(nd, m2, y2, nd),                           # 0817
        plain(nd, y4, nd),                               # 2017
    ]


# Ordered list of compiled date patterns (named groups: year / month / day).
# Contains exactly one None entry (index 27); consumers must skip it.
DATE_REGEXES = _build_date_regexes()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment