Created
December 25, 2018 05:52
-
-
Save skylander86/4613a1f3ff936bd4e6ad3cab2684fd2d to your computer and use it in GitHub Desktop.
Regular expressions for capturing dates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------
# Building blocks for DATE_REGEXES.
#
# Every pattern below is assembled from these fragments by plain string
# concatenation, so the few intentional differences between entries (strict
# vs. loose digits, separator quantifier, boundary style) are visible at a
# glance instead of being buried in ~50 near-identical literals.
# ---------------------------------------------------------------------------

# One separator character: ASCII hyphen, Unicode hyphen (U+2010), em dash
# (U+2014), en dash (U+2013), dot, underscore, slash or any whitespace.
_SEP = r'[\-\u2010\u2014\u2013\.\_\/\s]'
_SEP_1_2 = _SEP + r'{1,2}'
_SEP_0_8 = _SEP + r'{0,8}'
_SEP_1_8 = _SEP + r'{1,8}'
# Dash-only separators used by the "2017-928" forms.  The first-tier pattern
# also accepts an underscore, the second-tier one does not.
_DASH_OR_UNDERSCORE = r'[\-\_\u2010\u2014\u2013]'
_DASH = r'[\-\u2010\u2014\u2013]'

# Boundary guards.  `_ND` CONSUMES one non-digit character, so a pattern
# wrapped in `_ND` needs such a character on each side: it cannot match at the
# very start or very end of the input, and group(0) includes the two guard
# characters.  NOTE(review): callers presumably pad the text - confirm.
_B = r'\b'
_ND = r'[^\d]'

_DIGITS_1_9 = '|'.join(str(n) for n in range(1, 10))        # 1|2|...|9
_MONTHS_2D = '|'.join('%02d' % n for n in range(1, 13))     # 01|02|...|12
_DAYS_2D = '|'.join('%02d' % n for n in range(1, 32))       # 01|02|...|31

# Year: four digits 1990-2019, or two digits.
_Y4 = r'(?P<year>199\d|200\d|201\d)'
_Y2 = r'(?P<year>9\d|0\d|1\d)'
# Two-digit year enumerated as 90..99, 00..18 - unlike _Y2 this rejects "19".
_Y2_STRICT = ('(?P<year>'
              + '|'.join('%02d' % (y % 100) for y in range(1990, 2019))
              + ')')

# Month: one-or-two digit, strictly two digit, or English name/abbreviation.
_M = '(?P<month>' + _DIGITS_1_9 + '|' + _MONTHS_2D + ')'
_MM = '(?P<month>' + _MONTHS_2D + ')'
# "may" is listed twice on purpose: the pattern text is kept exactly as it has
# always been.  Short forms come first, so the group may capture only the
# abbreviation when nothing after it forces the long form.
_MON = (r'(?P<month>jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec'
        r'|january|february|march|april|may|june|july|august|september'
        r'|october|november|december|sept)')

# Day: one-or-two digit, or strictly two digit; optional ordinal suffix.
_D = '(?P<day>' + _DIGITS_1_9 + '|' + _DAYS_2D + ')'
_DD = '(?P<day>' + _DAYS_2D + ')'
_ORD = r'(st|nd|rd|th)?'

# Year prefixes: an apostrophe (optionally preceded by "ya "), or "ya"/"ay".
# NOTE(review): "ya"/"ay" presumably stand for "year of assessment" /
# "assessment year" - confirm with the consuming code.
_APOS = r'(\'|ya \')'
_YA = r'(ya|ay)\s*'


def _rx(*parts):
    """Compile the concatenation of `parts` as a case-sensitive pattern."""
    return re.compile(''.join(parts))


def _rx_i(*parts):
    """Compile the concatenation of `parts` with re.I (case-insensitive)."""
    return re.compile(''.join(parts), flags=re.I)


# Ordered list of compiled date patterns.  Each pattern exposes the named
# groups `year` and, where applicable, `month` and `day`; the trailing comment
# on every entry shows an example of the text it targets.
#
# The list contains exactly one `None` entry.  NOTE(review): it looks like a
# sentinel separating the first tier of patterns from a second tier that
# repeats most numeric forms with `[^\d]` guards instead of `\b` (so that e.g.
# "_2017_" is recognised, where `\b` fails next to an underscore) - confirm
# how the consuming code treats it.  Order and positions must not change.
DATE_REGEXES = [
    # -- numeric, separated, word-boundary delimited -------------------------
    _rx(_B, _Y4, _SEP_1_2, _M, _SEP_1_2, _D, _B),   # 2017-09-28
    _rx(_B, _D, _SEP_1_2, _M, _SEP_1_2, _Y4, _B),   # 28-9-2017
    _rx(_B, _M, _SEP_1_2, _D, _SEP_1_2, _Y4, _B),   # 9-28-2017
    _rx(_B, _Y4, _SEP_1_2, _M, _B),                 # 2017-09
    _rx(_B, _M, _SEP_1_2, _Y4, _B),                 # 9-2017
    _rx(_B, _M, _SEP_1_2, _D, _SEP_1_2, _Y2, _B),   # 9-28-17
    _rx(_B, _Y2, _SEP_1_2, _M, _SEP_1_2, _D, _B),   # 17-9-28
    _rx(_B, _D, _SEP_1_2, _M, _SEP_1_2, _Y2, _B),   # 28-9-17
    # -- numeric, compact, word-boundary delimited ---------------------------
    _rx(_B, _Y4, _M, _DD, _B),                      # 20170928, 2017928
    _rx(_B, _Y4, _DD, _MM, _B),                     # 20172809
    _rx(_B, _DD, _MM, _Y4, _B),                     # 28092017
    _rx(_B, _M, _DD, _Y4, _B),                      # 09282017, 9282017
    _rx(_B, _DD, _MM, _Y2_STRICT, _B),              # 280917
    _rx(_B, _MM, _DD, _Y2_STRICT, _B),              # 092817
    _rx(_B, _Y2_STRICT, _MM, _DD, _B),              # 170928
    _rx(_B, _Y2_STRICT, _DD, _MM, _B),              # 172809
    _rx(_B, _Y4, _DASH_OR_UNDERSCORE, _M, _DD, _B), # 2017-928
    # -- month names (case-insensitive, [^\d]-guarded) -----------------------
    _rx_i(_ND, _Y4, _SEP_0_8, _MON, _SEP_0_8, _D, _ND),         # 2017 Sep 28
    _rx_i(_ND, _D, _ORD, _SEP_0_8, _MON, _SEP_0_8, _Y4, _ND),   # 28 Sep 2017
    _rx_i(_ND, _D, _ORD, _SEP_0_8, _MON, _SEP_0_8, _Y2, _ND),   # 28 Sep 17
    _rx_i(_ND, _MON, _SEP_0_8, _D, _ORD, _SEP_1_8, _Y4, _ND),   # Sep 28 2017
    _rx_i(_ND, _MON, _SEP_0_8, _D, _ORD, _SEP_1_8, _Y2, _ND),   # Sep 28 17
    _rx_i(_ND, _MON, _SEP_0_8, _Y4, _ND),                       # Sep 2017
    _rx_i(_ND, _MON, _SEP_0_8, _Y2, _ND),                       # Sep 17
    _rx_i(_ND, _Y4, _SEP_0_8, _MON, _ND),                       # 2017 Sep
    # -- bare month+year / year ----------------------------------------------
    _rx(_B, _MM, _Y2, _B),                          # 0817
    _rx(_B, _Y4, _B),                               # 2017
    None,
    # -- numeric, separated, [^\d]-guarded (exactly one separator char) ------
    _rx(_ND, _Y4, _SEP, _M, _SEP, _D, _ND),         # 2017-09-28
    _rx(_ND, _D, _SEP, _M, _SEP, _Y4, _ND),         # 28-9-2017
    _rx(_ND, _M, _SEP, _D, _SEP, _Y4, _ND),         # 9-28-2017
    _rx(_ND, _Y4, _SEP_1_2, _M, _ND),               # 2017-09
    _rx(_ND, _M, _SEP_1_2, _Y4, _ND),               # 9-2017
    _rx(_ND, _Y2, _SEP, _M, _SEP, _D, _ND),         # 17-9-28
    _rx(_ND, _D, _SEP, _M, _SEP, _Y2, _ND),         # 28-9-17
    _rx(_ND, _M, _SEP, _D, _SEP, _Y2, _ND),         # 9-28-17
    # -- numeric, compact, [^\d]-guarded -------------------------------------
    _rx(_ND, _DD, _MM, _Y4, _ND),                   # 28092017
    _rx(_ND, _M, _DD, _Y4, _ND),                    # 09282017, 9282017
    _rx(_ND, _Y4, _M, _DD, _ND),                    # 20170928, 2017928
    _rx(_ND, _Y4, _DD, _MM, _ND),                   # 20172809
    _rx(_ND, _DD, _MM, _Y2, _ND),                   # 280917
    _rx(_ND, _MM, _DD, _Y2, _ND),                   # 092817
    _rx(_ND, _Y2, _MM, _DD, _ND),                   # 170928
    _rx(_ND, _Y2, _DD, _MM, _ND),                   # 172809
    _rx(_ND, _Y4, _DASH, _M, _DD, _ND),             # 2017-928
    # -- prefixed years (case-insensitive) -----------------------------------
    _rx_i(_APOS, _Y4, _B),                          # '2017
    _rx_i(_APOS, _Y2, _B),                          # '17
    _rx_i(_YA, _Y4, _B),                            # YA2017
    _rx_i(_YA, _Y2, _B),                            # YA17, YA 17
    # -- bare month+year / year, [^\d]-guarded -------------------------------
    _rx(_ND, _MM, _Y2, _ND),                        # 0817
    _rx(_ND, _Y4, _ND),                             # 2017
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment