Last active
January 20, 2022 03:09
-
-
Save natbusa/198c1f120938ece3fa3a8134e0293b6e to your computer and use it in GitHub Desktop.
Approximate compare of DateOffset strings in pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Comparing pandas DateOffset strings directly with one another is not available
# as a method of the DateOffset class in Python. The reason is straightforward:
# offsets are "timestamp context" dependent.
# For example, 1M can be either 28D, 29D, 30D, or 31D, so there is no way to compare
# DateOffset strings with each other *exactly* when they are taken out of context.
# To alleviate this, below is a reference implementation of an approximate
# comparison that assumes months of 30 days and years of 365 days.
# Approximate length, in days, of one unit of each day-or-longer offset alias.
# Months count as 30 days, quarters as 90 days and years as 365 days.
# Both the legacy spellings ("M", "Q", "A", ...) and the period-end spellings
# used by pandas 2.2+ ("ME", "QE", "YE", ...) are accepted.
_DAYS_PER_ALIAS = {
    # days and weeks
    "B": 1,
    "D": 1,
    "W": 7,
    # months and semi-months
    "M": 30,
    "ME": 30,
    "SM": 15,
    "SME": 15,
    "BM": 30,
    "BME": 30,
    "MS": 30,
    "SMS": 15,
    "BMS": 30,
    # quarters
    "Q": 90,
    "QE": 90,
    "BQ": 90,
    "BQE": 90,
    "QS": 90,
    "BQS": 90,
    # years
    "A": 365,
    "Y": 365,
    "YE": 365,
    "BA": 365,
    "BY": 365,
    "BYE": 365,
    "AS": 365,
    "YS": 365,
    "BAS": 365,
    "BYS": 365,
}


def alias_to_days(x):
    """Return the approximate number of days in one unit of offset alias *x*.

    Args:
        x: An offset alias without count or anchor, e.g. ``"W"`` or ``"QS"``.

    Returns:
        The approximate day count as an int, or ``None`` when *x* is not a
        day-or-longer alias known to this table.
    """
    return _DAYS_PER_ALIAS.get(x)
# Length, in seconds, of one unit of each sub-day offset alias.
# Both the legacy uppercase spellings ("H", "BH", "S") and the lowercase
# spellings used by pandas 2.2+ ("h", "bh", "s") are accepted.
_SECONDS_PER_ALIAS = {
    "BH": 3600,
    "bh": 3600,
    "H": 3600,
    "h": 3600,
    "T": 60,
    "min": 60,
    "S": 1,
    "s": 1,
}


def alias_to_seconds(x):
    """Return the number of seconds in one unit of offset alias *x*.

    Args:
        x: An offset alias without count, e.g. ``"H"`` or ``"min"``.

    Returns:
        The second count as an int, or ``None`` when *x* is not a sub-day
        alias known to this table.
    """
    return _SECONDS_PER_ALIAS.get(x)
def alias_to_int(x):
    """Return the approximate duration of one unit of offset alias *x*, in seconds.

    The alias is first looked up among the day-or-longer aliases (converted
    to seconds), then among the sub-day aliases.

    Args:
        x: An offset alias without count or anchor, e.g. ``"D"`` or ``"H"``.

    Returns:
        The approximate duration in seconds as an int.

    Raises:
        ValueError: If *x* is found in neither alias table.
    """
    # The helpers are named alias_to_days / alias_to_seconds; the previous
    # calls to to_days / to_seconds referenced names that do not exist.
    days = alias_to_days(x)
    if days is not None:
        return days * 86400  # 86400 seconds per day

    seconds = alias_to_seconds(x)
    if seconds is not None:
        return seconds

    raise ValueError(f'{x} is not a valid/supported string offset')
import re | |
def offset_to_int(x):
    """Convert an offset string to an approximate duration in seconds.

    The string is read as one or more ``<count><alias>[-<anchor>]`` terms
    written back to back, e.g. ``'2QS-JUN12W30T-MON69S1BH'``. Each term
    contributes ``count * alias_to_int(alias)`` seconds. The anchor suffix
    (such as ``-JUN``) is parsed but does not affect the result.

    Only the first term may omit its count, in which case the count is 1
    (``'D'`` is read as ``'1D'``). Text after the first term that does not
    form a ``<count><alias>`` pair is skipped.

    Args:
        x: The offset string to convert.

    Returns:
        The summed approximate duration in seconds as an int.

    Raises:
        ValueError: If *x* does not start with a ``[count]alias`` term, or
            if any alias is rejected by ``alias_to_int``.
    """
    head = re.match(r"^(\d*)([a-zA-Z]+)(-[a-zA-Z]+)?(.*)$", x)
    if head is None:
        # Previously surfaced as an AttributeError on None.groups().
        raise ValueError(f'{x} is not a valid/supported string offset')

    count, alias, _anchor, rest = head.groups()
    # An absent count means 1; an explicit count (including 0) is taken as is.
    # int('') used to raise here, and `int(...) or 1` turned 0 into 1.
    total = (int(count) if count else 1) * alias_to_int(alias)

    # Handle concatenated offset terms following the first one.
    for count, alias, _anchor in re.findall(r"(\d+)([a-zA-Z]+)(-[a-zA-Z]+)?", rest):
        total += int(count) * alias_to_int(alias)

    return total
def compare_offsets(a, b):
    """Three-way approximate comparison of two offset strings.

    Args:
        a: First offset string.
        b: Second offset string.

    Returns:
        ``0`` when the arguments are equal or convert to the same number of
        seconds, ``1`` when *a* is the longer offset, ``-1`` when *b* is.
    """
    # Equal arguments need no parsing at all.
    if a == b:
        return 0

    lhs, rhs = offset_to_int(a), offset_to_int(b)
    # Sign of the difference: True/False subtract to 1, 0 or -1.
    return (lhs > rhs) - (lhs < rhs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>> compare_offsets('72H30T', '3D') | |
1 | |
>> compare_offsets('72H', '3D') | |
0 | |
>> compare_offsets('2B12BH', '3D') | |
-1 | |
offset_to_int('2QS-JUN12W30T-MON69S1BH') | |
22815069 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment