Skip to content

Instantly share code, notes, and snippets.

@joshlk
Created July 4, 2017 12:55
Show Gist options
  • Save joshlk/d084d7ccf98a3ee02d9dac64971c7e53 to your computer and use it in GitHub Desktop.
Save joshlk/d084d7ccf98a3ee02d9dac64971c7e53 to your computer and use it in GitHub Desktop.
Pandas rolling stats for time series non-unique data
import pandas as pd
def nunique_rolling_time_series(data_series, step_freqency, window_size, output_name=''):
    """
    Calculate a rolling count of unique values over a time series.

    The input may contain several observations per timestamp (a non-unique
    index); every observation whose timestamp falls inside a window
    contributes to that window's unique count.

    Parameters
    ----------
    data_series : pandas.Series
        Values to count, indexed by datetimes. It is sorted by index
        internally; the caller's object is not modified.
    step_freqency : str or pandas offset
        Frequency passed to ``pd.date_range`` to generate the evaluation
        steps between the first and last (normalized) index dates.
    window_size : int
        Width of the window, expressed as a number of steps.
    output_name : str, optional
        Name given to the returned series.

    Returns
    -------
    pandas.Series
        Float series indexed by the evaluation steps. The first
        ``window_size`` entries are NaN because no full window is available
        yet. An empty input yields an empty series.

    Notes
    -----
    For step ``i`` the window is the inclusive label slice from
    ``steps[i - window_size] + 1 day`` to ``steps[i]``. The one-day shift
    keeps the previous window's end date out of the current window, since a
    label slice includes both of its bounds.
    """
    if data_series.empty:
        # date_range cannot be built from the NaT min/max of an empty index.
        return pd.Series(index=pd.DatetimeIndex([]), name=output_name, dtype=float)

    data_series = data_series.sort_index()
    date_steps = pd.date_range(
        data_series.index.min(),
        data_series.index.max(),
        freq=step_freqency,
        normalize=True,
    )

    # Explicit one-day shift. Integer arithmetic on Timestamps (and the
    # accompanying trick of overwriting the index freq with Day()) no longer
    # works in current pandas.
    one_day = pd.Timedelta(days=1)

    # A float dtype lets the result hold NaN for the steps without a full window.
    output = pd.Series(index=date_steps, name=output_name, dtype=float)

    for i, window_end in enumerate(date_steps):
        # Skip until a full window width is available.
        if i < window_size:
            continue
        window_start = date_steps[i - window_size] + one_day
        data_window = data_series.loc[window_start:window_end]
        # Positional assignment: ``i`` is a position, not an index label.
        output.iloc[i] = data_window.nunique()

    return output
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment