>>> import numpy as np
>>> import pandas as pd
>>>
>>> df = pd.DataFrame({'A': range(1, 6)}, index=pd.date_range('2018-12-28', periods=5))
>>>
>>> df2 = pd.DataFrame({'B': range(1, 13, 2)}, index=pd.date_range('2018-12-1', periods=6))
>>>
>>> df
A
2018-12-28 1
2018-12-29 2
2018-12-30 3
2018-12-31 4
2019-01-01 5
>>>
>>> df2
B
2018-12-01 1
2018-12-02 3
2018-12-03 5
2018-12-04 7
2018-12-05 9
2018-12-06 11
>>>
>>> df3 = pd.DataFrame({'items_sold': range(1, 19, 3)}, index=pd.date_range('2018-12-1', periods=6))
>>>
>>> df3
items_sold
2018-12-01 1
2018-12-02 4
2018-12-03 7
2018-12-04 10
2018-12-05 13
2018-12-06 16
>>>
>>> df3.resample("3D")
DatetimeIndexResampler [freq=<3 * Days>, axis=0, closed=left, label=left, convention=start, base=0]
>>>
>>> df3.resample("3D").pipe(lambda d: d.max() - d.min())
items_sold
2018-12-01 6
2018-12-04 6
>>>
>>> df3.resample('3D').agg(['sum'])
items_sold
sum
2018-12-01 12
2018-12-04 39
>>>
>>> df3.resample('3D').agg(['mean'])
items_sold
mean
2018-12-01 4
2018-12-04 13
>>>
>>> df3.resample('3D').agg(['std'])
items_sold
std
2018-12-01 3.0
2018-12-04 3.0
>>>
>>> df3.resample('3D').agg(['std', 'mean', 'sum'])
items_sold
std mean sum
2018-12-01 3.0 4 12
2018-12-04 3.0 13 39
>>>
>>> mu = 12 / 3
>>> mu
4.0
>>>
>>> (4 - 1) ** 2 + (4-4) + (4-7)
6
>>> (4 - 1) ** 2 + (4-4) + (4-7) ** 2
18
>>>
>>> 24
24
>>> pow(18/3, 1/2)  # population std (ddof=0); pandas .std() defaults to ddof=1, i.e. pow(18/2, 1/2) == 3.0
2.449489742783178
>>>
>>> # Note: the mean deviation is also called the Mean Absolute Deviation (MAD),
>>> # because it is the mean of the absolute deviations from the mean:
>>> #     MAD = sum(|x_i - mean(x)|) / n
>>>
>>> df4 = pd.DataFrame({'a': [3, 6, 6, 7, 8, 11, 15, 16]})
>>> df4
a
0 3
1 6
2 6
3 7
4 8
5 11
6 15
7 16
>>>
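>>> df4.mad()  # NOTE: DataFrame.mad was deprecated in pandas 1.5 and removed in 2.0; equivalent: (df4 - df4.mean()).abs().mean()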
a 3.75
dtype: float64
>>>
>>> df4.mad(axis=1) # row-wise mean absolute deviation; each row holds a single value, so every result is 0.0
0 0.0
1 0.0
2 0.0
3 0.0
4 0.0
5 0.0
6 0.0
7 0.0
dtype: float64
>>>
>>>
>>> df5 = pd.DataFrame({'a': [3, 6, 6, 7, 8, 11, 15, 16, np.nan, np.nan]})
>>> df5
a
0 3.0
1 6.0
2 6.0
3 7.0
4 8.0
5 11.0
6 15.0
7 16.0
8 NaN
9 NaN
>>>
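>>> df4 = df5.copy()  # from here on df4 refers to the NaN-containing data (the outputs below show 10 rows with NaN)
>>> df4.mad() # skipna=True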
a 3.75
dtype: float64
>>>
>>> df4.mad(skipna=False)
a NaN
dtype: float64
>>>
>>> df4.fillna(0)
a
0 3.0
1 6.0
2 6.0
3 7.0
4 8.0
5 11.0
6 15.0
7 16.0
8 0.0
9 0.0
>>>
>>> df4
a
0 3.0
1 6.0
2 6.0
3 7.0
4 8.0
5 11.0
6 15.0
7 16.0
8 NaN
9 NaN
>>>
>>> df4.fillna(0, inplace=True)
>>> df4
a
0 3.0
1 6.0
2 6.0
3 7.0
4 8.0
5 11.0
6 15.0
7 16.0
8 0.0
9 0.0
>>>
>>> df4.mad(skipna=False)
a 4.24
dtype: float64
>>>
>>> df4['a'].sum()
72.0
>>>
>>> df4['a'].count()
10
>>> df4['a'].sum(skipna=False)
72.0
>>>
>>> df4['a'].mean()
7.2
>>>
>>> df4['a'].mean(skipna=False)
7.2
>>> df4['a'].mean(skipna=True)
7.2
>>> df4['a'].sum() / 10
7.2
>>> df4['a'].sum() / 8
9.0
>>> df4
a
0 3.0
1 6.0
2 6.0
3 7.0
4 8.0
5 11.0
6 15.0
7 16.0
8 0.0
9 0.0
>>>
>>> df4
a
0 3.0
1 6.0
2 6.0
3 7.0
4 8.0
5 11.0
6 15.0
7 16.0
8 0.0
9 0.0
>>>
>>> df5.mean()
a 9.0
dtype: float64
>>>
>>> df5.mean(skipna=False)
a NaN
dtype: float64
>>>
>>> df5.fillna(0)
a
0 3.0
1 6.0
2 6.0
3 7.0
4 8.0
5 11.0
6 15.0
7 16.0
8 0.0
9 0.0
>>>
>>> df5.fillna(0, inplace=True)
>>> df5.mean()
a 7.2
dtype: float64
>>>