Created
July 7, 2021 15:56
-
-
Save dutc/ed023acd490e08877f3c6d3dacd265df to your computer and use it in GitHub Desktop.
“Python Expert” Newsletter (July 7, 2021): Learning Corner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from functools import total_ordering | |
from dataclasses import dataclass | |
from numpy import tile, repeat | |
from numpy.random import default_rng | |
from pandas import DataFrame, date_range, Timestamp | |
from pandas.core.dtypes.common import is_numeric_dtype | |
from random import seed | |
from string import ascii_lowercase | |
from numpy import number, int64 | |
from inspect import getfile | |
@dataclass
class Dummy:
    """Opaque wrapper around a single value.

    The dataclass decorator supplies ``__init__``, ``__repr__`` and
    ``__eq__``; the class defines no ordering or arithmetic methods of its
    own, so instances can be tested for equality but not ranked or summed.
    """

    # The wrapped payload; any object is accepted.
    value: object
@dataclass
@total_ordering
class Comparable:
    """Wrapper around a single value that supports ordering comparisons.

    ``__lt__`` is written by hand, ``total_ordering`` derives the remaining
    rich comparisons from it, and the dataclass decorator supplies
    ``__init__``, ``__repr__`` and ``__eq__``.
    """

    # The wrapped payload; it must itself support ``<`` for ordering to work.
    value: object

    def __lt__(self, other):
        """Return whether this wrapper's value sorts before *other*'s value.

        Returns ``NotImplemented`` when *other* is not a ``Comparable`` so
        that Python can try the reflected operation or raise ``TypeError``,
        rather than failing with ``AttributeError`` on ``other.value``.
        """
        if not isinstance(other, Comparable):
            return NotImplemented
        return self.value < other.value
@dataclass
class AlmostNumeric(int64):
    """``numpy.int64`` subclass that carries its payload in ``value``.

    Only addition and true division are overridden; both work on the
    ``value`` attributes of the two operands and wrap the result in a new
    ``AlmostNumeric``. Consequently the right-hand operand must also expose
    a ``value`` attribute.
    """

    # The payload the overridden arithmetic operates on.
    value: object

    def __add__(self, other):
        """Return a new ``AlmostNumeric`` holding the sum of both payloads."""
        return AlmostNumeric(self.value + other.value)

    def __truediv__(self, other):
        """Return a new ``AlmostNumeric`` holding the quotient of both payloads."""
        return AlmostNumeric(self.value / other.value)


# Sanity check for the demo below: because the class derives from ``int64``,
# pandas classifies the type itself as numeric.
assert is_numeric_dtype(AlmostNumeric)
if __name__ == '__main__':
    # One integer seed, derived from a fixed date, drives both NumPy's
    # generator and the stdlib `random` module so the output is repeatable.
    s = Timestamp('2021-07-04').asm8.astype('uint32')
    rng = default_rng(s)
    # `random.seed` rejects NumPy scalars on Python 3.11+ (only None, int,
    # float, str, bytes and bytearray are supported), so convert explicitly.
    seed(int(s))

    # Draw a 5x4 grid of single letters and reinterpret each row of four
    # characters as one 4-character string: five random ticker symbols.
    tickers = rng.choice([*ascii_lowercase], size=(5, 4)).view('<U4').ravel()
    dates = date_range('2021-07-04', periods=4)

    # One row per (date, ticker) pair. The column expressions are kept in
    # their original order so the random stream is consumed identically.
    df = DataFrame({
        'date': repeat(dates, len(tickers)),
        'ticker': tile(tickers, len(dates)),
        # Per-ticker base price (floored at 10) plus a cumulative random
        # walk across dates.
        'price': tile(
            rng.normal(loc=100, scale=50, size=len(tickers)).clip(10),
            len(dates)
        ) + rng.normal(scale=5, size=(len(dates), len(tickers))).cumsum(axis=0).ravel(),
        'volume': rng.integers(0, 1_000, size=len(tickers) * len(dates)),
        'signal': rng.normal(size=len(tickers) * len(dates)),
        'flag': rng.choice([True, False], size=len(tickers) * len(dates)),
    }).set_index(['date', 'ticker']).sort_index()

    # NOTE(review): the "operates only on ..." comments below describe how
    # the author's pandas version handled non-comparable / non-numeric
    # columns; newer pandas releases may raise instead -- confirm against
    # the installed version.
    print(
        df.groupby('ticker').max(),
        df.groupby('ticker').min(),
        # operates only on comparable columns
        df.pipe(lambda df: df.assign(signal=df['signal'].apply(Dummy))).groupby('ticker').max().columns,
        df.pipe(lambda df: df.assign(signal=df['signal'].apply(Comparable))).groupby('ticker').max().columns,
        # operates on numeric columns (`bool` is a numeric type)
        df.groupby('ticker').sum(),
        df.groupby('ticker').prod(),
        df.groupby('ticker').mean(),
        df.groupby('ticker').median(),
        df.groupby('ticker').std(),
        df.groupby('ticker').var(),
        df.groupby('ticker').skew(),
        # operates only on numeric columns
        df.pipe(lambda df: df.assign(signal=df['signal'].apply(Dummy))).groupby('ticker').mean().columns,
        df.pipe(lambda df: df.assign(signal=df['signal'].apply(AlmostNumeric))).groupby('ticker').mean().columns,
        df.groupby('ticker').cumsum(),
        df.groupby('ticker').cumprod(),
        df.groupby('ticker').cummin(),
        df.groupby('ticker').cummax(),
        df.groupby('ticker').count(),
        df.groupby('ticker').cumcount(),
        df.groupby('ticker').first(),
        df.groupby('ticker').last(),
        df.groupby('ticker').nth(1),
        df.groupby('ticker').nth(-2),
        df.groupby('ticker').rank(),
        # the boolean `flag` column is excluded before computing pct_change
        df.pipe(lambda x: x[x.columns.difference({'flag'})])
          .groupby('ticker').pct_change(),
        sep=f'\n{"-" * 78}\n',
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For the full write-up and discussion, sign up for the “Python Expert” newsletter!
bit.ly/expert-python