Last active
March 30, 2024 21:06
-
-
Save normanlmfung/657689444a1ebb88cf847cd192b1da96 to your computer and use it in GitHub Desktop.
python_pandas_syntax
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from datetime import datetime | |
from tabulate import tabulate | |
# Illusttration 1. Load some dummy data | |
pd_total_equity = None | |
strategies = [ 'trend_following', 'statsarb', 'triarb' ] | |
strategies_total_equities = {} | |
for strategy in strategies: | |
strategy_total_equity_file = f'./data/{strategy}_total_equity.json' | |
pd_strategy_total_equity = pd.read_json('./data/trend_following_total_equity.json') | |
pd_strategy_total_equity['book'] = strategy | |
pd_strategy_total_equity['strategy_family'] = 'CTA' if 'arb' not in strategy else 'Arbitrage' | |
pd_strategy_total_equity['datetime'] = pd.to_datetime(pd_strategy_total_equity['datetime'], format='%Y%m%d') | |
# Make it easy to filter/group/aggregate | |
pd_strategy_total_equity['year'] = pd_strategy_total_equity['datetime'].dt.year | |
pd_strategy_total_equity['month'] = pd_strategy_total_equity['datetime'].dt.month | |
pd_strategy_total_equity['day'] = pd_strategy_total_equity['datetime'].dt.day | |
pd_strategy_total_equity['tm1_total_equity'] = pd_strategy_total_equity['total_equity'].shift(-1) | |
pd_strategy_total_equity['total_equity_chg'] = pd_strategy_total_equity['total_equity'].diff() | |
pd_strategy_total_equity['monthly_return'] = pd_strategy_total_equity['total_equity'].pct_change() | |
pd_strategy_total_equity['cumulative_interval_return'] = (1 + pd_strategy_total_equity['monthly_return']).cumprod() | |
# pd_strategy_total_equity = pd_strategy_total_equity[~pd_strategy_total_equity.monthly_return.isna()] | |
strategies_total_equities[strategy] = pd_strategy_total_equity | |
pd_total_equity = pd.concat(strategies_total_equities.values(), axis=0) | |
pd_total_equity.set_index(['book','datetime'], inplace=True) | |
pd_total_equity | |
# Illusttration 2. Calculate TimeSeries data - Pandas fast with vectorized operations like these! | |
SLIDING_WINDOW_SIZE : int = int(365/12) | |
pd_total_equity['mean_ema_monthly_return'] = pd_total_equity['monthly_return'].ewm(span=SLIDING_WINDOW_SIZE, adjust=False).mean() | |
pd_total_equity['mean_sma_monthly_return'] = pd_total_equity['monthly_return'].rolling(window = SLIDING_WINDOW_SIZE).mean() | |
pd_total_equity['std_ema_monthly_return'] = pd_total_equity['monthly_return'].ewm(span=SLIDING_WINDOW_SIZE, adjust=False).std() | |
pd_total_equity['std_sma_monthly_return'] = pd_total_equity['monthly_return'].rolling(window = SLIDING_WINDOW_SIZE).std() | |
pd_total_equity['zscore_monthly_return'] = (pd_total_equity['monthly_return'] - pd_total_equity['mean_ema_monthly_return'])/pd_total_equity['std_ema_monthly_return'] | |
pd_total_equity | |
''' | |
Illusttration 3. Basic filtering | |
& Logical AND | |
| Logical OR | |
~ Negate NOT | |
''' | |
# pd_total_equity[pd_total_equity.strategy_family.str.contains('arb', case=False)] | |
# pd_total_equity[(pd_total_equity.monthly_return>=0.0025) | (pd_total_equity.monthly_return<=0.0005)] | |
# pd_total_equity[(pd_total_equity.monthly_return>=0.0025) & (pd_total_equity.year==2022)] | |
# isnull/isna, same, you're looking for null values | |
pd_total_equity[~pd_total_equity.zscore_monthly_return.isnull()] | |
pd_total_equity[~pd_total_equity.zscore_monthly_return.isna()] | |
# Illusttration 4. Filtering by Index columns | |
# pd_total_equity.xs("trend_following") # This will give you a DataFrame with 'datetime' as index. And 'total_equity'... and all the other columns. | |
# pd_total_equity.xs("trend_following").xs(datetime(2021,8,1)) # This will give you one row | |
pd_total_equity.loc[(slice(None), datetime(2021, 8, 1)), :] # If you dont want to filter by index 'book' and only filter by 'datetime' | |
# pd_total_equity.loc['trend_following'][pd_total_equity.loc['trend_following'].index.get_level_values('datetime').year == 2021] | |
# Illusttration 5. Sort | |
pd_total_equity.sort_values(by=['monthly_return', 'turnover'], ascending=[False, True], inplace=False) | |
# Illusttration 6. NaN Handling | |
pd_total_equity['monthly_return'] = pd_total_equity['monthly_return'].fillna(0) # Does not get rid of math.inf, but does replaces math.nan and np.nan | |
pd_total_equity['monthly_return'] = pd_total_equity['monthly_return'].replace([np.inf, -np.inf], -1) | |
pd_total_equity[pd_total_equity.monthly_return.isna()] | |
# Illusttration 7. Lambda transform | |
def _monthly_return_to_buckets(monthly_return): | |
if monthly_return>=0 and monthly_return<0.0010: | |
return "0-10bps" | |
elif monthly_return>=0.0010 and monthly_return<0.0025: | |
return "10-25bps" | |
elif monthly_return>=0.0025 and monthly_return<0.0050: | |
return "25-50bps" | |
elif monthly_return>=0.0050 and monthly_return<0.0075: | |
return "50-75bps" | |
elif monthly_return>=0.0075 and monthly_return<0.0010: | |
return "75-100bps" | |
else: | |
return ">100bps" | |
# Two ways to "apply", and achieve same result: | |
pd_total_equity['monthly_return_bucket'] = pd_total_equity['monthly_return'].apply(lambda monthly_return : _monthly_return_to_buckets(monthly_return)) | |
pd_total_equity['monthly_return_bucket'] = pd_total_equity.apply(lambda rw : _monthly_return_to_buckets(rw['monthly_return']), axis=1) | |
pd_total_equity | |
# Illusttration 8. Aggregation | |
pd_total_equity.groupby(['book', 'year', 'monthly_return_bucket']).agg({'turnover' : 'sum', 'day' : 'count'}) | |
# Illusttration 9. Add one row to data frame | |
new_row = pd.DataFrame( | |
[ | |
{ | |
"book": "basis_calendar_spread", | |
"datetime": datetime(2024,4,1), | |
"total_equity": 1800, | |
"turnover": 1000000 | |
} | |
] | |
) | |
pd_total_equity.reset_index(drop=True, inplace=True) # Drop index first, flatten the data frame.. | |
num_rows_before = pd_total_equity.shape[0] | |
pd_total_equity = pd.concat([pd_total_equity, new_row], axis=0, ignore_index=True) | |
pd_total_equity.set_index(['book','datetime'], inplace=True) # Add back index | |
num_rows_after = pd_total_equity.shape[0] | |
assert(num_rows_before+1==num_rows_after) | |
pd_total_equity.iloc[-1] | |
# Illustration 10: Modify a field of a given row, avoiding: "A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead." | |
# pd_total_equity[pd_total_equity.total_equity==2000]['turnover']=12345 # Warning: "A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead." | |
pd_total_equity.loc[("basis_calendar_spread", datetime(2024, 4, 1)), 'turnover'] = 98765 | |
pd_total_equity.iloc[-1] | |
# Illustration 11: Column to ndarray and to simple list | |
dates = pd_total_equity['monthly_return'].values | |
print(type(dates)) # numpy.ndarray | |
dates = pd_total_equity['monthly_return'].tolist() | |
print(type(dates)) # list | |
# Illustration 12: Basic loop | |
for row_index, row in pd_total_equity.iterrows(): | |
print(f"{row_index}: {row['total_equity']}") | |
# Illustration 13: Pretty print your pandas | |
pd_total_equity.reset_index(drop=True, inplace=True) | |
print(f"{tabulate(pd_total_equity)}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment