Skip to content

Instantly share code, notes, and snippets.

@normanlmfung
Last active March 30, 2024 21:06
Show Gist options
  • Save normanlmfung/657689444a1ebb88cf847cd192b1da96 to your computer and use it in GitHub Desktop.
Save normanlmfung/657689444a1ebb88cf847cd192b1da96 to your computer and use it in GitHub Desktop.
python_pandas_syntax
import pandas as pd
import numpy as np
from datetime import datetime
from tabulate import tabulate
# Illusttration 1. Load some dummy data
pd_total_equity = None
strategies = [ 'trend_following', 'statsarb', 'triarb' ]
strategies_total_equities = {}
for strategy in strategies:
strategy_total_equity_file = f'./data/{strategy}_total_equity.json'
pd_strategy_total_equity = pd.read_json('./data/trend_following_total_equity.json')
pd_strategy_total_equity['book'] = strategy
pd_strategy_total_equity['strategy_family'] = 'CTA' if 'arb' not in strategy else 'Arbitrage'
pd_strategy_total_equity['datetime'] = pd.to_datetime(pd_strategy_total_equity['datetime'], format='%Y%m%d')
# Make it easy to filter/group/aggregate
pd_strategy_total_equity['year'] = pd_strategy_total_equity['datetime'].dt.year
pd_strategy_total_equity['month'] = pd_strategy_total_equity['datetime'].dt.month
pd_strategy_total_equity['day'] = pd_strategy_total_equity['datetime'].dt.day
pd_strategy_total_equity['tm1_total_equity'] = pd_strategy_total_equity['total_equity'].shift(-1)
pd_strategy_total_equity['total_equity_chg'] = pd_strategy_total_equity['total_equity'].diff()
pd_strategy_total_equity['monthly_return'] = pd_strategy_total_equity['total_equity'].pct_change()
pd_strategy_total_equity['cumulative_interval_return'] = (1 + pd_strategy_total_equity['monthly_return']).cumprod()
# pd_strategy_total_equity = pd_strategy_total_equity[~pd_strategy_total_equity.monthly_return.isna()]
strategies_total_equities[strategy] = pd_strategy_total_equity
pd_total_equity = pd.concat(strategies_total_equities.values(), axis=0)
pd_total_equity.set_index(['book','datetime'], inplace=True)
pd_total_equity
# Illusttration 2. Calculate TimeSeries data - Pandas fast with vectorized operations like these!
SLIDING_WINDOW_SIZE : int = int(365/12)
pd_total_equity['mean_ema_monthly_return'] = pd_total_equity['monthly_return'].ewm(span=SLIDING_WINDOW_SIZE, adjust=False).mean()
pd_total_equity['mean_sma_monthly_return'] = pd_total_equity['monthly_return'].rolling(window = SLIDING_WINDOW_SIZE).mean()
pd_total_equity['std_ema_monthly_return'] = pd_total_equity['monthly_return'].ewm(span=SLIDING_WINDOW_SIZE, adjust=False).std()
pd_total_equity['std_sma_monthly_return'] = pd_total_equity['monthly_return'].rolling(window = SLIDING_WINDOW_SIZE).std()
pd_total_equity['zscore_monthly_return'] = (pd_total_equity['monthly_return'] - pd_total_equity['mean_ema_monthly_return'])/pd_total_equity['std_ema_monthly_return']
pd_total_equity
'''
Illusttration 3. Basic filtering
& Logical AND
| Logical OR
~ Negate NOT
'''
# pd_total_equity[pd_total_equity.strategy_family.str.contains('arb', case=False)]
# pd_total_equity[(pd_total_equity.monthly_return>=0.0025) | (pd_total_equity.monthly_return<=0.0005)]
# pd_total_equity[(pd_total_equity.monthly_return>=0.0025) & (pd_total_equity.year==2022)]
# isnull/isna, same, you're looking for null values
pd_total_equity[~pd_total_equity.zscore_monthly_return.isnull()]
pd_total_equity[~pd_total_equity.zscore_monthly_return.isna()]
# Illusttration 4. Filtering by Index columns
# pd_total_equity.xs("trend_following") # This will give you a DataFrame with 'datetime' as index. And 'total_equity'... and all the other columns.
# pd_total_equity.xs("trend_following").xs(datetime(2021,8,1)) # This will give you one row
pd_total_equity.loc[(slice(None), datetime(2021, 8, 1)), :] # If you dont want to filter by index 'book' and only filter by 'datetime'
# pd_total_equity.loc['trend_following'][pd_total_equity.loc['trend_following'].index.get_level_values('datetime').year == 2021]
# Illusttration 5. Sort
pd_total_equity.sort_values(by=['monthly_return', 'turnover'], ascending=[False, True], inplace=False)
# Illusttration 6. NaN Handling
pd_total_equity['monthly_return'] = pd_total_equity['monthly_return'].fillna(0) # Does not get rid of math.inf, but does replaces math.nan and np.nan
pd_total_equity['monthly_return'] = pd_total_equity['monthly_return'].replace([np.inf, -np.inf], -1)
pd_total_equity[pd_total_equity.monthly_return.isna()]
# Illusttration 7. Lambda transform
def _monthly_return_to_buckets(monthly_return):
if monthly_return>=0 and monthly_return<0.0010:
return "0-10bps"
elif monthly_return>=0.0010 and monthly_return<0.0025:
return "10-25bps"
elif monthly_return>=0.0025 and monthly_return<0.0050:
return "25-50bps"
elif monthly_return>=0.0050 and monthly_return<0.0075:
return "50-75bps"
elif monthly_return>=0.0075 and monthly_return<0.0010:
return "75-100bps"
else:
return ">100bps"
# Two ways to "apply", and achieve same result:
pd_total_equity['monthly_return_bucket'] = pd_total_equity['monthly_return'].apply(lambda monthly_return : _monthly_return_to_buckets(monthly_return))
pd_total_equity['monthly_return_bucket'] = pd_total_equity.apply(lambda rw : _monthly_return_to_buckets(rw['monthly_return']), axis=1)
pd_total_equity
# Illusttration 8. Aggregation
pd_total_equity.groupby(['book', 'year', 'monthly_return_bucket']).agg({'turnover' : 'sum', 'day' : 'count'})
# Illusttration 9. Add one row to data frame
new_row = pd.DataFrame(
[
{
"book": "basis_calendar_spread",
"datetime": datetime(2024,4,1),
"total_equity": 1800,
"turnover": 1000000
}
]
)
pd_total_equity.reset_index(drop=True, inplace=True) # Drop index first, flatten the data frame..
num_rows_before = pd_total_equity.shape[0]
pd_total_equity = pd.concat([pd_total_equity, new_row], axis=0, ignore_index=True)
pd_total_equity.set_index(['book','datetime'], inplace=True) # Add back index
num_rows_after = pd_total_equity.shape[0]
assert(num_rows_before+1==num_rows_after)
pd_total_equity.iloc[-1]
# Illustration 10: Modify a field of a given row, avoiding: "A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead."
# pd_total_equity[pd_total_equity.total_equity==2000]['turnover']=12345 # Warning: "A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead."
pd_total_equity.loc[("basis_calendar_spread", datetime(2024, 4, 1)), 'turnover'] = 98765
pd_total_equity.iloc[-1]
# Illustration 11: Column to ndarray and to simple list
dates = pd_total_equity['monthly_return'].values
print(type(dates)) # numpy.ndarray
dates = pd_total_equity['monthly_return'].tolist()
print(type(dates)) # list
# Illustration 12: Basic loop
for row_index, row in pd_total_equity.iterrows():
print(f"{row_index}: {row['total_equity']}")
# Illustration 13: Pretty print your pandas
pd_total_equity.reset_index(drop=True, inplace=True)
print(f"{tabulate(pd_total_equity)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment