Skip to content

Instantly share code, notes, and snippets.

@shreyas90999
Created October 6, 2020 03:51
Show Gist options
  • Save shreyas90999/c1ad82074da017cee9e04fe843af3298 to your computer and use it in GitHub Desktop.
Save shreyas90999/c1ad82074da017cee9e04fe843af3298 to your computer and use it in GitHub Desktop.
stats
# ref - https://www.kaggle.com/gspmoreira/cnn-glove-single-model-private-lb-0-41117-35th
def generate_cbs_stats(train, test):
    """Return standardised ``cat_brand_ship`` price statistics for ``test``.

    Statistics of ``log_price`` are computed per ``cat_brand_ship`` group on
    ``train`` and then left-joined onto ``test`` so that every row of
    ``test`` receives the statistics of its own group.

    Args:
        train: DataFrame providing the ``cat_brand_ship``, ``shipping`` and
            ``log_price`` columns the statistics are computed from.
        test: DataFrame providing a ``cat_brand_ship`` column; it may be the
            same frame as ``train``.

    Returns:
        A 2-D array with one row per row of ``test`` and eight columns, in
        this order: log group count, mean, std, std/mean ratio, lower and
        upper expected log price (mean -/+ 2 std), min and max. Groups that
        do not occur in ``train`` get 0 for every statistic before scaling.

    Note:
        The ``StandardScaler`` is fitted on the merged frame itself, so each
        call is standardised with its own mean and variance.
    """
    # String aggregator names keep pandas' own groupby reductions (sample
    # std, ddof=1); passing numpy callables here is deprecated in recent
    # pandas releases. "size" counts rows per group exactly like ``len`` did.
    df_group = train.groupby('cat_brand_ship').agg(
        cbs_count=('shipping', 'size'),
        cbs_log_price_median=('log_price', 'median'),
        cbs_log_price_mean=('log_price', 'mean'),
        cbs_log_price_std=('log_price', 'std'),
        cbs_log_price_min=('log_price', 'min'),
        cbs_log_price_max=('log_price', 'max'),
    ).reset_index()

    # The sample std is NaN for single-row groups; treat that as no spread.
    df_group['cbs_log_price_std'] = df_group['cbs_log_price_std'].fillna(0)

    mean = df_group['cbs_log_price_mean']
    two_std = df_group['cbs_log_price_std'] * 2

    # A zero mean yields NaN or +/-inf. Map infinities to NaN so they are
    # zero-filled below instead of making StandardScaler raise.
    df_group['cbs_log_price_conf_variance'] = (
        df_group['cbs_log_price_std'] / mean
    ).replace([np.inf, -np.inf], np.nan)
    df_group['cbs_log_count'] = np.log1p(df_group['cbs_count'])
    df_group['cbs_min_expected_log_price'] = (mean - two_std).clip(lower=1.0)
    df_group['cbs_max_expected_log_price'] = mean + two_std

    feature_columns = [
        'cbs_log_count',
        'cbs_log_price_mean',
        'cbs_log_price_std',
        'cbs_log_price_conf_variance',
        'cbs_min_expected_log_price',
        'cbs_max_expected_log_price',
        'cbs_log_price_min',
        'cbs_log_price_max',
    ]

    # Left join keeps every row of ``test``; groups unseen in ``train`` come
    # back as NaN and are replaced by 0.
    merged = test.merge(df_group, how='left', on='cat_brand_ship')
    df_group_stats = merged[feature_columns].fillna(0).values

    scaler = StandardScaler(copy=True)
    cbs_feats_scaled = scaler.fit_transform(df_group_stats)
    return cbs_feats_scaled
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment