Skip to content

Instantly share code, notes, and snippets.

View marnixkoops's full-sized avatar
❤️‍🔥

Marnix Koops marnixkoops

❤️‍🔥
View GitHub Profile
@marnixkoops
marnixkoops / conversion_likelihood.py
Created July 1, 2020 11:20
Cox Proportional Hazard Regression model (Survival Analysis)
"""
# [+] PROJECT INFO
# - Cox Proportional Hazard Regression model (Survival Analysis)
# - Main purpose is to predict post-session conversion likelihood on customer level (cookie_id)
#
# Owner: Marnix Koops / marnixkoops@gmail.com
"""
# ==================================================================================================
# [+] SETUP
@marnixkoops
marnixkoops / combine_csv.py
Created June 23, 2020 09:25
Read and combine multiple CSV files
# Read every CSV file in ./folder and stack them into a single DataFrame.
import glob

import pandas as pd

# glob.glob() returns matches in filesystem-dependent order; sorting makes the
# row order of the combined frame reproducible across runs and machines.
csv_paths = sorted(glob.glob("./folder/*.csv"))

# ignore_index=True discards each file's own index and renumbers the combined rows.
# NOTE(review): pd.concat raises ValueError when the list is empty (no file matched) - confirm
# that failing loudly is the desired behavior for an empty folder.
df = pd.concat(
    [pd.read_csv(csv_path) for csv_path in csv_paths],
    ignore_index=True,
)
@marnixkoops
marnixkoops / logging.py
Created April 1, 2020 09:15
Log to a logfile and to the terminal (stderr)
import logging

# Every record at INFO level or above goes to two destinations at once.
log_handlers = [
    logging.FileHandler("logfile.log"),  # file on disk, path relative to the working directory
    logging.StreamHandler(),  # terminal; with no stream argument this writes to sys.stderr
]

# Configure the root logger once; each line shows timestamp, thread name, level and message.
logging.basicConfig(
    format="%(asctime)s [%(threadName)s] [%(levelname)s] %(message)s",
    handlers=log_handlers,
    level=logging.INFO,
)

# No name is passed, so this is the root logger configured above.
logger = logging.getLogger()
@marnixkoops
marnixkoops / rnn_gru_tf2_embedding.py
Last active February 20, 2021 12:19
Session based product recommendations (GRU Neural Net)
import numpy as np
import pandas as pd
import time
import datetime
import warnings
import gc
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.client import device_lib
@marnixkoops
marnixkoops / downcast_dtypes.py
Created November 30, 2019 13:23
Function to downcast datatypes to reduce memory footprint of pandas df
def downcast_datatypes(df):
    """Downcast the float and integer columns of ``df`` to reduce its memory footprint.

    Columns selected by the ``'float'`` dtype filter are passed through
    ``pd.to_numeric(..., downcast='float')`` and columns selected by the
    ``'int'`` filter through ``pd.to_numeric(..., downcast='integer')``.
    All other columns are left untouched.

    Args:
        df: pandas DataFrame to shrink. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Resolve both column sets up front, before any column is reassigned.
    columns_by_downcast = {
        "float": df.select_dtypes(include=["float"]).columns,
        "integer": df.select_dtypes(include=["int"]).columns,
    }

    # Dicts keep insertion order, so float columns are handled before integer ones.
    for downcast_kind, column_names in columns_by_downcast.items():
        for column_name in column_names:
            df[column_name] = pd.to_numeric(df[column_name], downcast=downcast_kind)

    return df
# %% markdown
# WEER IMPACT
# %%
# SETUP
import pandas as pd
import numpy as np
import pickle
import matplotlib
import matplotlib.pyplot as plt
@marnixkoops
marnixkoops / target_encoder.py
Last active February 5, 2022 16:49
Target Encoding in Python
import pandas as pd
import numpy as np
from custom_code import timefold
from sklearn import preprocessing
def target_encoder(df, column, target, index=None, method='mean'):
"""
Target-based encoding is numerization of a categorical variables via the target variable. Main purpose is to deal
@marnixkoops
marnixkoops / lagged-feats.py
Created October 4, 2018 07:40
lagged target features
# Group by product / time window and compute aggregate features
print('[+] Generating weekly lagged product aggregation features ...')
# One group per (product_id, year, weekofyear); the aggregations applied to 'actual_raw'
# come from `num_week_lag_aggregations`, which is defined outside this excerpt.
agg_week = demand_df.groupby(['product_id', 'year', 'weekofyear'])[
    'actual_raw'].agg(num_week_lag_aggregations)
# Flatten each column label by joining its parts with "_week_lagged_".
# NOTE(review): presumably the labels are tuples of strings (MultiIndex columns) - confirm.
# The columns are iterated directly instead of via `.ravel()`: the labels are the same,
# and Index.ravel() changed its return type across pandas versions.
agg_week.columns = ["_week_lagged_".join(agg_feature)
                    for agg_feature in agg_week.columns]
# Move the group keys out of the index into ordinary columns (rebinding instead of inplace=True).
agg_week = agg_week.reset_index(drop=False)
print('[+] Generating monthly lagged product aggregation features ...')
agg_month = demand_df.groupby(['product_id', 'year', 'month'])[
@marnixkoops
marnixkoops / lgbm_framework.py
Created September 20, 2018 13:53
LightGBM framework
############################################################################################
# [+] SETUP
############################################################################################
import numpy as np
import pandas as pd
import gc
import glob
import os
@marnixkoops
marnixkoops / Jupyter-Themes
Last active December 11, 2018 09:29
Jupyter Theme Settings
# Jupyter notebook theme presets, applied with the `jt` command.
# The two commands differ only in the theme name passed to -t (onedork vs grade3);
# presumably they are alternatives and only one should be run - confirm.
# NOTE(review): flag meanings as listed in the jupyterthemes README - verify with `jt -h`:
#   -t theme, -f code font, -fs code font size, -altp alternate prompt layout,
#   -tfs text/markdown cell font size, -nfs notebook font size, -cellw cell width,
#   -T keep the toolbar visible.
# DARK
jt -t onedork -f sourcemed -fs 10 -altp -tfs 11 -nfs 115 -cellw 80% -T
# LIGHT
jt -t grade3 -f sourcemed -fs 10 -altp -tfs 11 -nfs 115 -cellw 80% -T