❤️‍🔥

Marnix Koops marnixkoops

❤️‍🔥

marnixkoops / conversion_likelihood.py

Created July 1, 2020 11:20

Cox Proportional Hazard Regression model (Survival Analysis)

	"""
	# [+] PROJECT INFO
	# - Cox Proportional Hazard Regression model (Survival Analysis)
	# - Main purpose is to predict post-session conversion likelihood on customer level (cookie_id)
	#
	# Owner: Marnix Koops / marnixkoops@gmail.com
	"""

	# ==================================================================================================
	# [+] SETUP

marnixkoops / combine_csv.py

Created June 23, 2020 09:25

Read and combine multiple CSVs

marnixkoops / logging.py

Created April 1, 2020 09:15

Log to a logfile and to the console (stderr)

	import logging

	# Root logging setup: every record at INFO or above is written both to
	# "logfile.log" on disk and to a stream handler (the terminal).
	logging.basicConfig(
	    handlers=[
	        logging.FileHandler("logfile.log"),
	        logging.StreamHandler(),
	    ],
	    format="%(asctime)s [%(threadName)s] [%(levelname)s] %(message)s",
	    level=logging.INFO,
	)
	logger = logging.getLogger()  # no name given, so this is the root logger

marnixkoops / rnn_gru_tf2_embedding.py

Last active February 20, 2021 12:19

Session based product recommendations (GRU Neural Net)

	import numpy as np
	import pandas as pd
	import time
	import datetime
	import warnings
	import gc

	import tensorflow as tf
	from tensorflow import keras
	from tensorflow.python.client import device_lib

marnixkoops / downcast_dtypes.py

Created November 30, 2019 13:23

Function to downcast datatypes to reduce memory footprint of pandas df

	def downcast_datatypes(df):
	    """Downcast the float and integer columns of ``df`` to smaller dtypes.

	    Columns selected with ``include=['float']`` are passed through
	    ``pd.to_numeric(..., downcast='float')`` and columns selected with
	    ``include=['int']`` through ``pd.to_numeric(..., downcast='integer')``.
	    Columns of any other dtype are not touched.

	    Args:
	        df: pandas DataFrame to shrink. It is modified in place.

	    Returns:
	        The same DataFrame object, with the converted columns assigned back.
	    """
	    # Only the column labels are needed, so keep the indexes rather than
	    # the selected sub-frames.
	    float_cols = df.select_dtypes(include=['float']).columns
	    int_cols = df.select_dtypes(include=['int']).columns

	    for col in float_cols:
	        df[col] = pd.to_numeric(df[col], downcast='float')
	    for col in int_cols:
	        df[col] = pd.to_numeric(df[col], downcast='integer')

	    return df

marnixkoops / weer

Created April 10, 2019 13:01

	# %% markdown
	# WEER IMPACT

	# %%
	# SETUP
	import pandas as pd
	import numpy as np
	import pickle
	import matplotlib
	import matplotlib.pyplot as plt

marnixkoops / target_encoder.py

Last active February 5, 2022 16:49

Target Encoding in Python

	import pandas as pd
	import numpy as np

	from custom_code import timefold
	from sklearn import preprocessing


	def target_encoder(df, column, target, index=None, method='mean'):
	"""
	Target-based encoding is numerization of a categorical variables via the target variable. Main purpose is to deal

marnixkoops / lagged-feats.py

Created October 4, 2018 07:40

lagged target features

	# Weekly window: aggregate 'actual_raw' per product / year / week of year
	print('[+] Generating weekly lagged product aggregation features ...')
	agg_week = (
	    demand_df
	    .groupby(['product_id', 'year', 'weekofyear'])['actual_raw']
	    .agg(num_week_lag_aggregations)
	)
	# Join the parts of every column label into one flat feature name
	agg_week.columns = [
	    "_week_lagged_".join(label_parts)
	    for label_parts in agg_week.columns.ravel()
	]
	# Move the group keys out of the index and back into regular columns
	agg_week = agg_week.reset_index(drop=False)

	print('[+] Generating monthly lagged product aggregation features ...')
	agg_month = demand_df.groupby(['product_id', 'year', 'month'])[

marnixkoops / lgbm_framework.py

Created September 20, 2018 13:53

LightGBM framework

marnixkoops / Jupyter-Themes

Last active December 11, 2018 09:29

Jupyter Theme Settings

	# DARK
	# Runs the `jt` command with the "onedork" theme name.
	# NOTE(review): `jt` is presumably the jupyter-themes CLI and the remaining
	# flags its font / size / cell-width / toolbar options — confirm with `jt -h`.
	jt -t onedork -f sourcemed -fs 10 -altp -tfs 11 -nfs 115 -cellw 80% -T

	# LIGHT
	# Same flags as the dark variant; only the theme name ("grade3") differs.
	jt -t grade3 -f sourcemed -fs 10 -altp -tfs 11 -nfs 115 -cellw 80% -T