Matteo Courthoud (matteocourthoud)
import numpy as np

# Placebo test: fit a synthetic control for every city and compare its fit
# before and after the treatment year
lambdas = {}
for city in cities:
    mse_pre = synth_predict(df, SyntheticControl(), city, treatment_year).mse   # pre-treatment error
    mse_tot = np.mean((df[f'Synthetic {city}'] - df[city])**2)                  # error over the full sample
    lambdas[city] = (mse_tot - mse_pre) / mse_pre                               # relative deterioration of fit

# p-value: share of cities whose fit deteriorates more than the treated city's
print(f"p-value: {np.mean(np.fromiter(lambdas.values(), dtype='float') > lambdas[treated_city]):.4}")
import seaborn as sns
import matplotlib.pyplot as plt

# Plot the gap between the observed series and its synthetic control
def plot_difference(df, city, year, vline=True, hline=True, **kwargs):
    sns.lineplot(x=df['year'], y=df[city] - df[f'Synthetic {city}'], **kwargs)
    if vline:
        plt.axvline(x=year, ls=":", color='C2', label='Self-driving cars', lw=3, zorder=100)
        plt.legend()
    if hline: sns.lineplot(x=df['year'], y=0, lw=3, color='k', zorder=1)
    plt.title("Estimated effect of self-driving cars");
from toolz import partial
from scipy.optimize import fmin_slsqp

class SyntheticControl():

    # Loss function: RMSE of the weighted combination of control units
    def loss(self, W, X, y) -> float:
        return np.sqrt(np.mean((y - X.dot(W))**2))
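    # NOTE: the preview above stops at the loss function, but the rest of this page calls
    # model.fit(X, y).predict(...) and reads a .mse attribute, so the class needs at least the
    # two methods below. They are a hedged reconstruction (weights constrained to be
    # non-negative and sum to one, optimized with fmin_slsqp), not necessarily the author's code.
    def fit(self, X, y):
        w0 = np.full(X.shape[1], 1 / X.shape[1])                   # start from equal weights
        self.W = fmin_slsqp(partial(self.loss, X=X, y=y), w0,
                            f_eqcons=lambda w: np.sum(w) - 1,      # weights sum to one
                            bounds=[(0.0, 1.0)] * X.shape[1],      # weights in [0, 1]
                            disp=False)
        self.mse = np.mean((y - X.dot(self.W))**2)                 # pre-treatment MSE, used by the placebo test
        return self

    def predict(self, X):
        return X.dot(self.W)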
# Fit model on pre-treatment years and generate the synthetic (counterfactual) series
def synth_predict(df, model, city, year):
    other_cities = [c for c in cities if c not in ['year', city]]
    y = df.loc[df['year'] <= year, city]               # treated unit, pre-treatment
    X = df.loc[df['year'] <= year, other_cities]       # donor pool, pre-treatment
    df[f'Synthetic {city}'] = model.fit(X, y).predict(df[other_cities])
    return model
# Plot the observed and synthetic series around the treatment year
def plot_lines(df, line1, line2, year, hline=True):
    sns.lineplot(x=df['year'], y=df[line1].values, label=line1)
    sns.lineplot(x=df['year'], y=df[line2].values, label=line2)
    plt.axvline(x=year, ls=":", color='C2', label='Self-Driving Cars', zorder=1)
    plt.legend();
    plt.title("Average revenue per day (in M$)");
import pandas as pd
from joblib import Parallel, delayed

# Simulate the three estimators over 100 draws of the data generating process
def simulate_estimators(X_e, X_mu, D, y):
    r = Parallel(n_jobs=8)(delayed(compare_estimators)(X_e, X_mu, D, y, i) for i in range(100))
    df_tau = pd.DataFrame(r, columns=['S-learn', 'IPW', 'AIPW'])
    plot = sns.boxplot(data=pd.melt(df_tau), x='variable', y='value', linewidth=2);
    plot.set(title=r"Distribution of $\hat{\tau}$ and its components", xlabel='', ylabel='')
    plot.axhline(2, c='r', ls=':');   # reference line at the simulated true effect
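A call like the one below runs the simulation; the covariate, treatment, and outcome names are hypothetical placeholders for whatever dgp_darkmode() actually generates, not names taken from the gists.

# Hypothetical column names
simulate_estimators(X_e=['male', 'age', 'hours'], X_mu=['male', 'age', 'hours'],
                    D='dark_mode', y='read_time')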
from sklearn.linear_model import LinearRegression, LogisticRegression

# One simulation draw: S-learner, IPW and AIPW estimates of the average treatment effect
def compare_estimators(X_e, X_mu, D, y, seed):
    df = dgp_darkmode().generate_data(seed=seed)
    e = estimate_e(df, X_e, D, LogisticRegression())               # propensity scores
    mu0, mu1 = estimate_mu(df, X_mu, D, y, LinearRegression())     # predicted potential outcomes
    slearn = mu1 - mu0
    ipw = (df[D] / e - (1-df[D]) / (1-e)) * df[y]
    aipw = slearn + df[D] / e * (df[y] - mu1) - (1-df[D]) / (1-e) * (df[y] - mu0)
    return np.mean((slearn, ipw, aipw), axis=1)
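estimate_e and estimate_mu are not shown on this page. The sketch below is a reconstruction consistent with how they are called above: propensity scores from predict_proba, and potential-outcome predictions from a single outcome model with the treatment as a regressor. It is an assumption, not the author's code.

def estimate_e(df, X, D, model):
    # Propensity score P(D = 1 | X)
    return model.fit(df[X], df[D]).predict_proba(df[X])[:, 1]

def estimate_mu(df, X, D, y, model):
    # Single outcome model with the treatment as a feature, evaluated at D = 0 and D = 1
    mu = model.fit(df[X + [D]], df[y])
    mu0 = mu.predict(df[X + [D]].assign(**{D: 0}))
    mu1 = mu.predict(df[X + [D]].assign(**{D: 1}))
    return mu0, mu1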
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegressionCV

# X-learner: first-stage outcome models, one per treatment arm
def X_learner(dgp, model, y, D, X):
    temp = dgp.generate_data(true_te=True).sort_values(X)
    # Mu
    mu0 = model.fit(temp.loc[temp[D]==0, X], temp.loc[temp[D]==0, y])
    temp['mu0_hat_'] = mu0.predict(temp[X])
    mu1 = model.fit(temp.loc[temp[D]==1, X], temp.loc[temp[D]==1, y])
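    # NOTE: the preview stops above; the continuation below is a hedged sketch following the
    # standard X-learner recipe (second-stage models on imputed treatment effects, then
    # propensity weighting via the LogisticRegressionCV imported earlier). The 'tau_hat'
    # column and the final plot_TE call are assumptions, not the author's code.
    temp['mu1_hat_'] = mu1.predict(temp[X])
    # Tau: second stage, fit on imputed individual treatment effects
    # (predictions are stored right after each fit because the same `model` object is reused)
    tau0 = model.fit(temp.loc[temp[D]==0, X], temp.loc[temp[D]==0, 'mu1_hat_'] - temp.loc[temp[D]==0, y])
    temp['tau0_hat'] = tau0.predict(temp[X])
    tau1 = model.fit(temp.loc[temp[D]==1, X], temp.loc[temp[D]==1, y] - temp.loc[temp[D]==1, 'mu0_hat_'])
    temp['tau1_hat'] = tau1.predict(temp[X])
    # Combine the two CATE estimates, weighted by the propensity score
    e = LogisticRegressionCV().fit(temp[X], temp[D]).predict_proba(temp[X])[:, 1]
    temp['tau_hat'] = e * temp['tau0_hat'] + (1 - e) * temp['tau1_hat']
    plot_TE(temp, true_te=True)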
# T-learner: two outcome models, one fit on the control group and one on the treated group
def T_learner(dgp, model, y, D, X):
    temp = dgp.generate_data(true_te=True).sort_values(X)
    mu0 = model.fit(temp.loc[temp[D]==0, X], temp.loc[temp[D]==0, y])
    temp['mu0_hat'] = mu0.predict(temp[X])
    mu1 = model.fit(temp.loc[temp[D]==1, X], temp.loc[temp[D]==1, y])
    temp['mu1_hat'] = mu1.predict(temp[X])
    plot_TE(temp, true_te=True)
# S-learner: a single outcome model with the treatment indicator as a regressor
def S_learner(dgp, model, y, D, X):
    temp = dgp.generate_data(true_te=True).sort_values(X)
    mu = model.fit(temp[X + [D]], temp[y])
    temp['mu0_hat'] = mu.predict(temp[X + [D]].assign(premium=0))   # assumes the treatment column D is 'premium'
    temp['mu1_hat'] = mu.predict(temp[X + [D]].assign(premium=1))
    plot_TE(temp, true_te=True)
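plot_TE is called throughout but not shown on this page. The sketch below is one plausible version, assuming the data is sorted by a single covariate column named 'x' and that generate_data(true_te=True) stores the true effect in a column named 'true_te'; both names are hypothetical, not taken from the gists.

# Hedged sketch; 'x' and 'true_te' are hypothetical column names
def plot_TE(temp, true_te=False):
    est = temp['tau_hat'] if 'tau_hat' in temp else temp['mu1_hat'] - temp['mu0_hat']
    sns.lineplot(x=temp['x'], y=est, label=r'$\hat{\tau}(x)$')
    if true_te:
        sns.lineplot(x=temp['x'], y=temp['true_te'], label=r'$\tau(x)$', color='k', ls='--')
    plt.legend()
    plt.title("Estimated vs true treatment effect");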