This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| lambdas = {} | |
| for city in cities: | |
| mse_pre = synth_predict(df, SyntheticControl(), city, treatment_year).mse | |
| mse_tot = np.mean((df[f'Synthetic {city}'] - df[city])**2) | |
| lambdas[city] = (mse_tot - mse_pre) / mse_pre | |
| print(f"p-value: {np.mean(np.fromiter(lambdas.values(), dtype='float') > lambdas[treated_city]):.4}") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def plot_difference(df, city, year, vline=True, hline=True, **kwargs): | |
| sns.lineplot(x=df['year'], y=df[city] - df[f'Synthetic {city}'], **kwargs) | |
| if vline: | |
| plt.axvline(x=year, ls=":", color='C2', label='Self-driving cars', lw=3, zorder=100) | |
| plt.legend() | |
| if hline: sns.lineplot(x=df['year'], y=0, lw=3, color='k', zorder=1) | |
| plt.title("Estimated effect of self-driving cars"); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from toolz import partial | |
| from scipy.optimize import fmin_slsqp | |
| class SyntheticControl(): | |
| # Loss function | |
| def loss(self, W, X, y) -> float: | |
| return np.sqrt(np.mean((y - X.dot(W))**2)) | |
| # Fit model |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def synth_predict(df, model, city, year): | |
| other_cities = [c for c in cities if c not in ['year', city]] | |
| y = df.loc[df['year'] <= year, city] | |
| X = df.loc[df['year'] <= year, other_cities] | |
| df[f'Synthetic {city}'] = model.fit(X, y).predict(df[other_cities]) | |
| return model |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def plot_lines(df, line1, line2, year, hline=True): | |
| sns.lineplot(x=df['year'], y=df[line1].values, label=line1) | |
| sns.lineplot(x=df['year'], y=df[line2].values, label=line2) | |
| plt.axvline(x=year, ls=":", color='C2', label='Self-Driving Cars', zorder=1) | |
| plt.legend(); | |
| plt.title("Average revenue per day (in M$)"); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from joblib import Parallel, delayed | |
| def simulate_estimators(X_e, X_mu, D, y): | |
| r = Parallel(n_jobs=8)(delayed(compare_estimators)(X_e, X_mu, D, y, i) for i in range(100)) | |
| df_tau = pd.DataFrame(r, columns=['S-learn', 'IPW', 'AIPW']) | |
| plot = sns.boxplot(data=pd.melt(df_tau), x='variable', y='value', linewidth=2); | |
| plot.set(title="Distribution of $\hat τ$ and its components", xlabel='', ylabel='') | |
| plot.axhline(2, c='r', ls=':'); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def compare_estimators(X_e, X_mu, D, y, seed): | |
| df = dgp_darkmode().generate_data(seed=seed) | |
| e = estimate_e(df, X_e, D, LogisticRegression()) | |
| mu0, mu1 = estimate_mu(df, X_mu, D, y, LinearRegression()) | |
| slearn = mu1 - mu0 | |
| ipw = (df[D] / e - (1-df[D]) / (1-e)) * df[y] | |
| aipw = slearn + df[D] / e * (df[y] - mu1) - (1-df[D]) / (1-e) * (df[y] - mu0) | |
| return np.mean((slearn, ipw, aipw), axis=1) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sklearn.neighbors import KNeighborsRegressor | |
| from sklearn.linear_model import LogisticRegressionCV | |
| def X_learner(df, model, y, D, X): | |
| temp = dgp.generate_data(true_te=True).sort_values(X) | |
| # Mu | |
| mu0 = model.fit(temp.loc[temp[D]==0, X], temp.loc[temp[D]==0, y]) | |
| temp['mu0_hat_'] = mu0.predict(temp[X]) | |
| mu1 = model.fit(temp.loc[temp[D]==1, X], temp.loc[temp[D]==1, y]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def T_learner(df, model, y, D, X): | |
| temp = dgp.generate_data(true_te=True).sort_values(X) | |
| mu0 = model.fit(temp.loc[temp[D]==0, X], temp.loc[temp[D]==0, y]) | |
| temp['mu0_hat'] = mu0.predict(temp[X]) | |
| mu1 = model.fit(temp.loc[temp[D]==1, X], temp.loc[temp[D]==1, y]) | |
| temp['mu1_hat'] = mu1.predict(temp[X]) | |
| plot_TE(temp, true_te=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def S_learner(dgp, model, y, D, X): | |
| temp = dgp.generate_data(true_te=True).sort_values(X) | |
| mu = model.fit(temp[X + [D]], temp[y]) | |
| temp['mu0_hat'] = mu.predict(temp[X + [D]].assign(premium=0)) | |
| temp['mu1_hat'] = mu.predict(temp[X + [D]].assign(premium=1)) | |
| plot_TE(temp, true_te=True) |
NewerOlder