Experiments inspired by Section 4.5 here --> https://causalinference.gitlab.io/causal-reasoning-book-chapter4/. The script simulates confounded data with a known treatment effect (beta) and compares how well a neural network and a linear regression recover it.
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
import sklearn.datasets
import time
import torch

from sklearn.linear_model import LinearRegression
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

def sigmoid(x):
    return 1 / (1 + math.exp(-x))


def stochastically_convert_to_binary(p):
    # Return a scalar 0/1 draw so that np.vectorize yields a 1-D array of actions.
    return np.random.choice([0, 1], p=[1 - p, p])

def generate_dataset(num_samples, beta, num_confounders, variance_outcome):
    # Generating the confounders.
    cov_mat = sklearn.datasets.make_spd_matrix(num_confounders) * 10
    means_vec = np.random.uniform(-1, 1, num_confounders)
    W = np.random.multivariate_normal(means_vec, cov_mat, size=num_samples)

    # Sampling coefficients for the confounders.
    coeff_W_t = np.random.uniform(-1, 1, num_confounders)
    coeff_W_y = np.random.uniform(-abs(beta), abs(beta), num_confounders)

    # Generating the actions.
    eps_t = np.random.normal(0, 1, num_samples)
    t = W @ coeff_W_t + eps_t
    prop = np.vectorize(sigmoid)(t)
    # Clip the propensities to keep overlap between treated and untreated units.
    prop = np.where(prop > 0.95, 0.95, prop)
    prop = np.where(prop < 0.05, 0.05, prop)
    t = np.vectorize(stochastically_convert_to_binary)(prop)

    # Generating the outcomes (nonlinear in the first confounder via the w0 ** 2 term).
    eps_y = np.random.normal(0, math.sqrt(variance_outcome), num_samples)
    y = beta * t + W @ coeff_W_y + W[:, 0] * W[:, 0] + eps_y

    df = pd.DataFrame({"action": t, "outcome": y})
    for i in range(num_confounders):
        df["w" + str(i)] = W[:, i]

    df = df.astype({"action": "bool"}, copy=False)
    return df
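
# Illustration (not part of the original gist): because the outcome above is generated
# as y = beta * t + W @ coeff_W_y + w0 ** 2 + eps_y, the true average treatment effect
# of the binary action is exactly beta. The experiments below check how closely a
# neural network and a linear regression (whose features include w0 but not the
# quadratic w0 ** 2 term) recover beta from finite samples.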

class CausalDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.Tensor(X)
        self.y = torch.Tensor(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {"X": self.X[idx], "y": self.y[idx]}

SEED = 2010
np.random.seed(SEED)
torch.random.manual_seed(SEED)

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Simulation settings.
num_simulations = 20
data_samples = [1000, 10000]
beta = 10
num_confounders = 10
var_eps = 10

# Training settings. With epochs = 50 and print_every = 100, per-epoch logging is
# effectively disabled.
batch_size = 128
epochs = 50
print_every = 100
patience = 5
train_prop = 0.95

results = {}
for num_data_samples in data_samples:
    nn_effect_ests = []
    nn_test_losses = []
    lm_effect_ests = []
    lm_test_losses = []
    for sim in range(num_simulations):
        print(f"sim: {sim}", flush=True)
        data = generate_dataset(
            num_samples=2 * num_data_samples,
            beta=beta,
            num_confounders=num_confounders,
            variance_outcome=var_eps,
        )
        (data, test_data) = (data.iloc[:num_data_samples], data.iloc[num_data_samples:])

        # Column 0 is the action and column 1 is the outcome, so the features are the
        # action plus the confounders.
        data_arr = data.values.astype("float")
        y = data_arr[:, 1]
        X = np.hstack([data_arr[:, :1], data_arr[:, 2:]])

        n_train = int(train_prop * len(X))
        (train_X, train_y) = (X[:n_train], y[:n_train])
        (valid_X, valid_y) = (X[n_train:], y[n_train:])
        train_dataset = CausalDataset(train_X, train_y)
        valid_dataset = CausalDataset(valid_X, valid_y)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
        model = nn.Sequential(
            nn.Linear(X.shape[1], 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            # Note: there is no activation between the final two Linear layers.
            nn.Linear(512, 1),
        ).to(device)
        train_params = [params for params in model.parameters()]
        lr = 1e-3
        optimizer = optim.Adam(train_params, lr=lr)
        criterion = nn.MSELoss(reduction="sum")

        best_train_loss = float("inf")
        best_valid_loss = float("inf")
        no_improvement = 0
        for epoch in range(epochs):
            # Evaluate on the validation set first.
            model.eval()
            total_valid_loss = 0.0
            with torch.no_grad():
                n_valid = 0
                for valid_tensors in valid_loader:
                    preds = model(valid_tensors["X"].to(device)).flatten()
                    loss = criterion(preds, valid_tensors["y"].to(device))
                    total_valid_loss += loss.item()
                    n_valid += len(preds)

            total_valid_loss /= n_valid
            if total_valid_loss < best_valid_loss:
                best_valid_loss = total_valid_loss
                no_improvement = 0
                torch.save(model.state_dict(), "best_params.pth")
            else:
                no_improvement += 1
                # Decay the learning rate when the validation loss plateaus.
                if no_improvement == patience:
                    no_improvement = 0
                    # print(f"\nReducing learning rate on epoch {epoch}.", flush=True)
                    for g in optimizer.param_groups:
                        g["lr"] *= 0.1

            model.train()
            total_train_loss = 0.0
            n_train = 0
            start_time = time.time()
            for train_tensors in train_loader:
                optimizer.zero_grad()
                preds = model(train_tensors["X"].to(device)).flatten()
                loss = criterion(preds, train_tensors["y"].to(device))
                total_train_loss += loss.item()
                loss.backward()
                optimizer.step()
                n_train += len(preds)

            epoch_time = time.time() - start_time
            total_train_loss /= n_train
            if total_train_loss < best_train_loss:
                best_train_loss = total_train_loss

            if (epoch > 0) and (epoch % print_every == 0):
                print(f"\nepoch: {epoch}")
                print(f"total_train_loss: {total_train_loss}")
                print(f"best_train_loss: {best_train_loss}")
                print(f"total_valid_loss: {total_valid_loss}")
                print(f"best_valid_loss: {best_valid_loss}")
                print(f"epoch_time: {epoch_time:.2f}", flush=True)
        # Reload the parameters with the best validation loss before evaluating.
        model.load_state_dict(torch.load("best_params.pth"))
        model.eval()

        data_arr = test_data.values.astype("float")
        test_y = data_arr[:, 1]
        test_X = np.hstack([data_arr[:, :1], data_arr[:, 2:]])
        test_dataset = CausalDataset(test_X, test_y)
        test_loader = DataLoader(test_dataset, batch_size=batch_size)

        total_test_loss = 0.0
        with torch.no_grad():
            n_test = 0
            t0s = []
            t1s = []
            for test_tensors in test_loader:
                preds = model(test_tensors["X"].to(device)).flatten()
                loss = criterion(preds, test_tensors["y"].to(device))
                total_test_loss += loss.item()
                n_test += len(preds)

                # Estimate the effect by setting the action column to 0 and then 1 and
                # predicting the outcome under each intervention.
                test_tensors = test_tensors["X"].to(device)
                test_tensors[:, 0] = 0
                t0s.append(model(test_tensors).flatten())
                test_tensors[:, 0] = 1
                t1s.append(model(test_tensors).flatten())

        total_test_loss /= n_test
        nn_test_losses.append(total_test_loss)
        print(f"nn_test_loss: {nn_test_losses[-1]}")

        t0s = torch.cat(t0s)
        t1s = torch.cat(t1s)
        nn_effect_ests.append((t1s - t0s).mean().item())
        print(f"nn_effect_est: {nn_effect_ests[-1]}")
        # Linear regression baseline: the coefficient on the action column is the
        # effect estimate.
        lm = LinearRegression()
        lm.fit(train_X, train_y)
        lm_effect_ests.append(lm.coef_[0])
        lm_test_losses.append(((lm.predict(test_X) - test_y) ** 2).mean())
        print(f"lm_test_loss: {lm_test_losses[-1]}")
        print(f"lm_effect_est: {lm_effect_ests[-1]}\n")

    results[num_data_samples] = {
        "nn_effect_ests": np.array(nn_effect_ests),
        "nn_test_losses": np.array(nn_test_losses),
        "lm_effect_ests": np.array(lm_effect_ests),
        "lm_test_losses": np.array(lm_test_losses),
    }

data = []
for (num_data_samples, res) in results.items():
    for sim in range(len(res["nn_test_losses"])):
        for m in ["nn", "lm"]:
            row = {
                "sim": sim,
                "num_data_samples": num_data_samples,
                "model": m,
                "test_loss": res[f"{m}_test_losses"][sim],
                "effect_est": res[f"{m}_effect_ests"][sim],
            }
            data.append(row)

df = pd.DataFrame(data)
df.to_csv("supervised_estimates.csv", index=False)

(fig, axs) = plt.subplots(nrows=1, ncols=2)
for (num_data_samples, res) in results.items():
    for (idx, m) in enumerate(["nn", "lm"]):
        label = f"{m}_{num_data_samples}"
        axs[idx].scatter(
            res[f"{m}_test_losses"], np.abs(beta - res[f"{m}_effect_ests"]), label=label
        )
        # axs[idx].scatter(
        #     np.abs(beta - res[f"{m}_effect_ests"]), res[f"{m}_test_losses"], label=label
        # )
        axs[idx].set_xlabel("test_loss")
        axs[idx].set_ylabel(r"| $\beta - \hat{\beta}$ |")
        axs[idx].legend()

plt.tight_layout()
plt.show()
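
As a quick follow-up (not part of the original gist), here is a minimal sketch of summarizing the saved estimates, assuming supervised_estimates.csv was written by the script above:

import pandas as pd

# Load the per-simulation estimates written by the script above.
df = pd.read_csv("supervised_estimates.csv")
beta = 10  # True effect used in the simulations above.

# Mean absolute error of the effect estimates and mean test loss,
# per model and sample size.
df["abs_effect_error"] = (df["effect_est"] - beta).abs()
summary = df.groupby(["model", "num_data_samples"])[["abs_effect_error", "test_loss"]].mean()
print(summary)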