# Gist by @airalcorn2 (last active December 10, 2021).
# Experiments inspired by Section 4.5 here --> https://causalinference.gitlab.io/causal-reasoning-book-chapter4/.
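#
# The script simulates data with a known treatment effect (beta), a set of confounders,
# and a quadratic confounder term, then compares two outcome models on each simulated
# dataset: a small feedforward network and ordinary linear regression. The network's
# effect estimate is the average difference between its predictions with the action set
# to 1 versus 0; the linear model's estimate is its action coefficient. Test losses and
# effect estimates across simulations are written to "supervised_estimates.csv" and
# plotted.
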
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
import sklearn.datasets
import time
import torch
from sklearn.linear_model import LinearRegression
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
def sigmoid(x):
    return 1 / (1 + math.exp(-x))


def stochastically_convert_to_binary(p):
    # Draw a single Bernoulli(p) sample; applied elementwise via np.vectorize below.
    return np.random.choice([0, 1], 1, p=[1 - p, p])
def generate_dataset(num_samples, beta, num_confounders, variance_outcome):
    # Generating the confounders.
    cov_mat = sklearn.datasets.make_spd_matrix(num_confounders) * 10
    means_vec = np.random.uniform(-1, 1, num_confounders)
    W = np.random.multivariate_normal(means_vec, cov_mat, size=num_samples)
    # Sampling coefficients for the confounders.
    coeff_W_t = np.random.uniform(-1, 1, num_confounders)
    coeff_W_y = np.random.uniform(-abs(beta), abs(beta), num_confounders)
    # Generating the actions. Propensities are clipped to [0.05, 0.95] to ensure overlap.
    eps_t = np.random.normal(0, 1, num_samples)
    t = W @ coeff_W_t + eps_t
    prop = np.vectorize(sigmoid)(t)
    prop = np.where(prop > 0.95, 0.95, prop)
    prop = np.where(prop < 0.05, 0.05, prop)
    t = np.vectorize(stochastically_convert_to_binary)(prop)
    # Generating outcomes. The action enters linearly with no interactions, so the true
    # average treatment effect is beta; the squared w0 term makes the outcome nonlinear
    # in the confounders.
    eps_y = np.random.normal(0, math.sqrt(variance_outcome), num_samples)
    y = beta * t + W @ coeff_W_y + W[:, 0] * W[:, 0] + eps_y
    df = pd.DataFrame({"action": t, "outcome": y})
    for i in range(num_confounders):
        df["w" + str(i)] = W[:, i]

    df = df.astype({"action": "bool"}, copy=False)
    return df
class CausalDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.Tensor(X)
        self.y = torch.Tensor(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {"X": self.X[idx], "y": self.y[idx]}
SEED = 2010
np.random.seed(SEED)
torch.random.manual_seed(SEED)
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Simulation settings.
num_simulations = 20
data_samples = [1000, 10000]
beta = 10
num_confounders = 10
var_eps = 10

# Neural network training settings.
batch_size = 128
epochs = 50
print_every = 100
patience = 5
train_prop = 0.95

results = {}
for num_data_samples in data_samples:
    nn_effect_ests = []
    nn_test_losses = []
    lm_effect_ests = []
    lm_test_losses = []
    for sim in range(num_simulations):
        print(f"sim: {sim}", flush=True)
        # Generate twice the requested samples: half for training/validation, half for testing.
        data = generate_dataset(
            num_samples=2 * num_data_samples,
            beta=beta,
            num_confounders=num_confounders,
            variance_outcome=var_eps,
        )
        (data, test_data) = (data.iloc[:num_data_samples], data.iloc[num_data_samples:])
        # Column 0 is the action, column 1 is the outcome, and the rest are confounders.
        data_arr = data.values.astype("float")
        y = data_arr[:, 1]
        X = np.hstack([data_arr[:, :1], data_arr[:, 2:]])
        n_train = int(train_prop * len(X))
        (train_X, train_y) = (X[:n_train], y[:n_train])
        (valid_X, valid_y) = (X[n_train:], y[n_train:])
        train_dataset = CausalDataset(train_X, train_y)
        valid_dataset = CausalDataset(valid_X, valid_y)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
        # Feedforward regression model for the outcome.
        model = nn.Sequential(
            nn.Linear(X.shape[1], 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.Linear(512, 1),
        ).to(device)
        train_params = list(model.parameters())
        lr = 1e-3
        optimizer = optim.Adam(train_params, lr=lr)
        # Summed squared error; normalized by the number of examples below.
        criterion = nn.MSELoss(reduction="sum")

        best_train_loss = float("inf")
        best_valid_loss = float("inf")
        no_improvement = 0
        for epoch in range(epochs):
            # Evaluate on the validation set; save the best model so far and decay the
            # learning rate after `patience` epochs without improvement.
            model.eval()
            total_valid_loss = 0.0
            with torch.no_grad():
                n_valid = 0
                for valid_tensors in valid_loader:
                    preds = model(valid_tensors["X"].to(device)).flatten()
                    loss = criterion(preds, valid_tensors["y"].to(device))
                    total_valid_loss += loss.item()
                    n_valid += len(preds)

            total_valid_loss /= n_valid
            if total_valid_loss < best_valid_loss:
                best_valid_loss = total_valid_loss
                no_improvement = 0
                torch.save(model.state_dict(), "best_params.pth")
            else:
                no_improvement += 1
                if no_improvement == patience:
                    no_improvement = 0
                    # print(f"\nReducing learning rate on epoch {epoch}.", flush=True)
                    for g in optimizer.param_groups:
                        g["lr"] *= 0.1

            model.train()
            total_train_loss = 0.0
            n_train = 0
            start_time = time.time()
            for train_tensors in train_loader:
                optimizer.zero_grad()
                preds = model(train_tensors["X"].to(device)).flatten()
                loss = criterion(preds, train_tensors["y"].to(device))
                total_train_loss += loss.item()
                loss.backward()
                optimizer.step()
                n_train += len(preds)

            epoch_time = time.time() - start_time
            total_train_loss /= n_train
            if total_train_loss < best_train_loss:
                best_train_loss = total_train_loss

            if (epoch > 0) and (epoch % print_every == 0):
                print(f"\nepoch: {epoch}")
                print(f"total_train_loss: {total_train_loss}")
                print(f"best_train_loss: {best_train_loss}")
                print(f"total_valid_loss: {total_valid_loss}")
                print(f"best_valid_loss: {best_valid_loss}")
                print(f"epoch_time: {epoch_time:.2f}", flush=True)

        model.load_state_dict(torch.load("best_params.pth"))
        model.eval()
        # Evaluate the selected model on the held-out test data and compute its effect
        # estimate as the average difference between its predictions with the action
        # forced to 1 versus 0 for every test sample.
        data_arr = test_data.values.astype("float")
        test_y = data_arr[:, 1]
        test_X = np.hstack([data_arr[:, :1], data_arr[:, 2:]])
        test_dataset = CausalDataset(test_X, test_y)
        test_loader = DataLoader(test_dataset, batch_size=batch_size)
        total_test_loss = 0.0
        with torch.no_grad():
            n_test = 0
            t0s = []
            t1s = []
            for test_tensors in test_loader:
                preds = model(test_tensors["X"].to(device)).flatten()
                loss = criterion(preds, test_tensors["y"].to(device))
                total_test_loss += loss.item()
                n_test += len(preds)
                # Counterfactual predictions with the action set to 0 and then 1.
                test_tensors = test_tensors["X"].to(device)
                test_tensors[:, 0] = 0
                t0s.append(model(test_tensors).flatten())
                test_tensors[:, 0] = 1
                t1s.append(model(test_tensors).flatten())

        total_test_loss /= n_test
        nn_test_losses.append(total_test_loss)
        print(f"nn_test_loss: {nn_test_losses[-1]}")
        t0s = torch.cat(t0s)
        t1s = torch.cat(t1s)
        nn_effect_ests.append((t1s - t0s).mean().item())
        print(f"nn_effect_est: {nn_effect_ests[-1]}")

        # Linear regression baseline; its effect estimate is the action coefficient.
        lm = LinearRegression()
        lm.fit(train_X, train_y)
        lm_effect_ests.append(lm.coef_[0])
        lm_test_losses.append(((lm.predict(test_X) - test_y) ** 2).mean())
        print(f"lm_test_loss: {lm_test_losses[-1]}")
        print(f"lm_effect_est: {lm_effect_ests[-1]}\n")
    results[num_data_samples] = {
        "nn_effect_ests": np.array(nn_effect_ests),
        "nn_test_losses": np.array(nn_test_losses),
        "lm_effect_ests": np.array(lm_effect_ests),
        "lm_test_losses": np.array(lm_test_losses),
    }
data = []
for (num_data_samples, res) in results.items():
    for sim in range(len(res["nn_test_losses"])):
        for m in ["nn", "lm"]:
            row = {
                "sim": sim,
                "num_data_samples": num_data_samples,
                "model": m,
                "test_loss": res[f"{m}_test_losses"][sim],
                "effect_est": res[f"{m}_effect_ests"][sim],
            }
            data.append(row)

df = pd.DataFrame(data)
df.to_csv("supervised_estimates.csv", index=False)
# Plot test loss against the absolute error of the effect estimate for each model and
# sample size.
(fig, axs) = plt.subplots(nrows=1, ncols=2)
for (num_data_samples, res) in results.items():
    for (idx, m) in enumerate(["nn", "lm"]):
        label = f"{m}_{num_data_samples}"
        axs[idx].scatter(
            res[f"{m}_test_losses"], np.abs(beta - res[f"{m}_effect_ests"]), label=label
        )
        # axs[idx].scatter(
        #     np.abs(beta - res[f"{m}_effect_ests"]), res[f"{m}_test_losses"], label=label
        # )
        axs[idx].set_xlabel("test_loss")
        axs[idx].set_ylabel(r"| $\beta - \hat{\beta}$ |")
        axs[idx].legend()

plt.tight_layout()
plt.show()