UlisseMini · January 9, 2023 20:37
diff --git a/flatten-hessian-mnist.py b/flatten-hessian-mnist.py
 # fuller code https://colab.research.google.com/drive/12zXLbykv537MrZr6WDCnRIqKQ5h8UjVw?usp=sharing

 fn = lambda *params: F.nll_loss(stateless.functional_call(model, {n: p for n,p in zip(names, params)}, x), y)
 H = hessian(fn, tuple(model.parameters()))
 # H[i][j] contains the derivatives of the loss with respect to every parameter in model.parameters()[i] and [j].
 # (It's an annoying tuple)

 # flatten the annoying tuple!
 rows = []
 shapes = [p.shape for p in model.parameters()]
 for i in range(len(H)):
    rows.append(torch.cat([H[j][i].view(shapes[j].numel(), shapes[i].numel()) for j in range(len(H))], dim=0))

 full_hessian = torch.cat(rows, dim=1)
 full_hessian.shape
	# fuller code https://colab.research.google.com/drive/12zXLbykv537MrZr6WDCnRIqKQ5h8UjVw?usp=sharing

	fn = lambda *params: F.nll_loss(stateless.functional_call(model, {n: p for n,p in zip(names, params)}, x), y)
	H = hessian(fn, tuple(model.parameters()))
	# H[i][j] contains the derivatives of the loss with respect to every parameter in model.parameters()[i] and [j].
	# (It's an annoying tuple)

	# flatten the annoying tuple!
	rows = []
	shapes = [p.shape for p in model.parameters()]
	for i in range(len(H)):
	rows.append(torch.cat([H[j][i].view(shapes[j].numel(), shapes[i].numel()) for j in range(len(H))], dim=0))

	full_hessian = torch.cat(rows, dim=1)
	full_hessian.shape