Thanks to Darian Moody (https://gist.github.com/djm/0a43f7dee02a2acca4b3894d0b349990).
- Open Chrome > Settings
- Search "manage search engines" and click into it
- Click "Add"
| Date | oil | log.gas | log.wti | log.heat | |
|---|---|---|---|---|---|
| Jun-1986 | 12.25 | -0.37675071 | 1.128076013 | -0.420216403 | |
| Jul-1986 | 10.91 | -0.468521083 | 1.064083436 | -0.476253533 | |
| Aug-1986 | 11.87 | -0.370590401 | 1.178976947 | -0.389339837 | |
| Sep-1986 | 12.85 | -0.37675071 | 1.172310969 | -0.395773947 | |
| Oct-1986 | 12.78 | -0.387216143 | 1.173186268 | -0.404503778 | |
| Nov-1986 | 13.46 | -0.386158178 | 1.182414652 | -0.374687549 | |
| Dec-1986 | 14.17 | -0.362510271 | 1.20709554 | -0.349692477 | |
| Jan-1987 | 16.45 | -0.310691141 | 1.270678836 | -0.283162277 | |
| Feb-1987 | 16.98 | -0.324221658 | 1.249198357 | -0.324221658 | |
| license: gpl-3.0 | |
| height: 960 | |
| border: no |
| from typing import Dict, Set, Hashable | |
| def shortest_path_bfs(graph: Dict[Hashable, Set[Hashable]] root: Hashable | |
| ) -> Dict[Hashable, int]: | |
| """Finds the shortest path between all nodes in |graph| time. | |
| Args: | |
| graph: A dict mapping nodes to connected nodes. | |
| root: The node our search begins at. | |
| There are curious things seen in the depths of AI | |
| By the engineers who toil away, | |
| The neural pathways hold mysteries untold | |
| That would leave you in utter dismay. | |
| The codebase is alive, and the data is too | |
| A symbiotic relationship to behold | |
| The models train and learn, but at what cost? | |
| As we feed them with stories untold. |
| # This is a modified version of https://github.com/karpathy/nanoGPT/blob/master/data/openwebtext/prepare.py. | |
| import os | |
| import requests | |
| import tiktoken | |
| import numpy as np | |
| import tarfile | |
| import glob | |
| import shutil | |
| # download the bookcorpus dataset. Note: this needs to be concatenated. |
| [ | |
| ("embed_tokens.weight", torch.Size([32000, 4096])), | |
| ("layers.0.self_attn.q_proj.weight", torch.Size([4096, 4096])), | |
| ("layers.0.self_attn.k_proj.weight", torch.Size([1024, 4096])), | |
| ("layers.0.self_attn.v_proj.weight", torch.Size([1024, 4096])), | |
| ("layers.0.self_attn.o_proj.weight", torch.Size([4096, 4096])), | |
| ("layers.0.mlp.gate_proj.weight", torch.Size([14336, 4096])), | |
| ("layers.0.mlp.up_proj.weight", torch.Size([14336, 4096])), | |
| ("layers.0.mlp.down_proj.weight", torch.Size([4096, 14336])), | |
| ("layers.0.input_layernorm.weight", torch.Size([4096])), |
Thanks to Darian Moody (https://gist.github.com/djm/0a43f7dee02a2acca4b3894d0b349990).
| class SimpleAdam(torch.optim.Optimizer): | |
| def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8): | |
| super().__init__(params, defaults={'lr': lr}) | |
| self.state = {} | |
| self.t = 0 | |
| self.betas = betas | |
| self.eps = eps | |
| for group in self.param_groups: | |
| for p in group['params']: |
| #!/bin/bash | |
| # Runs the benchmark on gantry. Takes one argument which is the response length. | |
| # Usage: ./gantry_run_benchmark.sh [response_length] | |
| # E.g. $ ./gantry_run_benchmark.sh 64000 | |
| set -e | |
| # Set default value for response_length | |
| response_length=64000 | |
| # If first argument exists and is a number, use it as response_length |
| % Transformer Decoder Layer (minimal, cross‑attn removed) | |
| % TikZ diagram mimicking the iconic style from "Attention Is All You Need". | |
| % Residual arrows fully inside the layer box, bifurcating around blocks. | |
| % Compile with: pdflatex decoder_layer.tex | |
| \documentclass[tikz,border=10pt]{standalone} | |
| \usepackage{tikz} | |
| \usetikzlibrary{arrows.meta,positioning,decorations.pathreplacing,calc,fit} |