@jgoodie
Created January 19, 2025 17:24
Example of a single-headed attention mechanism
import scipy.special
import numpy as np
from sklearn.preprocessing import OneHotEncoder
sentence = "the otter swam across the river to the other bank"
# Build the vocabulary: dict.fromkeys de-duplicates while preserving token order
vocab = list(dict.fromkeys(sentence.split()))
tokens = sentence.lower().split()
encoder = OneHotEncoder(categories=[vocab], sparse_output=False)
# One-hot encode the tokens, then transpose so each column is a token embedding (D x N)
X = encoder.fit_transform(np.array(tokens).reshape(-1, 1)).T
D, N = X.shape  # D = embedding (vocab) dimension, N = number of tokens
# Choose random values for the parameters
omega_q = np.random.normal(size=(D,D))
omega_k = np.random.normal(size=(D,D))
omega_v = np.random.normal(size=(D,D))
beta_q = np.random.normal(size=(D,1))
beta_k = np.random.normal(size=(D,1))
beta_v = np.random.normal(size=(D,1))
def single_head_attention(X, beta_q, beta_k, beta_v, omega_q, omega_k, omega_v):
    # Linearly project the inputs into queries, keys, and values (each D x N)
    query = beta_q + omega_q @ X
    key = beta_k + omega_k @ X
    value = beta_v + omega_v @ X
    # Scaled dot products: column n holds the similarity of every key to query n
    dp = key.T @ query
    scaled_dp = dp / np.sqrt(query.shape[0])
    # Softmax over axis 0 turns each column into weights that sum to 1
    attention_weights = scipy.special.softmax(scaled_dp, axis=0)
    # Each output column is a weighted sum of the value vectors
    attention_output = value @ attention_weights
    return attention_output, attention_weights
attention_output, attention_weights = single_head_attention(X, beta_q, beta_k, beta_v, omega_q, omega_k, omega_v)
print(f"X shape: {X.shape}")
print(f"Q weights shape: {omega_q.shape}")
print(f"Beta weights shape: {beta_q.shape}")
print(f"Attention weights shape: {attention_weights.shape}")
print(f"Attention out shape: {attention_output.shape}")