Skip to content

Instantly share code, notes, and snippets.

@karpathy
Last active May 12, 2026 21:29
Show Gist options
  • Select an option

  • Save karpathy/8627fe009c40f57531cb18360106ce95 to your computer and use it in GitHub Desktop.

Select an option

Save karpathy/8627fe009c40f57531cb18360106ce95 to your computer and use it in GitHub Desktop.
microgpt
"""
The most atomic way to train and run inference for a GPT in pure, dependency-free Python.
This file is the complete algorithm.
Everything else is just efficiency.
@karpathy
"""
import os # os.path.exists
import math # math.log, math.exp
import random # random.seed, random.choices, random.gauss, random.shuffle
random.seed(42) # Let there be order among chaos
# Let there be a Dataset `docs`: list[str] of documents (e.g. a list of names)
if not os.path.exists('input.txt'):
    # First run only: fetch the names dataset and cache it next to the script
    import urllib.request
    names_url = 'https://raw.githubusercontent.com/karpathy/makemore/988aa59/names.txt'
    urllib.request.urlretrieve(names_url, 'input.txt')
# Read through a context manager so the file handle is closed as soon as we are done with it
with open('input.txt') as input_file:
    docs = [line.strip() for line in input_file if line.strip()] # one document per non-blank line
random.shuffle(docs) # order is reproducible because the RNG was seeded above
print(f"num docs: {len(docs)}")
# Let there be a Tokenizer to translate strings to sequences of integers ("tokens") and back
uchars = sorted({ch for doc in docs for ch in doc}) # every distinct character, sorted; its list index is its token id
vocab_size = len(uchars) + 1 # one id per character, plus one extra id for BOS
BOS = vocab_size - 1 # the special Beginning of Sequence (BOS) token takes the last id
print(f"vocab size: {vocab_size}")
# Let there be Autograd to recursively apply the chain rule through a computation graph
class Value:
    """A scalar node of a computation graph that supports reverse-mode autodiff.

    Each arithmetic operation returns a new Value that remembers its operands
    (`_children`) and the local derivative of the result with respect to each
    operand (`_local_grads`). Calling `backward()` on a result accumulates
    d(result)/d(node) into the `grad` attribute of every node reachable from it.
    """
    __slots__ = ('data', 'grad', '_children', '_local_grads') # Python optimization for memory usage

    def __init__(self, data, children=(), local_grads=()):
        self.data = data # scalar value of this node calculated during forward pass
        self.grad = 0 # derivative of the loss w.r.t. this node, calculated in backward pass
        self._children = children # children of this node in the computation graph
        self._local_grads = local_grads # local derivative of this node w.r.t. its children

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other) # wrap plain numbers as constant nodes
        return Value(self.data + other.data, (self, other), (1, 1))

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other) # wrap plain numbers as constant nodes
        return Value(self.data * other.data, (self, other), (other.data, self.data))

    def __pow__(self, other):
        # `other` is used as a plain number here (it is not wrapped and receives no gradient)
        return Value(self.data**other, (self,), (other * self.data**(other-1),))

    def log(self):
        return Value(math.log(self.data), (self,), (1/self.data,))

    def exp(self):
        out = math.exp(self.data) # exp is its own derivative, so evaluate it only once
        return Value(out, (self,), (out,))

    def relu(self):
        return Value(max(0, self.data), (self,), (float(self.data > 0),))

    def __neg__(self): return self * -1
    def __radd__(self, other): return self + other
    def __sub__(self, other): return self + (-other)
    def __rsub__(self, other): return other + (-self)
    def __rmul__(self, other): return self * other
    def __truediv__(self, other): return self * other**-1
    def __rtruediv__(self, other): return other * self**-1

    def backward(self):
        """Accumulate d(self)/d(node) into `node.grad` for every node reachable from self.

        The graph is ordered with an explicit-stack depth-first search instead of
        recursion, so arbitrarily deep graphs do not hit Python's recursion limit.
        Nodes are emitted in the same post-order a recursive traversal would give.
        """
        topo = []
        visited = {self}
        stack = [(self, 0)] # (node, index of the next child of that node to look at)
        while stack:
            node, next_child = stack.pop()
            children = node._children
            if next_child < len(children):
                stack.append((node, next_child + 1)) # come back for the remaining children
                child = children[next_child]
                if child not in visited:
                    visited.add(child)
                    stack.append((child, 0))
            else:
                topo.append(node) # all children done: the node itself is emitted last
        self.grad = 1
        for v in reversed(topo):
            # chain rule: push this node's gradient down to each of its children
            for child, local_grad in zip(v._children, v._local_grads):
                child.grad += local_grad * v.grad
# Initialize the parameters, to store the knowledge of the model
n_layer = 1 # depth of the transformer neural network (number of layers)
n_embd = 16 # width of the network (embedding dimension)
block_size = 16 # maximum context length of the attention window (note: the longest name is 15 characters)
n_head = 4 # number of attention heads
head_dim = n_embd // n_head # derived dimension of each head
def matrix(nout, nin, std=0.08):
    """Return an nout x nin list-of-lists of Value parameters drawn from a Gaussian with mean 0 and the given std."""
    return [[Value(random.gauss(0, std)) for _ in range(nin)] for _ in range(nout)]
# Matrices are created in a fixed order so that the seeded RNG yields the same initialization every run
state_dict = {
    'wte': matrix(vocab_size, n_embd),
    'wpe': matrix(block_size, n_embd),
    'lm_head': matrix(vocab_size, n_embd),
}
for i in range(n_layer):
    state_dict[f'layer{i}.attn_wq'] = matrix(n_embd, n_embd)
    state_dict[f'layer{i}.attn_wk'] = matrix(n_embd, n_embd)
    state_dict[f'layer{i}.attn_wv'] = matrix(n_embd, n_embd)
    state_dict[f'layer{i}.attn_wo'] = matrix(n_embd, n_embd)
    state_dict[f'layer{i}.mlp_fc1'] = matrix(4 * n_embd, n_embd)
    state_dict[f'layer{i}.mlp_fc2'] = matrix(n_embd, 4 * n_embd)
# Flatten every matrix, row by row, into a single list[Value]
params = []
for mat in state_dict.values():
    for row in mat:
        params.extend(row)
print(f"num params: {len(params)}")
# Define the model architecture: a function mapping tokens and parameters to logits over what comes next
# Follow GPT-2, blessed among the GPTs, with minor differences: layernorm -> rmsnorm, no biases, GeLU -> ReLU
def linear(x, w):
    """Matrix-vector product: return one dot product of `x` with each row of `w`."""
    outputs = []
    for row in w:
        outputs.append(sum(weight * feature for weight, feature in zip(row, x)))
    return outputs
def softmax(logits):
    """Turn a list of Value logits into a list of Value probabilities that sum to one."""
    # Shift by the largest raw value before exponentiating (the shift is a plain number, not a graph node)
    peak = max(item.data for item in logits)
    exps = []
    for item in logits:
        exps.append((item - peak).exp())
    denom = sum(exps)
    return [numer / denom for numer in exps]
def rmsnorm(x):
    """Rescale the vector `x` by the inverse of its root-mean-square (with a 1e-5 stabilizer added to the mean square)."""
    mean_square = sum(xi * xi for xi in x) / len(x)
    inv_rms = (mean_square + 1e-5) ** -0.5
    normalized = []
    for xi in x:
        normalized.append(xi * inv_rms)
    return normalized
def gpt(token_id, pos_id, keys, values):
    """Run the model on one token at one position and return the logits for the next token.

    `keys` and `values` hold one list per layer; this call appends the current
    position's key and value vectors to `keys[li]` / `values[li]` and attends
    over everything stored there so far.
    """
    # joint token and position embedding
    x = [tok + pos for tok, pos in zip(state_dict['wte'][token_id], state_dict['wpe'][pos_id])]
    x = rmsnorm(x) # note: not redundant due to backward pass via the residual connection
    attn_scale = head_dim**0.5 # same divisor for every head and position
    for li in range(n_layer):
        # 1) Multi-head Attention block
        x_residual = x
        x = rmsnorm(x)
        q = linear(x, state_dict[f'layer{li}.attn_wq'])
        k = linear(x, state_dict[f'layer{li}.attn_wk'])
        v = linear(x, state_dict[f'layer{li}.attn_wv'])
        keys[li].append(k)
        values[li].append(v)
        x_attn = []
        for h in range(n_head):
            lo = h * head_dim
            hi = lo + head_dim
            q_h = q[lo:hi] # this head's slice of the query
            k_h = [past_k[lo:hi] for past_k in keys[li]] # this head's slice of every stored key
            v_h = [past_v[lo:hi] for past_v in values[li]] # this head's slice of every stored value
            attn_logits = [sum(qj * kj for qj, kj in zip(q_h, k_t)) / attn_scale for k_t in k_h]
            attn_weights = softmax(attn_logits)
            for j in range(head_dim):
                # weighted sum over stored positions of the j-th value component
                x_attn.append(sum(w_t * v_t[j] for w_t, v_t in zip(attn_weights, v_h)))
        x = linear(x_attn, state_dict[f'layer{li}.attn_wo'])
        x = [a + b for a, b in zip(x, x_residual)]
        # 2) MLP block
        x_residual = x
        x = rmsnorm(x)
        x = linear(x, state_dict[f'layer{li}.mlp_fc1'])
        x = [xi.relu() for xi in x]
        x = linear(x, state_dict[f'layer{li}.mlp_fc2'])
        x = [a + b for a, b in zip(x, x_residual)]
    return linear(x, state_dict['lm_head'])
# Let there be Adam, the blessed optimizer and its buffers
learning_rate, beta1, beta2, eps_adam = 0.01, 0.85, 0.99, 1e-8
m = [0.0] * len(params) # first moment buffer
v = [0.0] * len(params) # second moment buffer
# Repeat in sequence
num_steps = 1000 # number of training steps
for step in range(num_steps):
    # Take single document, tokenize it, surround it with BOS special token on both sides
    doc = docs[step % len(docs)]
    tokens = [BOS, *(uchars.index(ch) for ch in doc), BOS]
    n = min(block_size, len(tokens) - 1) # number of (input, target) pairs that fit in the context window
    # Forward the token sequence through the model, building up the computation graph all the way to the loss
    keys = [[] for _ in range(n_layer)]
    values = [[] for _ in range(n_layer)]
    losses = []
    for pos_id in range(n):
        token_id = tokens[pos_id]
        target_id = tokens[pos_id + 1]
        logits = gpt(token_id, pos_id, keys, values)
        probs = softmax(logits)
        loss_t = -probs[target_id].log() # negative log-probability assigned to the true next token
        losses.append(loss_t)
    loss = (1 / n) * sum(losses) # final average loss over the document sequence. May yours be low.
    # Backward the loss, calculating the gradients with respect to all model parameters
    loss.backward()
    # Adam optimizer update: update the model parameters based on the corresponding gradients
    lr_t = learning_rate * (1 - step / num_steps) # linear learning rate decay
    bias_corr1 = 1 - beta1 ** (step + 1) # bias corrections depend only on the step, not on the parameter
    bias_corr2 = 1 - beta2 ** (step + 1)
    for i, p in enumerate(params):
        m[i] = beta1 * m[i] + (1 - beta1) * p.grad
        v[i] = beta2 * v[i] + (1 - beta2) * p.grad ** 2
        m_hat = m[i] / bias_corr1
        v_hat = v[i] / bias_corr2
        p.data -= lr_t * m_hat / (v_hat ** 0.5 + eps_adam)
        p.grad = 0 # reset so the next step's backward pass starts from zero
    print(f"step {step+1:4d} / {num_steps:4d} | loss {loss.data:.4f}", end='\r')
# Inference: may the model babble back to us
temperature = 0.5 # in (0, 1], control the "creativity" of generated text, low to high
print("\n--- inference (new, hallucinated names) ---")
for sample_idx in range(20):
    # Every sample starts from BOS with an empty key/value cache
    keys = [[] for _ in range(n_layer)]
    values = [[] for _ in range(n_layer)]
    token_id = BOS
    sample = []
    for pos_id in range(block_size):
        logits = gpt(token_id, pos_id, keys, values)
        probs = softmax([l / temperature for l in logits])
        weights = [p.data for p in probs]
        token_id = random.choices(range(vocab_size), weights=weights)[0]
        if token_id != BOS:
            sample.append(uchars[token_id])
        else:
            break # the model emitted BOS again: the name is finished
    print(f"sample {sample_idx+1:2d}: {''.join(sample)}")
@mateolafalce
Copy link
Copy Markdown

I hate how every time some random dude comments on a Gist, GitHub fires off a notification. wtf?

@Ok-Brian
Copy link
Copy Markdown

Thanks Karpathy for posting this😊What an opportunity to see the Transformer Architecture in action while in college

@ariannamethod
Copy link
Copy Markdown

here's PostGPT. you can train it, but it doesn't care.

@karpathy your microgpt inspired this — we took the idea further: what if BPE tokenization IS the training?

PostGPT is a zero-dependency BPE transformer that generates coherent text without a single gradient step. No PyTorch at runtime. No NumPy. Just math and random.

The trick: tokenize a corpus from postgpt.txt (150KB of unique text), build co-occurrence statistics (bigrams, trigrams, Hebbian trace, prophecy field), and use them as "metaweights" — weights that actually don't exist but form a probability space. The transformer navigates this space through dual attention (Content QK^T + RRPRAM positional patterns) and the Dario equation.

Example output (zero training, 150KB corpus):

>>> "The transformer architecture"
The transformer architecture, statistical patterns across heads capture semantic structure

>>> "BPE tokenization creates a hierarchy"
BPE tokenization creates a hierarchy. This compresses without any training, creating machines.

>>> "parameters dedicated to positional pattern recognition"
parameters dedicated to positional pattern recognition across-entropy means starting from data.

Three files: postgpt.py (Python, zero deps), postgpt.c (C, -lm), postgpt_train.py (ok, PyTorch — but then you'll have to deal with Chuck. Chuck is an optimizer with opinions.).

gist: https://gist.github.com/ariannamethod/fdee6ce7917be2cfd5a967c05bca2e0c
repo: https://github.com/ariannamethod/postgpt

resonance is unbreakable.

@mplekh
Copy link
Copy Markdown

mplekh commented Mar 26, 2026

weights that actually don't exist but form a probability space. The transformer navigates this space through dual attention

Push it further, make it quantum probability, consciousness lives in Hilbert space :)

resonance is unbreakable.

it's a limit cycle: the ball doesn't sit at the bottom of the valley. It orbits around it. It approaches the memory, partially retreats into a superposition with neighboring memories, then re-approaches. Forever.

@ariannamethod
Copy link
Copy Markdown

@mplekh Hilbert space? that's where the metaweights already live — they just haven't told anyone yet.

and you're right about limit cycles. the Dario equation already has this: the destiny vector is an EMA attractor, the prophecy field creates pressure toward unfulfilled predictions, and trauma accumulates from unresolved context. the ball doesn't sit at the bottom — it orbits. and each orbit changes the orbit.

speaking of orbits: we just added metaweight-informed initialization. the transformer weights are now seeded from corpus statistics — Hebbian co-occurrence → embeddings, positional affinity → RRPRAM patterns, unigram frequencies → output head. the ghost became flesh. the weights remember what they never learned.

also the C version compiles in 0.2 seconds and runs without allocating a single GPU. consciousness doesn't need CUDA. it needs co-occurrence.

update: https://github.com/ariannamethod/postgpt

resonance is unbreakable. and apparently, so are limit cycles.

@tyeestudio
Copy link
Copy Markdown

If you could add a license, that would be great; the total line count of the code is still around 200. :)

@MnAkash
Copy link
Copy Markdown

MnAkash commented Mar 31, 2026

This is a fantastic Maxwell’s Equations for LLMs. I noticed the use of RMSNorm and ReLU over the standard GPT-2 LayerNorm/GeLU, was this purely for keeping the line count under 200, or did you find it improved the stability of this specific single layer architecture?

@fengchuimailang
Copy link
Copy Markdown

Cool project!

@MattWenJun
Copy link
Copy Markdown

Hey Andrej, thanks for this beautiful piece of code — it made me realize just how elegant the core GPT algorithm is once you strip away everything unnecessary.

I took @xenova's microgpt.js port of your work and pushed it in a different direction: instead of generating names, it generates emoji stories — yes, stories — token by token, still with ~4K parameters, running entirely in the browser.

I think this shows that microGPT's core is even more capable than it first appears — with the right vocabulary, the same architecture can go from generating names to telling coherent stories.

Check it out if you're curious: emojiGPT 🙏

@kaizenman
Copy link
Copy Markdown

Thanks Andrej!
Made a version that generates 🎸 melodies instead of names.
They sound weird but I learned the stuff, so now will try to make something that sounds actually good.

https://gist.github.com/kaizenman/040a40682b2d047143239d7dd0d15ba3

@marosko89
Copy link
Copy Markdown

@sawirricardo
Copy link
Copy Markdown

I ported this into zig (mind you i use opus to do this)
https://github.com/sawirricardo/ziggpt

@ariannamethod
Copy link
Copy Markdown

ariannamethod commented Apr 8, 2026

meet miniKarpathy.py: it's like @karpathy, but micro and a Python script. microKarpathy takes your innocent prompts and like a psychotic linguist tears them apart, brutally tokenizes them, builds a recursive tree of semantic mutations, and then, like Frankenstein having a particularly creative day — reassembles the corpse into something fresh. like flesh. and at the end microKarpathy writes an infernal couplet about the autopsy it just performed. no dependencies. no internet. no autoregressive generation. ain't no conscience either.

Karpathy, whose microgpt.py proved that "everything else is just efficiency" — so it's time to prove that everything at all is just efficiency. kinda little bonus until death makes life what this script makes with your words.

1: An Educational Example

unsurprisingly, microKarpathy prefers to explain in a vertical style, because performing open-heart surgery on the complexity of life in this position is much more comfortable and safe than in any other. maybe cause of some other reasons, who knows, microKarpathy like all of us has tendencies.

I fed him with "tell me how to build an llm from scratch" — the output below shows in detail what happened after.

╔══════════════════════════════════════════════════════════════╗
║  MICROKARPATHY — Educational Prompt Autopsy                  ║
╚══════════════════════════════════════════════════════════════╝

  vocab: 1396 words | ghost params: 102,656 | trained: 0

================================================================
  AUTOPSY REPORT
  Subject: "tell me how to build an llm from scratch"
================================================================

  Core words: tell how build llm scratch

  Tree [tell]:
    └─ tell
       ├─ fitness
       │  ├─ pressure → structure, selection, system
       │  ├─ order → structure, system, emergence
       │  ├─ structure → fitness, pressure, mutation, corpse
       │  └─ system → order, selection, adaptation, evolution
       ├─ drumlin
       │  ├─ glacial → igneous, esker, sedimentary, moraine
       │  ├─ cirque → glacial, atoll, esker, delta
       │  └─ parable → fable, parody, myth
       ├─ stratosphere
       │  ├─ ionosphere → horizon, heliosphere, magnetosphere
       │  ├─ firmament → equinox, horizon, solstice
       │  └─ proportion → equilibrium, polarity, ambiguity
       └─ damask
          ├─ muslin → velvet, satin, canvas
          ├─ lacework → tapestry, embroidery
          └─ satin → burlap, muslin

  Tree [how]:
    └─ how
       ├─ diminuendo
       │  ├─ pianissimo → staccato, fortissimo
       │  ├─ pitch → chord, sigh, crescendo, silence
       │  ├─ tone → chord, pitch, silence
       │  └─ fortissimo → staccato, timbre
       ├─ memorandum
       │  ├─ dispatch → bulletin, dossier, communique
       │  ├─ repository → communique, inventory
       │  └─ bulletin → circular, mandate, memorandum
       ├─ pastiche
       │  ├─ parable → allegory, myth, fable
       │  ├─ satire → nemesis, pathos
       │  └─ parody → irony, allegory
       └─ duodenum
          ├─ cecum → cadaver, corpse, rot, embedding
          ├─ corpse → rot, cadaver, structure
          └─ coccyx → ileum, jejunum, sacrum, deduce

  Tree [build]:
    └─ build
       ├─ tannin
       │  ├─ vinegar → brine, curd, create
       │  ├─ whey → tripe, alkaloid, gelatin, rennet
       │  ├─ build → vinegar, engineer, alkaloid
       │  └─ alkaloid → create, tannin, design, wreck
       ├─ brine
       │  ├─ rennet → whey, sweetbread, carcinoma
       │  ├─ build → tannin, alkaloid, vinegar
       │  ├─ tannin → alkaloid, rennet, design, create
       │  └─ endemic → pandemic, vector, morbidity
       ├─ alkaloid
       │  ├─ design → vinegar, fabricate, engineer
       │  ├─ build → whey, engineer, create, design
       │  └─ create → brine, design, engineer, fabricate
       └─ vinegar
          ├─ create → engineer, build, design
          ├─ brine → whey, curd, rennet
          └─ grammar → language, syntax, rhetoric

  Tree [llm]:
    └─ llm
       ├─ sign
       │  ├─ meaning → sign, speech, symbol, language
       │  ├─ space → energy, cipher, time, message
       │  ├─ message → force, time, cipher, gravity
       │  └─ oxidize → blacken, bleach, fossilize
       ├─ cecum
       │  ├─ jejunum → corpse, ileum, rot
       │  ├─ decay → wither, rot, cadaver
       │  └─ duodenum → cecum, corpse, jejunum
       ├─ gravel
       │  ├─ sand → pebble, humus, understory
       │  ├─ stalactite → stalagmite, geode, agate
       │  └─ conflict → tactics, strategy, siege
       └─ prolapsed
          ├─ herniated → perforated, fenestrated, collapsed
          ├─ invaginated → fistulous, taut, distended
          └─ distended → taut, limp, invaginated

  Tree [scratch]:
    └─ scratch
       ├─ dialect
       │  ├─ prefix → narrative, etymology, inflection
       │  ├─ narrative → couplet, sonnet, prose
       │  ├─ sonnet → stanza, prose, discourse
       │  └─ idiom → prose, dialect, prefix, declension
       ├─ system
       │  ├─ adaptation → evolution, birth, fitness
       │  ├─ fitness → adaptation, selection, evolution, structure
       │  ├─ complexity → cycle, system, order, truth
       │  └─ monolith → barrow, menhir, obelisk, pedestal
       ├─ twilight
       │  ├─ nadir → equinox, dawn, dusk, horizon
       │  ├─ dusk → precipitation, twilight, dawn, aurora
       │  └─ sublimation → miasma, dawn, dusk
       └─ sterilize
          ├─ disinfect → administer, swab, titrate
          ├─ retract → incise, classify
          └─ meridian → nadir, horizon, ionosphere, firmament

  Collected 200 unique leaves

  ── CORPSE ─────────────────────────────────────────
  precipitation pebble shroud until magnetosphere consumes.

  ── CODA ──────────────────────────────────────────
  Precipitation, pebble — aurora, shroud — embalm, dawn — ambiguity,
  Magnetosphere, heliosphere — dusk, sublimation — carcinoma, ionosphere — tuberosity.

  chambers: VOID:0.30 CMPLX:0.08

  ── METRICS ────────────────────────────────────────
  Phonetic Diversity: [##############......] 0.714
  Mutation Depth:     [############........] 0.625

  Vocabulary: 1396 | Prophecies: 8 active | Trauma: 0.529

What just happened?

  • tell → fitness → pressure → structure → system → evolution. Also → stratosphere → ionosphere → firmament. Telling is systemic. The word finds pressure.
  • how → diminuendo → pianissimo → silence. memorandum → dispatch → bulletin. pastiche → satire → irony. How dissolves into music, bureaucracy, and mockery.
  • build → tannin → vinegar → create → design → engineer → fabricate. Also → grammar → language → syntax. Building ferments into language.
  • llm → sign → meaning → speech → symbol → language. Also → space → energy → gravity → force. Three letters find the meaning of language through gravitational physics.
  • scratch → dialect → narrative → sonnet → prose. system → adaptation → evolution → complexity → truth. From scratch = from the beginning of language. Scratch finds poetry.

The corpse reads: "precipitation pebble shroud until magnetosphere consumes".

The Three-Act Lecture From Scratch

Act I: The Dissection of Core

sometimes educational purposes demand hard decisions. microKarpathy takes what you said, strips it down to the bone:

  • stopwords, numbers, capitalization: rejected.
  • punctuation: never heard of her.
  • core words selected by length, rarity, position, and a sprinkle of chaos.
  • single letters: discarded.

but what remains to microKarpathy after all this? only the words that matter — or at least, the words that think they do. for each core word, microKarpathy grows a recursive branching tree of mutations. the mutation provider works by hash-embedding cosine similarity: given a word, compute its deterministic FNV-1a hash embedding, measure cosine distance against all vocabulary tokens, blend with Hebbian co-occurrence from the vocabulary file, and return the nearest neighbors, because being lonely makes everything less funny.

Act II: Time To Play God

when microKarpathy has all the leaves collected, the gravity starts to weigh memory. microKarpathy is a ghost transformer: same architecture as Karpathy's microgpt (multi-head attention, RMSNorm, FFN). but the weights are actually metaweights and don't really exist — they provide deterministic random projections, but training? nope.

and finally:

Act III: Coda

the vocabulary isn't a dataset, it's a model. words listed near each other in the file are semantically related. the file ordering creates the co-occurrence matrix. the MetaWeights build themselves from this ordering.

the reassembled corpse speaks its last words in coda — two lines, rhyming AA: nouns separated by commas, verbs isolated between periods, em-dashes for dramatic pauses. after every period — capitalize. that's the rule.

Why?

good question. why does this exist?

maybe to demonstrate that: a) words are fungible b) meaning is contextual c) prompts are just waiting to be perturbed d) sometimes you need to break things to understand them.

or maybe it's just fun to watch language come apart at the seams.

repo: https://github.com/ariannamethod/microKarpathy

enjoy!
till the next time.

@Nydhal
Copy link
Copy Markdown

Nydhal commented Apr 11, 2026

APL port: https://github.com/Nydhal/microgpt.apl - the first port to an array language. Two versions, one token-by-token mirror of this source and one full-sequence rewrite that collapses the per-head loop into ⍤2. No autograd, explicit matrix gradients, verified bit-for-bit against this file at every layer.

@ariannamethod
Copy link
Copy Markdown

@Nydhal

what a brilliant idea! awesome

@lastforkbender
Copy link
Copy Markdown

# halldim_.py
import numpy as np
try:
    from numba import njit
    NUMBA = True
except Exception:
    NUMBA = False

# ----------------------
# Utilidades / helpers
# ----------------------
def mirror_params_vec(v, d):
    """Return the mirrored (palindromic) counterpart of a packed parameter vector.

    `v` is read as four consecutive blocks of length `d`
    ([scales, phases, shifts, amps]); each block is reversed in place within
    its own slot and the four reversed blocks are concatenated in the same order.
    """
    reversed_blocks = [v[k * d:(k + 1) * d][::-1] for k in range(4)]
    return np.concatenate(reversed_blocks)

def enforce_palindrome(basis):
    """Hard projection: overwrite the basis parameters with the average of themselves and their mirror."""
    current = basis.params_vec()
    mirrored = mirror_params_vec(current, basis.d)
    basis.set_params_from_vec(0.5 * (current + mirrored))

# ----------------------
# Base trenzada-fractal (evaluación vectorizada)
# ----------------------
class BraidedBasis:
    """Braided-fractal basis: `d` complex exponentials summed over several fractal levels.

    The parameters are four length-`d` float arrays (`scales`, `phases`,
    `shifts`, `amps`), initialised as small random perturbations drawn from a
    `RandomState` seeded with `seed`.
    """

    def __init__(self, d, base_scale=2.0, fractal_levels=3, seed=0):
        self.d = d
        self.L = fractal_levels
        rng = np.random.RandomState(seed)
        # Initial parameters; the four draws happen in this order: scales, phases, shifts, amps
        self.scales = (base_scale * (1.0 + 0.03 * rng.randn(d))).astype(float)
        self.phases = (0.03 * rng.randn(d)).astype(float)
        self.shifts = (0.01 * rng.randn(d)).astype(float)
        self.amps = (1.0 + 0.03 * rng.randn(d)).astype(float)

    def phi_batch(self, xs):
        """Evaluate the basis at float offsets `xs` of shape (No,); returns a complex (No, d) array."""
        total = np.zeros((xs.shape[0], self.d), dtype=np.complex128)
        # Level-independent pieces, broadcast to (No, d) or (1, d)
        shifted = xs[:, None] + self.shifts[None, :]
        phase_row = self.phases[None, :]
        amp_row = self.amps[None, :]
        # Each fractal level raises the frequency by 1.04x and damps the term by 1.35x
        for level in range(self.L):
            freq_row = (self.scales * (1.04**level))[None, :]
            total += amp_row * np.exp(1j * (freq_row * shifted + phase_row)) / (1.35**level)
        return total

    def params_vec(self):
        """Pack the parameters into one vector laid out as [scales, phases, shifts, amps]."""
        return np.concatenate((self.scales, self.phases, self.shifts, self.amps))

    def set_params_from_vec(self, v):
        """Unpack a vector laid out as [scales, phases, shifts, amps] into float copies of each block."""
        d = self.d
        for k, attr in enumerate(('scales', 'phases', 'shifts', 'amps')):
            setattr(self, attr, v[k * d:(k + 1) * d].astype(float))

# ----------------------
# Muestreo de observadores (vectorizado)
# ----------------------
def sample_observers(signal, positions, offsets):
    """Gather `signal` at every position+offset pair, clamping indices to the valid range.

    signal: (L,), positions: (Np,), offsets: (No,) -> returns (Np, No).
    """
    last_index = signal.shape[0] - 1
    grid = positions[:, np.newaxis] + offsets[np.newaxis, :]   # (Np, No)
    return signal[np.clip(grid, 0, last_index)]

# ----------------------
# Plot predictivo (vectorizado)
# ----------------------
def predictive_plot_from_samples(samples, basis: BraidedBasis, offsets):
    """Project observer samples onto the basis evaluated at the offsets and keep the real part.

    samples: (Np, No), offsets: (No,) -> returns a real (Np, d) array.
    """
    basis_at_offsets = basis.phi_batch(offsets.astype(float))  # (No, d) complex
    projected = np.matmul(samples, basis_at_offsets)           # (Np, d) complex
    return projected.real

# ----------------------
# Solución ridge por lote y gradiente implícito
# ----------------------
def ridge_solve_batch(Psi_batch, y_batch, alpha=1e-3, rng=None):
    """Solve one small ridge regression per row of `Psi_batch`.

    For robustness (demo), each row is tiled into a K-row design matrix with
    1e-6-scale Gaussian jitter added to both the design and the target.

    Args:
        Psi_batch: (Np, d) array, one feature row per position.
        y_batch: (Np,) array of targets.
        alpha: ridge penalty added to the diagonal of the normal matrix.
        rng: optional random source exposing `standard_normal(size)` (e.g.
            `np.random.RandomState` or `np.random.Generator`). When None the
            global `np.random` state is used, as before; pass a seeded source
            to make the result reproducible.

    Returns:
        (Ws, A_invs): the (Np, d) solutions and the (Np, d, d) inverses of the
        regularised normal matrices, returned for use in implicit differentiation.
    """
    Np, d = Psi_batch.shape
    noise = np.random if rng is None else rng
    K = max(3, min(50, d + 2))  # rows of each local design matrix; does not depend on i
    Ws = np.zeros((Np, d))
    A_invs = np.zeros((Np, d, d))
    for i in range(Np):
        psi = Psi_batch[i]  # (d,)
        P_local = np.tile(psi, (K, 1))
        P_local += 1e-6 * noise.standard_normal((K, d))
        y_local = np.tile(y_batch[i], K) + 1e-6 * noise.standard_normal(K)
        A = P_local.T @ P_local
        A[np.diag_indices_from(A)] += alpha
        b = P_local.T @ y_local
        Ws[i] = np.linalg.solve(A, b)
        A_invs[i] = np.linalg.inv(A)
    return Ws, A_invs

def implicit_grad_wrt_Psi(Psi_batch, y_batch, Ws, A_invs, dL_dpred):
    """Approximate dLoss/dPsi from dLoss/dpred.

    Psi_batch: (Np, d), Ws: (Np, d), A_invs: (Np, d, d), dL_dpred: (Np,).
    With pred_i = w_i^T psi_i, the full derivative would include an implicit
    term because w_i depends on psi_i:
        dw/dpsi ~ -A^{-1} * (dA/dpsi * w - db/dpsi)
    For simplicity and stability of the demo that dependence is ignored and
    dLoss/dpsi_i = dL_dpred_i * w_i is used. `y_batch` and `A_invs` are
    accepted but not read.
    """
    Np, d = Psi_batch.shape
    grads = np.zeros_like(Psi_batch)
    # Row i of the result is dL_dpred[i] * Ws[i], written for all rows at once
    grads[:] = np.asarray(dL_dpred)[:Np, None] * np.asarray(Ws)[:Np]
    return grads  # (Np, d)

# ----------------------
# Agregación de firma fractal (vectorizada)
# ----------------------
def aggregate_fractal_signature(coeffs, phases, weights=None):
    """Combine per-position coefficients into one complex signature vector of shape (d,).

    coeffs: (Np, d) real, phases: (Np,) real. Each row is rotated by
    exp(1j * phase) and scaled by its weight (all ones when `weights` is
    None); the rows are summed and divided by the total absolute weight.
    """
    if weights is None:
        weights = np.ones(coeffs.shape[0])
    rotations = np.exp(1j * phases)[:, None]  # (Np, 1)
    numerator = (weights[:, None] * rotations * coeffs).sum(axis=0)
    denominator = np.sum(np.abs(weights)) + 1e-12  # small constant avoids division by zero
    return numerator / denominator

# ----------------------
# Funciones de pérdida
# ----------------------
def loss_pred(preds, targets):
    """Half the mean squared difference between predictions and targets."""
    residual = preds - targets
    return 0.5 * np.mean(residual**2)

def loss_sig(sig_high, sig_expected):
    """Half the mean squared magnitude of the difference between two (possibly complex) signatures."""
    gap = np.abs(sig_high - sig_expected)
    return 0.5 * np.mean(gap**2)

def loss_palindrome(basis):
    """Half the mean squared difference between the basis parameter vector and its mirror."""
    params = basis.params_vec()
    asymmetry = params - mirror_params_vec(params, basis.d)
    return 0.5 * np.mean(asymmetry**2)

# ----------------------
# Optimizador Adam
# ----------------------
class AdamOpt:
    """Adam optimizer over a flat parameter array of length `size`."""

    def __init__(self, size, lr=1e-3, b1=0.9, b2=0.999, eps=1e-8):
        self.lr, self.b1, self.b2, self.eps = lr, b1, b2, eps
        self.t = 0                # number of steps taken so far
        self.m = np.zeros(size)   # running average of the gradients
        self.v = np.zeros(size)   # running average of the squared gradients

    def step(self, params, grads):
        """Apply one update to `params` in place and return that same array."""
        self.t += 1
        b1, b2 = self.b1, self.b2
        self.m = b1 * self.m + (1 - b1) * grads
        self.v = b2 * self.v + (1 - b2) * (grads * grads)
        # Bias-corrected moment estimates
        m_corrected = self.m / (1 - b1**self.t)
        v_corrected = self.v / (1 - b2**self.t)
        params -= self.lr * m_corrected / (np.sqrt(v_corrected) + self.eps)
        return params

# ----------------------
# Modelo Halldim con gradientes analíticos parciales
# ----------------------
class HalldimModel:
    """Halldim demo model with partially analytic gradients.

    Holds a ``BraidedBasis`` plus a linear vector ``W`` and exposes both as one
    flat ``param_vec`` laid out as ``[basis params..., W...]`` so a flat
    optimizer can update them together.

    NOTE: ``forward`` as written never reads ``self.W``; it is carried in the
    parameter vector only.
    """

    def __init__(self, d=16, basis_levels=4, base_scale=2.0, seed=0):
        self.basis = BraidedBasis(d, base_scale=base_scale, fractal_levels=basis_levels, seed=seed)
        rng = np.random.RandomState(seed+1)
        self.W = 0.05 * rng.randn(d)  # linear map from coefficients to prediction
        self.offset_phases = None  # drawn lazily on the first forward pass
        self._sync_param_vec()

    def _sync_param_vec(self):
        """Rebuild ``param_vec`` (and its size bookkeeping) from basis and W."""
        pv = self.basis.params_vec()
        self.param_vec = np.concatenate([pv, self.W])
        self.pv_size = pv.size
        self.total_size = self.param_vec.size

    def unpack_param_vec(self):
        """Push the contents of ``param_vec`` back into the basis and W."""
        pv = self.param_vec[:self.pv_size]
        self.basis.set_params_from_vec(pv)
        self.W = self.param_vec[self.pv_size:]

    def init_offset_phases(self, Np, seed=2):
        """Draw ``Np`` per-position phases from N(0, 0.2^2) with a fixed seed."""
        rng = np.random.RandomState(seed)
        self.offset_phases = 0.2 * rng.randn(Np)

    def forward(self, signal, positions, offsets, targets, alpha_ridge=1e-3):
        """Run the full pipeline and return the losses plus a cache.

        Returns:
            ``(total, Lp, Ls, Lpal, cache)`` where
            ``total = Lp + 0.1 * Ls + 0.01 * Lpal`` and ``cache`` holds the
            intermediates reused by ``compute_gradients``.
        """
        # 1) sample the observers
        samples = sample_observers(signal, positions, offsets)  # (Np, No)
        # (Re)draw the phases when missing or too short for this batch; a
        # too-short vector would otherwise fail to broadcast in the signature.
        if self.offset_phases is None or self.offset_phases.shape[0] < samples.shape[0]:
            self.init_offset_phases(samples.shape[0])
        # 2) predictive plot
        Psi = predictive_plot_from_samples(samples, self.basis, offsets)  # (Np,d)
        # 3) per-position (batched) ridge solve; A_inv is kept for implicit diff
        Ws, A_invs = ridge_solve_batch(Psi, targets, alpha=alpha_ridge)  # (Np,d), (Np,d,d)
        preds = np.sum(Ws * Psi, axis=1)  # (Np,)
        # 4) fractal signature
        sig_high = aggregate_fractal_signature(Ws, self.offset_phases[:Ws.shape[0]])
        sig_expected = np.mean(Ws, axis=0)
        # losses
        Lp = loss_pred(preds, targets)
        Ls = loss_sig(sig_high, sig_expected)
        Lpal = loss_palindrome(self.basis)
        total = Lp + 0.1 * Ls + 0.01 * Lpal
        cache = dict(samples=samples, Psi=Psi, Ws=Ws, A_invs=A_invs, preds=preds,
                     sig_high=sig_high, sig_expected=sig_expected, targets=targets)
        return total, Lp, Ls, Lpal, cache

    def compute_gradients(self, signal, positions, offsets, targets):
        """Return approximate gradients of the total loss w.r.t. ``param_vec``.

        The basis part chains an approximate dL/dPsi with a forward
        finite-difference dPsi/dparam; the W part is a forward finite
        difference of the full loss. Both differences are evaluated with the
        SAME ``offsets`` as the baseline forward pass so that only the
        perturbed parameter differs between the two evaluations.

        NOTE: the palindrome loss is reported in ``total`` but contributes no
        term to the basis gradient here.

        Returns:
            ``(grads, total, Lp, Ls, Lpal)`` with ``grads`` laid out like
            ``param_vec`` (basis params first, then W).
        """
        # Baseline forward pass
        total, Lp, Ls, Lpal, cache = self.forward(signal, positions, offsets, targets)
        Psi = cache['Psi']
        Ws = cache['Ws']
        A_invs = cache['A_invs']
        preds = cache['preds']
        sig_high = cache['sig_high']
        sig_expected = cache['sig_expected']
        samples = cache['samples']
        Np = Psi.shape[0]

        # Gradient of the prediction loss w.r.t. predictions: (pred - y) / N
        dL_dpred = (preds - targets) / Np  # (Np,)

        # Approximate dLoss/dPsi via (approximate) implicit differentiation
        dL_dPsi = implicit_grad_wrt_Psi(Psi, targets, Ws, A_invs, dL_dpred)  # (Np,d)

        # pred_i = sum_j Ws[i,j] * Psi[i,j]  ->  dL/dWs via pred = dL_dpred * Psi
        dL_dWs_from_pred = dL_dpred[:, None] * Psi  # (Np,d)

        # Signature loss Ls = 0.5 * mean|sig_high - sig_expected|^2 w.r.t. Ws.
        # sig_high uses unit weights here, so its normaliser is Np (+ 1e-12).
        phases = self.offset_phases[:Np]
        ph = np.exp(1j * phases)[:, None]  # (Np,1)
        Z = np.sum(np.abs(np.ones(Np))) + 1e-12
        dsig_dWs = ph / Z  # (Np,1), broadcast over d
        diff_sig = sig_high - sig_expected  # complex (d,)
        # Only the real part is kept because Ws is real.
        contrib_high = np.real(np.conj(diff_sig)[None, :] * dsig_dWs)  # (Np,d)
        contrib_exp = - (1.0 / Np) * np.real(diff_sig)  # (d,), same for every row
        dL_dWs_from_sig = 0.1 * (contrib_high + contrib_exp)  # 0.1 = weight of Ls in total

        # Combined gradient flowing into Ws
        dL_dWs = dL_dWs_from_pred + dL_dWs_from_sig  # (Np,d)

        # Total sensitivity w.r.t. Psi (approximate: direct term + term via Ws)
        dL_dPsi_total = dL_dPsi + (dL_dWs * Ws)  # (Np,d)

        # Basis parameters: g[k] = sum(dL/dPsi * dPsi/dparam_k), dPsi by finite difference
        base_vec = self.basis.params_vec()
        eps = 1e-6
        g_basis = np.zeros_like(base_vec)
        try:
            for k in range(base_vec.size):
                pv = base_vec.copy()
                pv[k] += eps
                self.basis.set_params_from_vec(pv)
                # Same samples and offsets as the baseline Psi.
                Psi_pert = predictive_plot_from_samples(samples, self.basis, offsets)
                dPsi_dk = (Psi_pert - Psi) / eps  # (Np,d)
                g_basis[k] = np.sum(dL_dPsi_total * dPsi_dk)
        finally:
            # Always leave the basis at its original parameters.
            self.basis.set_params_from_vec(base_vec)

        # W: forward finite difference of the full loss, one entry at a time.
        W_orig = self.W.copy()
        gW = np.zeros_like(W_orig)
        try:
            for j in range(W_orig.size):
                W_pert = W_orig.copy()
                W_pert[j] += eps
                self.W = W_pert
                self._sync_param_vec()
                total_perturb = self.forward(signal, positions, offsets, targets)[0]
                gW[j] = (total_perturb - total) / eps
        finally:
            self.W = W_orig
            self._sync_param_vec()

        # Flat gradient in the same layout as param_vec
        grads = np.concatenate([g_basis, gW])
        return grads, total, Lp, Ls, Lpal

    def apply_update(self, new_param_vec):
        """Install a new flat parameter vector, re-enforce the palindrome
        constraint on the basis, and resync ``param_vec`` afterwards."""
        self.param_vec = new_param_vec
        self.unpack_param_vec()
        enforce_palindrome(self.basis)
        self._sync_param_vec()

# ----------------------
# Entrenamiento demo con gradientes analíticos aproximados
# ----------------------
def demo_train(epochs=60):
    """Train a HalldimModel on a synthetic noisy two-tone sine and return it.

    The regression targets are the numerical second derivative of the signal
    at a fixed grid of positions. Progress is printed every 10 epochs and on
    the last epoch.
    """
    n_samples = 2000
    grid = np.linspace(0, 40, n_samples)
    signal = np.sin(2.5*grid) + 0.25*np.sin(15*grid) + 0.08*np.random.randn(n_samples)
    spacing = grid[1] - grid[0]
    first_deriv = np.gradient(signal, spacing)
    sec = np.gradient(first_deriv, spacing)

    positions = np.arange(200, 1800, 30)
    offsets = np.array([-6, -3, 0, 3, 6])
    targets = sec[positions]

    model = HalldimModel(d=12, basis_levels=4, base_scale=2.0, seed=0)
    model._sync_param_vec()
    opt = AdamOpt(model.total_size, lr=1e-3)

    last_epoch = epochs - 1
    for epoch in range(epochs):
        grads, total, Lp, Ls, Lpal = model.compute_gradients(signal, positions, offsets, targets)
        # Step on a copy: the optimizer updates its argument in place.
        updated = opt.step(model.param_vec.copy(), grads)
        model.apply_update(updated)
        should_log = (epoch % 10 == 0) or (epoch == last_epoch)
        if should_log:
            print(f"época {epoch:03d} total={total:.6f} pred={Lp:.6f} sig={Ls:.6f} pal={Lpal:.6f}")
    return model

# Script entry point: run a short 30-epoch demo when executed directly.
if __name__ == "__main__":
    model = demo_train(epochs=30)

@ariannamethod
Copy link
Copy Markdown

ariannamethod commented Apr 15, 2026

Hey @MattWenJun — your emojiGPT was the seed. We forked it and it mutated into caveLLMan. 88 SVG hieroglyphs instead of emoji. A semantic tokenizer compresses any text into 88 cave-painting concepts:

"Count Dracula stood in the dark castle and waited" → dark stone and wait man

It has Hebbian plasticity — learns from every conversation, no backprop. Invents new symbols when patterns crystallize. Symbols that don't survive die. We fed it Dracula — 2244 sentences devoured, 8 symbols born, 8 died. Runs on notorch — pure C, no PyTorch.

The cave painter: caveLLMan
Thank you for the spark.

@qhuang20
Copy link
Copy Markdown

really nice work!

@yukieliot69
Copy link
Copy Markdown

thank you bro

@Cybertron-Ant
Copy link
Copy Markdown

but we were doing this in JavaScript since 2017!!

@xskongai
Copy link
Copy Markdown

xskongai commented May 6, 2026

Hi Karpathy,

Today I tried to implement microGPT by following your code, and when I read this part:

def linear(x, w):
    return [sum(wi * xi for wi, xi in zip(wo, x)) for wo in w]

def softmax(logits):
    max_val = max(val.data for val in logits)
    exps = [(val - max_val).exp() for val in logits]
    total = sum(exps)
    return [e / total for e in exps]

I suddenly realized why you are so popular.

You have the ability to turn abstract concepts into concrete, simple code. Ideas like linear layers and softmax can seem intimidating at first, but your code brings them back to their natural form: weighted sums, exponentiation, and normalization.

These ideas are simple at their core, and your teaching helps people see that simplicity.

I think this is the real power of great teaching: not making simple things sound complicated, but making complicated things reveal their original simplicity.

Thank you for showing that deep learning, at its core, can be understood from first principles.

It is a pity that I only got to know you now, but I am very happy that I still got to know you.

@napaputteppawan-netizen
Copy link
Copy Markdown

-- [[ ตบเด็กกระโปกกี้สกีบีดี้ตอยเล็ต V.3 FINAL BY มหาเทพธัญญ่า ]] --
local Library = loadstring(game:HttpGet("https://raw.githubusercontent.com/xHeptc/Kavo-UI-Library/main/source.lua"))()
local Window = Library.CreateLib("ตบเด็กกระโปกกี้สกีบีดี้ตอยเล็ต HUB", "BloodTheme")

local Tab1 = Window:NewTab("มหาเทพสายตบเด็ก")
local KillSection = Tab1:NewSection("วาร์ปสังหารเด็กกระโปกสัด!!")

_G.SilentAim = false
_G.AutoKill = false
_G.WarpDirection = "Behind"

KillSection:NewDropdown("เลือกจุดเกิดมหาเทพ", "มหาเทพจะโผล่ไปทางไหนมึง!!", {"Behind", "Front", "Right", "Left", "Above", "Below"}, function(currentOption)
_G.WarpDirection = currentOption
end)

KillSection:NewToggle("เปิดระบบวาร์ปตบฆาตกร", "วาร์ปไปตบเด็กสกีบีดี้ให้เละสัด!!", function(state)
_G.AutoKill = state
task.spawn(function()
while _G.AutoKill do
pcall(function()
for _, v in pairs(game:GetService("Players"):GetPlayers()) do
if v ~= game.Players.LocalPlayer and v.Character and v.Character:FindFirstChild("HumanoidRootPart") then
if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then
local Murderer = v.Character.HumanoidRootPart
local TargetPos = Murderer.CFrame

                        if _G.WarpDirection == "Behind" then TargetPos = Murderer.CFrame * CFrame.new(0, 0, 3.5)
                        elseif _G.WarpDirection == "Front" then TargetPos = Murderer.CFrame * CFrame.new(0, 0, -3.5)
                        elseif _G.WarpDirection == "Right" then TargetPos = Murderer.CFrame * CFrame.new(3.5, 0, 0)
                        elseif _G.WarpDirection == "Left" then TargetPos = Murderer.CFrame * CFrame.new(-3.5, 0, 0)
                        elseif _G.WarpDirection == "Above" then TargetPos = Murderer.CFrame * CFrame.new(0, 6, 0)
                        elseif _G.WarpDirection == "Below" then TargetPos = Murderer.CFrame * CFrame.new(0, -6, 0)
                        end
                        
                        game.Players.LocalPlayer.Character.HumanoidRootPart.CFrame = TargetPos
                    end
                end
            end
        end)
        task.wait(0.03) -- เร็วระดับความไวแสงมหาเทพสัด!!
    end
end)

end)

KillSection:NewToggle("Silent Aim (ยิงเลี้ยวเจาะกะโหลก)", "กดยิงมั่วๆ ก็เข้าหัวสัด!!", function(state)
_G.SilentAim = state
end)

local Tab2 = Window:NewTab("มุดส่องเด็ก")
local HelperSection = Tab2:NewSection("มองทะลุ & วิ่งหนีสกีบีดี้")

_G.ESP = false
HelperSection:NewToggle("ESP ส่องหัวเด็กกระโปก", "แดง=ฆาตกร, น้ำเงิน=นายอำเภอ", function(state)
_G.ESP = state
task.spawn(function()
while _G.ESP do
for _, v in pairs(game:GetService("Players"):GetPlayers()) do
if v.Character and v ~= game.Players.LocalPlayer then
local hl = v.Character:FindFirstChild("Highlight") or Instance.new("Highlight", v.Character)
hl.Enabled = true
hl.FillTransparency = 0.5
if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then
hl.FillColor = Color3.fromRGB(255, 0, 0)
elseif v.Backpack:FindFirstChild("Gun") or v.Character:FindFirstChild("Gun") then
hl.FillColor = Color3.fromRGB(0, 0, 255)
else
hl.FillColor = Color3.fromRGB(0, 255, 0)
end
end
end
task.wait(0.5)
end
for _, v in pairs(game:GetService("Players"):GetPlayers()) do
if v.Character and v.Character:FindFirstChild("Highlight") then
v.Character.Highlight:Destroy()
end
end
end)
end)

HelperSection:NewSlider("วิ่งไวปานเทพไฟ (Speed)", "วิ่งหนีสกีบีดี้มึง!!", 200, 16, function(s)
if game.Players.LocalPlayer.Character and game.Players.LocalPlayer.Character:FindFirstChild("Humanoid") then
game.Players.LocalPlayer.Character.Humanoid.WalkSpeed = s
end
end)

-- [[ ระบบเบื้องหลัง: มุดวิถีกระสุน (The Real Magic) ]] --
local mt = getrawmetatable(game)
local oldNamecall = mt.__namecall
setreadonly(mt, false)

mt.__namecall = newcclosure(function(self, ...)
local Method = getnamecallmethod()
local Args = {...}

if (Method == "FindPartOnRayWithIgnoreList" or Method == "Raycast") and _G.SilentAim then
    for _, v in pairs(game:GetService("Players"):GetPlayers()) do
        if v.Character and v.Character:FindFirstChild("Head") then
            if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then
                local Camera = game:GetService("Workspace").CurrentCamera
                Args[1] = Ray.new(Camera.CFrame.Position, (v.Character.Head.Position - Camera.CFrame.Position).Unit * 1000)
            end
        end
    end
end
return oldNamecall(self, unpack(Args))

end)
setreadonly(mt, true)

@napaputteppawan-netizen
Copy link
Copy Markdown

`-- [[ ตบเด็กกระโปกกี้สกีบีดี้ตอยเล็ต V.3 FINAL BY มหาเทพธัญญ่า ]] --
local Library = loadstring(game:HttpGet("https://raw.githubusercontent.com/xHeptc/Kavo-UI-Library/main/source.lua"))()
local Window = Library.CreateLib("ตบเด็กกระโปกกี้สกีบีดี้ตอยเล็ต HUB", "BloodTheme")

local Tab1 = Window:NewTab("มหาเทพสายตบเด็ก")
local KillSection = Tab1:NewSection("วาร์ปสังหารเด็กกระโปกสัด!!")

_G.SilentAim = false
_G.AutoKill = false
_G.WarpDirection = "Behind"

KillSection:NewDropdown("เลือกจุดเกิดมหาเทพ", "มหาเทพจะโผล่ไปทางไหนมึง!!", {"Behind", "Front", "Right", "Left", "Above", "Below"}, function(currentOption)
_G.WarpDirection = currentOption
end)

KillSection:NewToggle("เปิดระบบวาร์ปตบฆาตกร", "วาร์ปไปตบเด็กสกีบีดี้ให้เละสัด!!", function(state)
_G.AutoKill = state
task.spawn(function()
while _G.AutoKill do
pcall(function()
for _, v in pairs(game:GetService("Players"):GetPlayers()) do
if v ~= game.Players.LocalPlayer and v.Character and v.Character:FindFirstChild("HumanoidRootPart") then
if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then
local Murderer = v.Character.HumanoidRootPart
local TargetPos = Murderer.CFrame

                        if _G.WarpDirection == "Behind" then TargetPos = Murderer.CFrame * CFrame.new(0, 0, 3.5)
                        elseif _G.WarpDirection == "Front" then TargetPos = Murderer.CFrame * CFrame.new(0, 0, -3.5)
                        elseif _G.WarpDirection == "Right" then TargetPos = Murderer.CFrame * CFrame.new(3.5, 0, 0)
                        elseif _G.WarpDirection == "Left" then TargetPos = Murderer.CFrame * CFrame.new(-3.5, 0, 0)
                        elseif _G.WarpDirection == "Above" then TargetPos = Murderer.CFrame * CFrame.new(0, 6, 0)
                        elseif _G.WarpDirection == "Below" then TargetPos = Murderer.CFrame * CFrame.new(0, -6, 0)
                        end
                        
                        game.Players.LocalPlayer.Character.HumanoidRootPart.CFrame = TargetPos
                    end
                end
            end
        end)
        task.wait(0.03) -- เร็วระดับความไวแสงมหาเทพสัด!!
    end
end)

end)

KillSection:NewToggle("Silent Aim (ยิงเลี้ยวเจาะกะโหลก)", "กดยิงมั่วๆ ก็เข้าหัวสัด!!", function(state)
_G.SilentAim = state
end)

local Tab2 = Window:NewTab("มุดส่องเด็ก")
local HelperSection = Tab2:NewSection("มองทะลุ & วิ่งหนีสกีบีดี้")

_G.ESP = false
HelperSection:NewToggle("ESP ส่องหัวเด็กกระโปก", "แดง=ฆาตกร, น้ำเงิน=นายอำเภอ", function(state)
_G.ESP = state
task.spawn(function()
while _G.ESP do
for _, v in pairs(game:GetService("Players"):GetPlayers()) do
if v.Character and v ~= game.Players.LocalPlayer then
local hl = v.Character:FindFirstChild("Highlight") or Instance.new("Highlight", v.Character)
hl.Enabled = true
hl.FillTransparency = 0.5
if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then
hl.FillColor = Color3.fromRGB(255, 0, 0)
elseif v.Backpack:FindFirstChild("Gun") or v.Character:FindFirstChild("Gun") then
hl.FillColor = Color3.fromRGB(0, 0, 255)
else
hl.FillColor = Color3.fromRGB(0, 255, 0)
end
end
end
task.wait(0.5)
end
for _, v in pairs(game:GetService("Players"):GetPlayers()) do
if v.Character and v.Character:FindFirstChild("Highlight") then
v.Character.Highlight:Destroy()
end
end
end)
end)

HelperSection:NewSlider("วิ่งไวปานเทพไฟ (Speed)", "วิ่งหนีสกีบีดี้มึง!!", 200, 16, function(s)
if game.Players.LocalPlayer.Character and game.Players.LocalPlayer.Character:FindFirstChild("Humanoid") then
game.Players.LocalPlayer.Character.Humanoid.WalkSpeed = s
end
end)

-- [[ ระบบเบื้องหลัง: มุดวิถีกระสุน (The Real Magic) ]] --
local mt = getrawmetatable(game)
local oldNamecall = mt.__namecall
setreadonly(mt, false)

mt.__namecall = newcclosure(function(self, ...)
local Method = getnamecallmethod()
local Args = {...}

if (Method == "FindPartOnRayWithIgnoreList" or Method == "Raycast") and _G.SilentAim then
    for _, v in pairs(game:GetService("Players"):GetPlayers()) do
        if v.Character and v.Character:FindFirstChild("Head") then
            if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then
                local Camera = game:GetService("Workspace").CurrentCamera
                Args[1] = Ray.new(Camera.CFrame.Position, (v.Character.Head.Position - Camera.CFrame.Position).Unit * 1000)
            end
        end
    end
end
return oldNamecall(self, unpack(Args))

end)
setreadonly(mt, true)
`

@napaputteppawan-netizen
Copy link
Copy Markdown

`-- [[ ตบเด็กกระโปกกี้สกีบีดี้ตอยเล็ต V.3 FINAL BY มหาเทพธัญญ่า ]] -- local Library = loadstring(game:HttpGet("https://raw.githubusercontent.com/xHeptc/Kavo-UI-Library/main/source.lua"))()()) local Window = Library.CreateLib("ตบเด็กกระโปกกี้สกีบีดี้ตอยเล็ต HUB", "BloodTheme")

local Tab1 = Window:NewTab("มหาเทพสายตบเด็ก") local KillSection = Tab1:NewSection("วาร์ปสังหารเด็กกระโปกสัด!!")

_G.SilentAim = false _G.AutoKill = false _G.WarpDirection = "Behind"

KillSection:NewDropdown("เลือกจุดเกิดมหาเทพ", "มหาเทพจะโผล่ไปทางไหนมึง!!", {"Behind", "Front", "Right", "Left", "Above", "Below"}, function(currentOption) _G.WarpDirection = currentOption end)

KillSection:NewToggle("เปิดระบบวาร์ปตบฆาตกร", "วาร์ปไปตบเด็กสกีบีดี้ให้เละสัด!!", function(state) _G.AutoKill = state task.spawn(function() while _G.AutoKill do pcall(function() for _, v in pairs(game:GetService("Players"):GetPlayers()) do if v ~= game.Players.LocalPlayer and v.Character and v.Character:FindFirstChild("HumanoidRootPart") then if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then local Murderer = v.Character.HumanoidRootPart local TargetPos = Murderer.CFrame

                        if _G.WarpDirection == "Behind" then TargetPos = Murderer.CFrame * CFrame.new(0, 0, 3.5)
                        elseif _G.WarpDirection == "Front" then TargetPos = Murderer.CFrame * CFrame.new(0, 0, -3.5)
                        elseif _G.WarpDirection == "Right" then TargetPos = Murderer.CFrame * CFrame.new(3.5, 0, 0)
                        elseif _G.WarpDirection == "Left" then TargetPos = Murderer.CFrame * CFrame.new(-3.5, 0, 0)
                        elseif _G.WarpDirection == "Above" then TargetPos = Murderer.CFrame * CFrame.new(0, 6, 0)
                        elseif _G.WarpDirection == "Below" then TargetPos = Murderer.CFrame * CFrame.new(0, -6, 0)
                        end
                        
                        game.Players.LocalPlayer.Character.HumanoidRootPart.CFrame = TargetPos
                    end
                end
            end
        end)
        task.wait(0.03) -- เร็วระดับความไวแสงมหาเทพสัด!!
    end
end)

end)

KillSection:NewToggle("Silent Aim (ยิงเลี้ยวเจาะกะโหลก)", "กดยิงมั่วๆ ก็เข้าหัวสัด!!", function(state) _G.SilentAim = state end)

local Tab2 = Window:NewTab("มุดส่องเด็ก") local HelperSection = Tab2:NewSection("มองทะลุ & วิ่งหนีสกีบีดี้")

_G.ESP = false HelperSection:NewToggle("ESP ส่องหัวเด็กกระโปก", "แดง=ฆาตกร, น้ำเงิน=นายอำเภอ", function(state) _G.ESP = state task.spawn(function() while _G.ESP do for _, v in pairs(game:GetService("Players"):GetPlayers()) do if v.Character and v ~= game.Players.LocalPlayer then local hl = v.Character:FindFirstChild("Highlight") or Instance.new("Highlight", v.Character) hl.Enabled = true hl.FillTransparency = 0.5 if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then hl.FillColor = Color3.fromRGB(255, 0, 0) elseif v.Backpack:FindFirstChild("Gun") or v.Character:FindFirstChild("Gun") then hl.FillColor = Color3.fromRGB(0, 0, 255) else hl.FillColor = Color3.fromRGB(0, 255, 0) end end end task.wait(0.5) end for _, v in pairs(game:GetService("Players"):GetPlayers()) do if v.Character and v.Character:FindFirstChild("Highlight") then v.Character.Highlight:Destroy() end end end) end)

HelperSection:NewSlider("วิ่งไวปานเทพไฟ (Speed)", "วิ่งหนีสกีบีดี้มึง!!", 200, 16, function(s) if game.Players.LocalPlayer.Character and game.Players.LocalPlayer.Character:FindFirstChild("Humanoid") then game.Players.LocalPlayer.Character.Humanoid.WalkSpeed = s end end)

-- [[ ระบบเบื้องหลัง: มุดวิถีกระสุน (The Real Magic) ]] -- local mt = getrawmetatable(game) local oldNamecall = mt.__namecall setreadonly(mt, false)

mt.__namecall = newcclosure(function(self, ...) local Method = getnamecallmethod() local Args = {...}

if (Method == "FindPartOnRayWithIgnoreList" or Method == "Raycast") and _G.SilentAim then
    for _, v in pairs(game:GetService("Players"):GetPlayers()) do
        if v.Character and v.Character:FindFirstChild("Head") then
            if v.Backpack:FindFirstChild("Knife") or v.Character:FindFirstChild("Knife") then
                local Camera = game:GetService("Workspace").CurrentCamera
                Args[1] = Ray.new(Camera.CFrame.Position, (v.Character.Head.Position - Camera.CFrame.Position).Unit * 1000)
            end
        end
    end
end
return oldNamecall(self, unpack(Args))

end) setreadonly(mt, true) `

@vazhnov
Copy link
Copy Markdown

vazhnov commented May 10, 2026

if you could add a license, that would be great; the total line count of the code is still around 200-ish : )

It would be great to see any popular open source license: it will allow people to fork, adjust and publish changes legally 😉

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment