Skip to content

Instantly share code, notes, and snippets.

@shreyansh26
Forked from YouJiacheng/dsv3_macs.py
Created January 30, 2025 03:02
Show Gist options
  • Save shreyansh26/22afc1e267902350ecdd87fad3ef9a74 to your computer and use it in GitHub Desktop.
Save shreyansh26/22afc1e267902350ecdd87fad3ef9a74 to your computer and use it in GitHub Desktop.
from dataclasses import dataclass
@dataclass
class Args:
    """Model hyper-parameters consumed by the ``*_macs`` functions in this file.

    NOTE(review): the defaults presumably describe DeepSeek-V3 (the gist is
    named ``dsv3_macs.py``) -- confirm against the official model config.
    """

    # Output width of the final projection (see head_macs).
    vocab_size: int = 129280
    # Model width; one side of every projection counted in this file.
    dim: int = 7168
    # MLP intermediate width for the dense layers (see model_macs).
    inter_dim: int = 18432
    # MLP intermediate width for each expert (see moe_macs).
    moe_inter_dim: int = 2048
    # Total number of layers; every layer is charged one mla_macs().
    n_layers: int = 61
    # Layers that use the dense MLP; the remaining layers use the MoE cost.
    n_dense_layers: int = 3
    n_heads: int = 128
    # Not read by any MAC function visible in this file.
    n_routed_experts: int = 256
    n_shared_experts: int = 1
    n_activated_experts: int = 8
    # Ranks of the Q and KV down projections (see mla_macs).
    q_lora_rank: int = 1536
    kv_lora_rank: int = 512
    # The two parts of the per-head query/key width (summed in qk_head_dim).
    qk_nope_head_dim: int = 128
    qk_rope_head_dim: int = 64
    v_head_dim: int = 128
    # Annotated so that it is a real dataclass field: without the annotation
    # ``Args(seq_len=...)`` raises TypeError and the value is left out of
    # ``repr``/``==``. Only sdpa_macs reads it.
    seq_len: int = 4096

    @property
    def qk_head_dim(self) -> int:
        """Per-head query/key width: ``qk_nope_head_dim + qk_rope_head_dim``."""
        return self.qk_nope_head_dim + self.qk_rope_head_dim

    @property
    def n_effective_experts(self) -> int:
        """Experts counted per MoE layer: ``n_shared_experts + n_activated_experts``."""
        return self.n_shared_experts + self.n_activated_experts
# Module-level configuration instance (all defaults); every *_macs function
# in this file reads its dimensions from this global.
args = Args()
def sdpa_macs():
    """Return the attention-core MAC count derived from the global ``args``.

    Two terms are added -- one scaled by the query/key head width and one
    by the value head width, each multiplied by ``seq_len * n_heads`` -- and
    the sum is halved. The result is a float because of the true division.

    NOTE(review): the halving presumably models causal masking (on average
    only half of the positions are attended) -- confirm.
    """
    positions_x_heads = args.seq_len * args.n_heads
    score_macs = positions_x_heads * args.qk_head_dim
    value_macs = positions_x_heads * args.v_head_dim
    return (score_macs + value_macs) / 2
def mla_macs():
    """Return the MAC count of one attention layer's linear projections.

    Counts the Q down/up projections, the KV down/up projections and the
    output projection, all sized from the global ``args``. The attention
    core itself (see ``sdpa_macs``) is deliberately not included.
    """
    q_down = args.dim * args.q_lora_rank
    q_up = args.q_lora_rank * args.n_heads * args.qk_head_dim
    # The KV down projection also produces the rope part of the key.
    kv_down = args.dim * (args.kv_lora_rank + args.qk_rope_head_dim)
    # The KV up projection expands to the no-rope key part plus the value.
    kv_up = args.kv_lora_rank * args.n_heads * (
        args.qk_nope_head_dim + args.v_head_dim
    )
    out_proj = args.n_heads * args.v_head_dim * args.dim
    return sum((q_down, q_up, kv_down, kv_up, out_proj))
def mlp_macs(inter_dim: int):
    """Return the MAC count of one MLP with intermediate width ``inter_dim``.

    The cost is three ``args.dim x inter_dim`` products.
    NOTE(review): presumably the gate, up and down projections -- confirm.
    """
    single_projection = args.dim * inter_dim
    return single_projection * 3
def moe_macs():
    """Return the MAC count of one MoE layer.

    Charges one expert-sized MLP (``args.moe_inter_dim``) for each of the
    ``args.n_effective_experts`` experts. No other MoE cost is counted here.
    """
    expert_count = args.n_effective_experts
    per_expert = mlp_macs(args.moe_inter_dim)
    return expert_count * per_expert
def head_macs():
    """Return the MAC count of the output head: ``args.dim * args.vocab_size``."""
    width, vocab = args.dim, args.vocab_size
    return width * vocab
def model_macs():
    """Return the total MAC count of the model described by the global ``args``.

    Sums the attention projections for every layer, the dense MLP for the
    ``n_dense_layers`` layers, the MoE cost for the remaining layers, and
    the output head.
    """
    moe_layer_count = args.n_layers - args.n_dense_layers
    contributions = (
        args.n_layers * mla_macs(),
        args.n_dense_layers * mlp_macs(args.inter_dim),
        moe_layer_count * moe_macs(),
        head_macs(),
    )
    return sum(contributions)
# Print the total MAC count divided by 1e9, i.e. expressed in billions.
print(model_macs() / 1e9)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment