Skip to content

Instantly share code, notes, and snippets.

@finbarrtimbers
Created July 17, 2025 14:31
Show Gist options
  • Select an option

  • Save finbarrtimbers/991ce4e043c61c41332a7e636b1be978 to your computer and use it in GitHub Desktop.

Select an option

Save finbarrtimbers/991ce4e043c61c41332a7e636b1be978 to your computer and use it in GitHub Desktop.
TikZ diagram of a decoder block
% Transformer Decoder Layer (minimal, cross‑attn removed)
% TikZ diagram mimicking the iconic style from "Attention Is All You Need".
% Residual arrows fully inside the layer box, bifurcating around blocks.
% Compile with: pdflatex decoder_layer.tex
\documentclass[tikz,border=10pt]{standalone}
\usepackage{tikz}
\usetikzlibrary{arrows.meta,positioning,decorations.pathreplacing,calc,fit}
% ----------------- global styles -------------------
% Styles shared by the picture below: text/arrow defaults, the two edge kinds,
% and one box style per sub-layer type.
\tikzset{
  % Defaults: sans-serif node text, Stealth arrow tips, 0.8pt strokes.
  font=\sffamily,
  >=Stealth,
  line width=0.8pt,
  % Edge styles: `flow` is the main data path; `skip` is the residual path,
  % with rounded corners so its right-angle bends read as one wire.
  flow/.style={->},
  skip/.style={->, rounded corners=4pt},
  % Block styles
  ffblock/.style={rectangle, rounded corners=3pt, draw=black, fill=cyan!20,
    minimum width=4cm, minimum height=1.2cm, align=center},
  attnblock/.style={rectangle, rounded corners=3pt, draw=black, fill=orange!20,
    minimum width=4cm, minimum height=1.6cm, align=center},
  % The `font` key replaces the current font setting instead of appending to
  % it, so \sffamily is restated here; with only \footnotesize these labels
  % would drop back to the serif default while the other blocks stay sans.
  addnorm/.style={rectangle, rounded corners=3pt, draw=black, fill=yellow!30,
    minimum width=3.2cm, minimum height=0.9cm, align=center,
    font=\sffamily\footnotesize},
}
\begin{document}
% One decoder layer, drawn bottom to top:
%   Masked MHA -> Add & Norm -> Feed Forward -> Add & Norm
% plus two residual (skip) paths that wrap the attention and feed-forward
% blocks, a rounded box around the layer, and a repetition label.
\begin{tikzpicture}[node distance=0.8cm]
  % --------------------------------------------------
  % Blocks (bottom to top)
  % The label size is given through `font=` rather than inside the text: in a
  % multi-line node (`align=...`) a size command in the text only affects the
  % line it appears on, so "Multi-Head" and "Attention" would stay normal size.
  % The hyphen is plain ASCII so the label does not depend on how the engine
  % maps U+2011.
  \node[attnblock, font=\sffamily\footnotesize] (mha)
    {Masked\\Multi-Head\\Attention};
  \node[addnorm] (add1) [above=0.6cm of mha]   {Add \& Norm};
  \node[ffblock] (ff)   [above=0.95cm of add1] {Feed\\Forward};
  \node[addnorm] (add2) [above=0.6cm of ff]    {Add \& Norm};
  % --------------------------------------------------
  % Main vertical data flow
  % `input` / `output` sit 1cm below / above the stack and anchor the
  % external arrows and their labels.
  \coordinate (input)  at ($(mha.south)+(0,-1.0)$);
  \coordinate (output) at ($(add2.north)+(0,1.0)$);
  \draw[flow] (input)      -- (mha.south);
  \draw[flow] (mha.north)  -- (add1.south);
  \draw[flow] (add1.north) -- (ff.south);
  \draw[flow] (ff.north)   -- (add2.south);
  \draw[flow] (add2.north) -- (output);
  % --------------------------------------------------
  % Internal residual (skip) connections
  % Each path branches off the main line, runs left, climbs a vertical channel
  % 0.55cm to the left of the target Add & Norm box, and enters that box from
  % the west. `-|` (horizontal, then vertical) takes the channel's x position
  % from the anchor itself, so it stays vertical if the box widths change.
  % 1) input -> add1 (wraps the masked MHA); branch 0.75cm above `input`
  \draw[skip]
    (input) -- ++(0,0.75cm)
            -| ($(add1.west)-(0.55cm,0)$)
            -- (add1.west);
  % 2) add1 output -> add2 (wraps the feed-forward); branch 0.45cm above add1
  \draw[skip]
    (add1.north) -- ++(0,0.45cm)
                 -| ($(add2.west)-(0.55cm,0)$)
                 -- (add2.west);
  % --------------------------------------------------
  % Rounded rectangle around the layer: fitted to the bottom and top blocks
  % with 0.5cm padding; no fill, so it does not hide anything drawn earlier.
  \node[draw, rounded corners=14pt, inner sep=0.5cm, fit=(mha)(add2)] (bbox) {};
  % --------------------------------------------------
  % "x N" repetition label to the right of the box
  \node[font=\Large] at ($(bbox.east)+(0.6cm,0)$) {\(\times N\)};
  % --------------------------------------------------
  % Labels for external signals (optional)
  % \sffamily is restated because `font=` replaces the global font setting.
  \node[font=\sffamily\footnotesize, below=0.0cm of input]  {Input};
  \node[font=\sffamily\footnotesize, above=0.0cm of output] {Output};
\end{tikzpicture}
\end{document}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment