Skip to content

Instantly share code, notes, and snippets.

@attentionmech
Created May 10, 2025 10:16
Show Gist options
  • Save attentionmech/89f2d45c743804390c843860bae4b5ea to your computer and use it in GitHub Desktop.
Save attentionmech/89f2d45c743804390c843860bae4b5ea to your computer and use it in GitHub Desktop.
matmul kernel vis
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import matplotlib.colors as mcolors
# Timeline JSON-style data: one {"step", "objects"} record per kernel stage,
# in execution order. "objects" maps an object type to its count for that step.
# Tile phases 0-3 all follow the same load -> sync -> MAC -> sync pattern with
# identical counts, so they are generated in place instead of written out.
timeline = [
    {"step": "kernel_launch", "objects": {"threads": 4096, "blocks": 16, "warps": 128}},
    {"step": "index_calculation", "objects": {"row_index": 4096, "col_index": 4096}},
    *(
        entry
        for phase in range(4)
        for entry in (
            {
                "step": f"tile_phase_{phase}_shared_memory_load",
                "objects": {"global_reads": 8192, "shared_stores": 8192},
            },
            {"step": f"tile_phase_{phase}_sync_1", "objects": {"sync_barriers": 16}},
            {"step": f"tile_phase_{phase}_mac", "objects": {"mac_ops": 65536}},
            {"step": f"tile_phase_{phase}_sync_2", "objects": {"sync_barriers": 16}},
        )
    ),
    {"step": "write_output", "objects": {"global_writes": 4096}},
]
# Bar colour for each object type that can appear in a timeline step's
# "objects" mapping. Entries are grouped by the step in which they occur.
object_colors = {
    # kernel_launch
    "threads": "skyblue",
    "blocks": "teal",
    "warps": "orange",
    # index_calculation
    "row_index": "green",
    "col_index": "lime",
    # tile_phase_*_shared_memory_load
    "global_reads": "red",
    "shared_stores": "magenta",
    # tile_phase_*_sync_*
    "sync_barriers": "gray",
    # tile_phase_*_mac
    "mac_ops": "gold",
    # write_output
    "global_writes": "blue",
}
# Normalize object sizes (log scale, so large counts do not dwarf small ones in the plot)
import numpy as np
def normalize(value, scale=10):
    """Return a log-compressed size for *value*.

    Computes ``log2(value + 1) / scale``. Adding 1 before taking the
    logarithm makes a count of 0 map to exactly 0 rather than ``-inf``.

    Args:
        value: Count to scale. Passed to ``np.log2``, so a NumPy array is
            processed element-wise.
        scale: Divisor applied to the logarithm; larger values give
            smaller results. Defaults to 10.

    Returns:
        ``log2(value + 1) / scale``.
    """
    return np.log2(value + 1) / scale
# Proxy artists for the legend; bar3d itself takes no per-bar label here.
from matplotlib.patches import Patch

# One 3D axes holding the whole timeline.
fig = plt.figure(figsize=(12, 10))
ax = fig.add_subplot(111, projection='3d')

layer_gap = 3  # z distance between the bases of consecutive steps
x_offset = 0  # passed as the y anchor of every bar, so all bars start at y = 0

for i, step in enumerate(timeline):
    z_base = i * layer_gap
    x = 0  # bars of one step sit side by side along x, restarting at 0 per step
    for obj_type, count in step["objects"].items():
        dx = normalize(count)  # bar width encodes the log-scaled count
        dy = 1
        dz = 1
        # Object types without a configured colour are drawn in black.
        color = object_colors.get(obj_type, "black")
        ax.bar3d(
            x, x_offset, z_base, dx, dy, dz,
            color=color, alpha=0.7, edgecolor="black",
        )
        x += dx + 0.2  # leave a 0.2 gap before the next bar

# Colour key: without it the object types cannot be told apart in the figure.
legend_handles = [
    Patch(facecolor=color, edgecolor="black", label=obj_type)
    for obj_type, color in object_colors.items()
]
ax.legend(handles=legend_handles, title="Object type", loc="upper left")

# Set labels and view
ax.set_xlabel("Object Blocks (scaled)")
ax.set_ylabel("Step Layer")
ax.set_zlabel("Timeline")
ax.set_title("CUDA Kernel Timeline as 3D Block Building")
plt.tight_layout()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment