Last active
April 16, 2025 16:04
-
-
Save geohot/0cad05378fcbaeb0dceec3e89e0d4d7b to your computer and use it in GitHub Desktop.
A 1024x1024x1024 matmul with a 2x2x2 core in OpenCL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * matmul: data0 = data1 * data2, computed one 2x2 output tile per work-group.
 *
 * The index arithmetic treats all three buffers as row-major float matrices
 * with a row stride of 1024 elements, and the reduction runs over 1024
 * elements (512 steps of 2), i.e. a 1024x1024x1024 product.
 *
 *   data0  output matrix; each work-group writes rows {2*gy, 2*gy+1},
 *          columns {2*gx, 2*gx+1}, where gy = get_group_id(1) and
 *          gx = get_group_id(0).
 *   data1  left operand, read two rows at a time.
 *   data2  right operand, read two columns at a time.
 *
 * NOTE(review): the tile is selected by work-group id only, so every
 * work-item in a group would compute and store the same tile. Presumably the
 * kernel is launched as a 512x512 grid of groups with a local size of 1 --
 * confirm against the host code.
 *
 * The float2 pointer casts require 8-byte alignment. Every offset used below
 * is even, so this holds as long as the buffer base addresses are at least
 * 8-byte aligned.
 */
__kernel void matmul(__global float* data0, const __global float* data1, const __global float* data2) {
  const int row_stride = 1024;         /* floats per matrix row */
  const int k_steps = 512;             /* reduction length 1024, 2 per step */

  const int tile_row = get_group_id(1); /* 512 */
  const int tile_col = get_group_id(0); /* 512 */

  /* First element of the two rows of data1 / data0 owned by this tile. */
  const int row_base = tile_row * 2 * row_stride;
  /* First column of data2 / data0 owned by this tile. */
  const int col_base = tile_col * 2;

  float2 acc_row0 = (float2)(0.0f, 0.0f); /* output row 2*tile_row     */
  float2 acc_row1 = (float2)(0.0f, 0.0f); /* output row 2*tile_row + 1 */

  for (int k = 0; k < k_steps; ++k) {
    const int a_off = row_base + k * 2;
    const int b_off = col_base + k * 2 * row_stride;

    /* 2x2 sub-block of data1: two rows, reduction columns 2k and 2k+1. */
    const float2 a_row0 = *((const __global float2*)(data1 + a_off));
    const float2 a_row1 = *((const __global float2*)(data1 + a_off + row_stride));
    /* 2x2 sub-block of data2: reduction rows 2k and 2k+1, two columns. */
    const float2 b_row0 = *((const __global float2*)(data2 + b_off));
    const float2 b_row1 = *((const __global float2*)(data2 + b_off + row_stride));

    /*
     * Scalar * float2 widens the scalar, so each statement updates both
     * columns of one output row. Per component, the products are
     * accumulated in the same order as the hand-expanded scalar form
     * (reduction element 2k first, then 2k+1).
     */
    acc_row0 = a_row0.x * b_row0 + acc_row0;
    acc_row0 = a_row0.y * b_row1 + acc_row0;
    acc_row1 = a_row1.x * b_row0 + acc_row1;
    acc_row1 = a_row1.y * b_row1 + acc_row1;
  }

  /* Write the finished 2x2 tile, one float2 per output row. */
  *((__global float2*)(data0 + row_base + col_base)) = acc_row0;
  *((__global float2*)(data0 + row_base + col_base + row_stride)) = acc_row1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment