Skip to content

Instantly share code, notes, and snippets.

View pashu123's full-sized avatar
😇
Working from home

Prashant Kumar pashu123

😇
Working from home
View GitHub Profile
import onnx
import onnx.helper as helper
# Model shape configuration used to build the ONNX graph's input/output tensors.
# NOTE(review): the graph-construction code that consumes these constants is not
# visible in this fragment — values below are presumably chosen to mimic a
# LLaMA-style transformer layer; confirm against the full script.
batch_size = 1          # number of sequences per forward pass
seq_len = 512           # query sequence length (tokens)
hidden_size = 4096      # model embedding width
kv_seq_len = 4          # key/value sequence length — much shorter than seq_len; TODO confirm intent (e.g. KV-cache slice)
kv_hidden_size = 16     # key/value hidden width — TODO confirm; 16 is far smaller than hidden_size / num_heads
num_heads = 32 # Example number of attention heads
#map = affine_map<(d0, d1, d2, d3) -> ()>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4, d3)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d4)>
#map5 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
#map6 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
module {
func.func @attention(%arg0: tensor<1x32x128x512xf32>, %arg1: tensor<1x32x128x512xf32>, %arg2: tensor<1x32x128x512xf32>) -> tensor<1x32x128x512xf32> {
%cst = arith.constant 1.250000e-01 : f32
#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
module {
func.func @attention(%arg0: tensor<1x32x128x512xf32>, %arg1: tensor<1x32x128x512xf32>, %arg2: tensor<1x32x128x512xf32>) -> tensor<1x32x128x512xf32> {
%cst = arith.constant 1.250000e-01 : f32
%0 = tensor.empty() : tensor<1x32x128x512xf32>
%1 = iree_linalg_ext.attention {indexing_maps = [#map, #map1, #map2, #map3, #map4]} ins(%arg0, %arg1, %arg2, %cst : tensor<1x32x128x512xf32>, tensor<1x32x128x512xf32>, tensor<1x32x128x512xf32>, f32) outs(%0 : tensor<1x32x128x512xf32>) {
hal.executable public @prefill_bs1$async_dispatch_39 {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-amx-fp8,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,+sse4.2,-tsxldtrk,-sm3,-ptwrite,-widekl,-movrs,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-ccmp,-amx-int8,-kl,-avx10.1-256,+evex512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,-sha512,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-avx10.2-256,+gfni,-avxvnniint16,-amx-fp16,-zu,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,-nf,-amx-tf32,-amx-avx512,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-amx-transpose,-avx10.2-512,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16
#map = affine_map<(d0, d1, d2, d3) -> ()>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4, d3)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d4)>
#map5 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
#map6 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xbf16>, %arg1: tensor<1x128x32x64xbf16>, %arg2: tensor<1x128x32x64xbf16>) -> tensor<1x128x32x64xbf16> {
%cst = arith.constant 1.250000e-01 : bf16
#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xbf16>, %arg1: tensor<1x128x32x64xbf16>, %arg2: tensor<1x128x32x64xbf16>) -> tensor<1x128x32x64xbf16> {
%cst = arith.constant 1.250000e-01 : bf16
%0 = tensor.empty() : tensor<1x128x32x64xbf16>
%1 = iree_linalg_ext.attention {indexing_maps = [#map, #map1, #map2, #map3, #map4]} ins(%arg0, %arg1, %arg2, %cst : tensor<1x128x32x64xbf16>, tensor<1x128x32x64xbf16>, tensor<1x128x32x64xbf16>, bf16) outs(%0 : tensor<1x128x32x64xbf16>) {
#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xbf16>, %arg1: tensor<1x128x32x64xbf16>, %arg2: tensor<1x128x32x64xbf16>, %arg3: tensor<1x128x32x64xbf16>) -> tensor<1x128x32x64xbf16> {
%cst = arith.constant 1.250000e-01 : bf16
%0 = tensor.empty() : tensor<1x128x32x64xbf16>
%1 = iree_linalg_ext.attention {indexing_maps = [#map, #map1, #map2, #map3, #map4]} ins(%arg0, %arg1, %arg2, %cst : tensor<1x128x32x64xbf16>, tensor<1x128x32x64xbf16>, tensor<1x128x32x64xbf16>, bf16) outs(%0 : tensor<1x128x32x64xbf16>) {
processor : 0
vendor_id : AuthenticAMD
cpu family : 25
model : 116
model name : AMD Ryzen 9 7940HS w/ Radeon 780M Graphics
stepping : 1
microcode : 0xa704104
cpu MHz : 400.000
cache size : 1024 KB
physical id : 0
#map = affine_map<(d0, d1, d2, d3) -> ()>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4, d3)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d4)>
#map5 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
#map6 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xf32>, %arg1: tensor<1x128x32x64xf32>, %arg2: tensor<1x128x32x64xf32>) -> tensor<1x128x32x64xf32> {
%cst = arith.constant 1.250000e-01 : f32
#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xf32>, %arg1: tensor<1x128x32x64xf32>, %arg2: tensor<1x128x32x64xf32>) -> tensor<1x128x32x64xf32> {
%cst = arith.constant 1.250000e-01 : f32
%0 = tensor.empty() : tensor<1x128x32x64xf32>
%1 = iree_linalg_ext.attention {indexing_maps = [#map, #map1, #map2, #map3, #map4]} ins(%arg0, %arg1, %arg2, %cst : tensor<1x128x32x64xf32>, tensor<1x128x32x64xf32>, tensor<1x128x32x64xf32>, f32) outs(%0 : tensor<1x128x32x64xf32>) {