Skip to content

Instantly share code, notes, and snippets.

View pashu123's full-sized avatar
😇
Working from home

Prashant Kumar pashu123

😇
Working from home
View GitHub Profile
import onnx
import onnx.helper as helper
# Model shape configuration used to build the ONNX graph's input/output tensors.
# NOTE(review): the graph-construction code that consumes these constants is not
# visible in this fragment — values below are presumably chosen to mimic a
# LLaMA-style transformer layer; confirm against the full script.
batch_size = 1          # number of sequences per forward pass
seq_len = 512           # query sequence length (tokens)
hidden_size = 4096      # model embedding width
kv_seq_len = 4          # key/value sequence length — much shorter than seq_len; TODO confirm intent (e.g. KV-cache slice)
kv_hidden_size = 16     # key/value hidden width — TODO confirm; 16 is far smaller than hidden_size / num_heads
num_heads = 32 # Example number of attention heads
#map = affine_map<(d0, d1, d2, d3) -> ()>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4, d3)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d4)>
#map5 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
#map6 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
module {
func.func @attention(%arg0: tensor<1x32x128x512xf32>, %arg1: tensor<1x32x128x512xf32>, %arg2: tensor<1x32x128x512xf32>) -> tensor<1x32x128x512xf32> {
%cst = arith.constant 1.250000e-01 : f32
#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
module {
func.func @attention(%arg0: tensor<1x32x128x512xf32>, %arg1: tensor<1x32x128x512xf32>, %arg2: tensor<1x32x128x512xf32>) -> tensor<1x32x128x512xf32> {
%cst = arith.constant 1.250000e-01 : f32
%0 = tensor.empty() : tensor<1x32x128x512xf32>
%1 = iree_linalg_ext.attention {indexing_maps = [#map, #map1, #map2, #map3, #map4]} ins(%arg0, %arg1, %arg2, %cst : tensor<1x32x128x512xf32>, tensor<1x32x128x512xf32>, tensor<1x32x128x512xf32>, f32) outs(%0 : tensor<1x32x128x512xf32>) {
hal.executable public @prefill_bs1$async_dispatch_39 {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-amx-fp8,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,+sse4.2,-tsxldtrk,-sm3,-ptwrite,-widekl,-movrs,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-ccmp,-amx-int8,-kl,-avx10.1-256,+evex512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,-sha512,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-avx10.2-256,+gfni,-avxvnniint16,-amx-fp16,-zu,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,-nf,-amx-tf32,-amx-avx512,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-amx-transpose,-avx10.2-512,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16
#map = affine_map<(d0, d1, d2, d3) -> ()>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4, d3)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d4)>
#map5 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
#map6 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xbf16>, %arg1: tensor<1x128x32x64xbf16>, %arg2: tensor<1x128x32x64xbf16>) -> tensor<1x128x32x64xbf16> {
%cst = arith.constant 1.250000e-01 : bf16
#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xbf16>, %arg1: tensor<1x128x32x64xbf16>, %arg2: tensor<1x128x32x64xbf16>) -> tensor<1x128x32x64xbf16> {
%cst = arith.constant 1.250000e-01 : bf16
%0 = tensor.empty() : tensor<1x128x32x64xbf16>
%1 = iree_linalg_ext.attention {indexing_maps = [#map, #map1, #map2, #map3, #map4]} ins(%arg0, %arg1, %arg2, %cst : tensor<1x128x32x64xbf16>, tensor<1x128x32x64xbf16>, tensor<1x128x32x64xbf16>, bf16) outs(%0 : tensor<1x128x32x64xbf16>) {
#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xbf16>, %arg1: tensor<1x128x32x64xbf16>, %arg2: tensor<1x128x32x64xbf16>, %arg3: tensor<1x128x32x64xbf16>) -> tensor<1x128x32x64xbf16> {
%cst = arith.constant 1.250000e-01 : bf16
%0 = tensor.empty() : tensor<1x128x32x64xbf16>
%1 = iree_linalg_ext.attention {indexing_maps = [#map, #map1, #map2, #map3, #map4]} ins(%arg0, %arg1, %arg2, %cst : tensor<1x128x32x64xbf16>, tensor<1x128x32x64xbf16>, tensor<1x128x32x64xbf16>, bf16) outs(%0 : tensor<1x128x32x64xbf16>) {
processor : 0
vendor_id : AuthenticAMD
cpu family : 25
model : 116
model name : AMD Ryzen 9 7940HS w/ Radeon 780M Graphics
stepping : 1
microcode : 0xa704104
cpu MHz : 400.000
cache size : 1024 KB
physical id : 0
#map = affine_map<(d0, d1, d2, d3) -> ()>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4, d3)>
#map4 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d4)>
#map5 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
#map6 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xf32>, %arg1: tensor<1x128x32x64xf32>, %arg2: tensor<1x128x32x64xf32>) -> tensor<1x128x32x64xf32> {
%cst = arith.constant 1.250000e-01 : f32
#map = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d4)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d4)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d5, d3)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> ()>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
module {
func.func @attention(%arg0: tensor<1x128x32x64xf32>, %arg1: tensor<1x128x32x64xf32>, %arg2: tensor<1x128x32x64xf32>) -> tensor<1x128x32x64xf32> {
%cst = arith.constant 1.250000e-01 : f32
%0 = tensor.empty() : tensor<1x128x32x64xf32>
%1 = iree_linalg_ext.attention {indexing_maps = [#map, #map1, #map2, #map3, #map4]} ins(%arg0, %arg1, %arg2, %cst : tensor<1x128x32x64xf32>, tensor<1x128x32x64xf32>, tensor<1x128x32x64xf32>, f32) outs(%0 : tensor<1x128x32x64xf32>) {