pashu123’s gists

pashu123 / compare.py

Created April 23, 2025 23:41

	import numpy as np

	# Load arrays with FP16 data type
	old = np.load('old.npy').astype(np.float16)
	new = np.load('new.npy').astype(np.float16)

	print(old.shape)
	print(new.shape)

	# Check shape compatibility

pashu123 / test.mlir

Created April 23, 2025 23:39

	#map = affine_map<(d0, d1, d2) -> (d2, d0)>
	#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
	#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
	module {
	func.func @faulty(%arg0: tensor<2816x2xf16>, %arg1: tensor<1280x2816xf16>) -> tensor<2x1280xf32> {
	%cst = arith.constant 0.000000e+00 : f32
	%0 = tensor.empty() : tensor<2x1280xf32>
	%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<2x1280xf32>) -> tensor<2x1280xf32>
	%2 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<2816x2xf16>, tensor<1280x2816xf16>) outs(%1 : tensor<2x1280xf32>) {
	^bb0(%in: f16, %in_0: f16, %out: f32):

pashu123 / test.mlir

Created April 23, 2025 23:36

	#map = affine_map<(d0, d1, d2) -> (d2, d0)>
	#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
	#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
	#map3 = affine_map<(d0, d1) -> (d0, d1)>
	#map4 = affine_map<(d0, d1) -> (d1)>
	module {
	func.func @faulty(%arg0: tensor<2816x2xf16>, %arg1: tensor<1280x2816xf16>) -> tensor<2x1280xf32> {
	%cst = arith.constant 0.000000e+00 : f32
	%cst_0 = arith.constant 1.000000e+00 : f16
	%0 = tensor.empty() : tensor<2x1280xf16>

pashu123 / cc.sh

Created April 23, 2025 23:35

echo "compiling the old vmfb"

iree-compile --iree-hal-target-backends=rocm --iree-hip-target=gfx942 --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-preprocessing-pass-pipeline="builtin.module(util.func(iree-global-opt-raise-special-ops, iree-flow-canonicalize), iree-preprocessing-transpose-convolution-pipeline, iree-preprocessing-pad-to-intrinsics, util.func(iree-preprocessing-generalize-linalg-matmul-experimental))" --iree-hal-dump-executable-files-to=dump/ --iree-dispatch-creation-enable-aggressive-fusion --iree-dispatch-creation-enable-fuse-horizontal-contractions=false --iree-opt-aggressively-propagate-transposes=true --iree-codegen-llvmgpu-use-vector-distribution=true --iree-opt-data-tiling=false --iree-vm-target-truncate-unsupported-floats --iree-opt-outer-dim-concat=true  --iree-hal-indirect-command-buffers=true --iree-stream-resource-memory-model=discrete --iree-hal-memoization=true --iree-opt-strip-assertions --iree-global-opt-propagate-transposes=true --iree-opt-const-eval=false --i

pashu123 / gen.py

Created April 23, 2025 23:35

	import numpy as np
	import os

	# Tensor shapes
	shapes = {
	"arg0": (2816, 2),
	"arg1": (1280, 2816),
	}

	# Generate and save tensors

pashu123 / err.txt

Created April 22, 2025 23:45

	I am inside the populateWarpAndThreadIndices
	The rank is 1
	The threadIds size is2
	I am inside the populateWarpAndThreadIndices
	The rank is 0
	The threadIds size is0
	I am inside the populateWarpAndThreadIndices
	The rank is 1
	The threadIds size is2
	I am inside the populateWarpAndThreadIndices

pashu123 / err.txt

Created April 22, 2025 23:37

	I am inside the populateWarpAndThreadIndices
	The rank is 1
	The threadIds size is2
	I am inside the populateWarpAndThreadIndices
	The rank is 1
	The threadIds size is2
	I am inside the populateWarpAndThreadIndices
	The rank is 0
	The threadIds size is0
	// -----// IR Dump After LLVMGPUVectorDistributePass (iree-llvmgpu-vector-distribute) //----- //

pashu123 / perf.txt

Created April 10, 2025 16:48

	Running with warp reduction
	---------------------------------------------------------------------------------------------------------
	Benchmark Time CPU Iterations UserCounters...
	---------------------------------------------------------------------------------------------------------
	BM__4_14336_4096/process_time/real_time 0.510 ms 0.489 ms 1394 items_per_second=1.96019k/s
	BM__4_14336_4096/process_time/real_time 0.512 ms 0.518 ms 1394 items_per_second=1.95449k/s
	BM__4_14336_4096/process_time/real_time 0.512 ms 0.519 ms 1394 items_per_second=1.9534k/s
	BM__4_14336_4096/process_time/real_time 0.513 ms 0.521 ms 1394 items_per_second=1.95046k/s
	BM__4_14336_4096/process_time/real_time 0.514 ms 0.521 ms 1394 items_per_second=1.94656k/s
	BM__4_14336_4096/process_time/real_time_mean 0.512 ms 0.513 ms 5

pashu123 / perf.txt

Created April 10, 2025 16:35

	---------------------------------------------------------------------------------------------------------
	Benchmark Time CPU Iterations UserCounters...
	---------------------------------------------------------------------------------------------------------
	BM__4_14336_4096/process_time/real_time 0.510 ms 0.489 ms 1394 items_per_second=1.96019k/s
	BM__4_14336_4096/process_time/real_time 0.512 ms 0.518 ms 1394 items_per_second=1.95449k/s
	BM__4_14336_4096/process_time/real_time 0.512 ms 0.519 ms 1394 items_per_second=1.9534k/s
	BM__4_14336_4096/process_time/real_time 0.513 ms 0.521 ms 1394 items_per_second=1.95046k/s
	BM__4_14336_4096/process_time/real_time 0.514 ms 0.521 ms 1394 items_per_second=1.94656k/s
	BM__4_14336_4096/process_time/real_time_mean 0.512 ms 0.513 ms 5 items_per_second=1.95302k/s

pashu123 / bench_result.txt

Created April 9, 2025 17:15

	---------------------------------------------------------------------------------------------------------
	Benchmark Time CPU Iterations UserCounters...
	---------------------------------------------------------------------------------------------------------
	BM__4_14336_4096/process_time/real_time 0.517 ms 0.524 ms 1353 items_per_second=1.93239k/s
	BM__4_14336_4096/process_time/real_time 0.517 ms 0.524 ms 1353 items_per_second=1.93329k/s
	BM__4_14336_4096/process_time/real_time 0.517 ms 0.524 ms 1353 items_per_second=1.93427k/s
	BM__4_14336_4096/process_time/real_time 0.517 ms 0.524 ms 1353 items_per_second=1.93414k/s
	BM__4_14336_4096/process_time/real_time 0.517 ms 0.524 ms 1353 items_per_second=1.93428k/s
	BM__4_14336_4096/process_time/real_time_mean 0.517 ms 0.524 ms 5 items_per_second=1.93368k/

Prashant Kumar pashu123