AmosLewis’s gists

AmosLewis / gpttosa_debug.txt

Created September 29, 2022 00:31

gpttosadebug

This file has been truncated, but you can view the full file.

	➜ SHARK git:(gpt) ✗ torch-mlir-opt -pass-pipeline='torch-backend-to-tosa-backend-pipeline' /tmp/_lambda.mlir -mlir-print-ir-after-all -mlir-pretty-debuginfo -mlir-disable-threading
	// -----// IR Dump After ConvertTorchToTosa (convert-torch-to-tosa) //----- //
	func.func @forward(%arg0: !torch.vtensor<[1,5],si64>) -> !torch.vtensor<[1,5,50257],f32> {
	%0 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[1,5],si64> -> tensor<1x5xi64>
	%int5 = torch.constant.int 5
	%1 = torch_c.to_i64 %int5
	%int1 = torch.constant.int 1
	%2 = torch_c.to_i64 %int1
	%true = torch.constant.bool true
	%float0.000000e00 = torch.constant.float 0.000000e+00

AmosLewis / ElementwiseWhereSelfModule.mlir

Created October 3, 2022 21:42

ElementwiseWhereSelfModulemlir

	// Module for the "ElementwiseWhereSelfModule" case: builds an i1 mask by comparing
	// %arg0 against a 0.5 constant, then uses that mask to select between %arg1 and %arg2.
	#loc0 = loc(unknown)
	module attributes {torch.debug_module_name = "ElementwiseWhereSelfModule"} {
	func.func @forward(%arg0: tensor<1x1x1xf32> loc(unknown), %arg1: tensor<1x1xf32> loc(unknown), %arg2: tensor<1xf32> loc(unknown)) -> tensor<1x1x1xf32> {
	// Constant 0.5 with the same 1x1x1 shape as %arg0.
	%cst = arith.constant dense<5.000000e-01> : tensor<1x1x1xf32> loc(#loc1)
	// Elementwise %arg0 > %cst, producing the 1x1x1 i1 mask.
	%0 = "tosa.greater"(%arg0, %cst) : (tensor<1x1x1xf32>, tensor<1x1x1xf32>) -> tensor<1x1x1xi1> loc(#loc1)
	// NOTE(review): the select operands have ranks 3, 2 and 1 respectively; confirm
	// whether tosa.select accepts mixed ranks here or needs explicit reshapes first.
	%1 = "tosa.select"(%0, %arg1, %arg2) : (tensor<1x1x1xi1>, tensor<1x1xf32>, tensor<1xf32>) -> tensor<1x1x1xf32> loc(#loc2)
	return %1 : tensor<1x1x1xf32> loc(#loc0)
	} loc(#loc0)
	} loc(#loc0)
	// NOTE(review): #loc2 is referenced above but its definition is not part of this excerpt.
	#loc1 = loc("/home/chi/src/ubuntu20/shark/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir/torch_mlir_e2e_test/test_suite/elementwise.py":150:27)

AmosLewis / ElementwiseAtenWhereSelfModule.mlir

Last active October 6, 2022 17:48

ElementwiseAtenWhereSelfModulemlir

	// Module for the "ElementwiseAtenWhereSelfModule" case: a single tosa.select that
	// picks between %arg1 and %arg2 using the i1 mask %arg0.
	#loc0 = loc(unknown)
	module attributes {torch.debug_module_name = "ElementwiseAtenWhereSelfModule"} {
	func.func @forward(%arg0: tensor<1x1x5x5xi1> loc(unknown), %arg1: tensor<1x12x5x5xf32> loc(unknown), %arg2: tensor<?xf32> loc(unknown)) -> tensor<1x12x5x5xf32> {
	// NOTE(review): %arg2 is a rank-1 tensor with a dynamic size while the mask and
	// %arg1 are rank-4; confirm whether tosa.select accepts this operand combination.
	%0 = "tosa.select"(%arg0, %arg1, %arg2) : (tensor<1x1x5x5xi1>, tensor<1x12x5x5xf32>, tensor<?xf32>) -> tensor<1x12x5x5xf32> loc(#loc1)
	return %0 : tensor<1x12x5x5xf32> loc(#loc0)
	} loc(#loc0)
	} loc(#loc0)
	#loc1 = loc("/home/chi/src/ubuntu20/shark/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir/torch_mlir_e2e_test/test_suite/elementwise.py":150:15)

AmosLewis / where.mlir

Created October 6, 2022 17:36

wheremlirrun

	// Applies torch.aten.where.self to a [1,1,5,5] i1 condition, a [1,12,5,5] f32
	// tensor and a rank-0 f32 tensor, returning a [1,12,5,5] f32 tensor.
	func.func @torch.aten.where.self(
	    %cond: !torch.vtensor<[1,1,5,5],i1>,
	    %on_true: !torch.vtensor<[1,12,5,5],f32>,
	    %on_false: !torch.vtensor<[],f32>
	) -> !torch.vtensor<[1,12,5,5],f32> {
	  // The rank-0 operand is passed straight through to the op; no reshape is done here.
	  %selected = torch.aten.where.self %cond, %on_true, %on_false : !torch.vtensor<[1,1,5,5],i1>, !torch.vtensor<[1,12,5,5],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,12,5,5],f32>
	  return %selected : !torch.vtensor<[1,12,5,5],f32>
	}

AmosLewis / select_fp32.mlir

Created October 6, 2022 18:22

tosaselect

	// Runs a single tosa.select on a 1x1x5x5 i1 mask, a 1x12x5x5 f32 tensor and a
	// rank-0 f32 tensor, returning a 1x12x5x5 f32 tensor.
	func.func @select_fp32(
	    %mask: tensor<1x1x5x5xi1>,
	    %on_true: tensor<1x12x5x5xf32>,
	    %on_false: tensor<f32>
	) -> tensor<1x12x5x5xf32> {
	  // Operands are handed to tosa.select with their original types, unchanged.
	  %selected = "tosa.select"(%mask, %on_true, %on_false) : (tensor<1x1x5x5xi1>, tensor<1x12x5x5xf32>, tensor<f32>) -> tensor<1x12x5x5xf32>
	  return %selected : tensor<1x12x5x5xf32>
	}

AmosLewis / gpt2tosainferenceBug.txt

Created October 7, 2022 16:40

gpt2tosainferenceBug

	➜ SHARK git:(gpt) ✗ python tank/pytorch/gpt2/gpt2.py
	Torch Golden Result: tensor([[[ -31.8388, -30.9854, -34.4231, ..., -39.7515, -38.6848,
	-32.3074],
	[ -99.2055, -98.8202, -104.2251, ..., -112.2020, -109.0224,
	-100.2584],
	[-115.6919, -116.9150, -119.1486, ..., -124.9616, -123.2126,
	-116.6671],
	[-123.0994, -123.1445, -128.7349, ..., -130.6248, -130.6557,
	-125.1285],
	[ -80.2680, -81.8277, -89.0646, ..., -94.5047, -96.1721,

AmosLewis / torchmlirbuildcmd.txt

Created October 11, 2022 17:49

torchmlirbuildcmd

	# Configure a Debug build into ./build with clang, registering torch-mlir and
	# torch-mlir-dialects as LLVM external projects and enabling the MLIR Python bindings.
	# The trailing backslashes keep this a single cmake invocation, and "$(pwd)" expands
	# to the current directory (a bare `pwd` word would be passed as the literal string).
	# NOTE(review): this excerpt ends at the Python-bindings flag and names no source
	# directory; check the full file for any remaining arguments before running.
	cmake -GNinja -Bbuild \
	  -DCMAKE_BUILD_TYPE=Debug \
	  -DCMAKE_C_COMPILER=clang \
	  -DCMAKE_CXX_COMPILER=clang++ \
	  -DPython3_FIND_VIRTUALENV=ONLY \
	  -DLLVM_ENABLE_PROJECTS=mlir \
	  -DLLVM_EXTERNAL_PROJECTS="torch-mlir;torch-mlir-dialects" \
	  -DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR="$(pwd)" \
	  -DLLVM_EXTERNAL_TORCH_MLIR_DIALECTS_SOURCE_DIR="$(pwd)/externals/llvm-external-projects/torch-mlir-dialects" \
	  -DMLIR_ENABLE_BINDINGS_PYTHON=ON

AmosLewis / alderlake.txt

Last active October 14, 2022 16:54

	# Configure IREE into ../iree-build/ from the current directory.
	# IREE_ENABLE_ASSERTIONS was previously passed twice (OFF, then ON); for repeated -D
	# definitions the later value wins, so only the effective ON setting is kept.
	cmake -GNinja -B ../iree-build/ -S . \
	  -DCMAKE_BUILD_TYPE=RelWithDebInfo \
	  -DCMAKE_C_COMPILER=clang \
	  -DCMAKE_CXX_COMPILER=clang++ \
	  -DCMAKE_INSTALL_PREFIX=/home/chi/IREE/iree-build/install \
	  -DIREE_ENABLE_LLD=ON \
	  -DIREE_HAL_DRIVER_VULKAN=ON \
	  -DIREE_TARGET_BACKEND_CUDA=OFF \
	  -DIREE_TARGET_BACKEND_VULKAN_SPIRV=ON \
	  -DIREE_TARGET_BACKEND_OPENCL_SPIRV=ON \
	  -DIREE_ENABLE_ASSERTIONS=ON \
	  -DIREE_BUILD_PYTHON_BINDINGS=ON \
	  -DPython3_EXECUTABLE="$(which python)" \
	  -DIREE_ENABLE_RUNTIME_TRACING=ON \
	  -DIREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER=ON \
	  -DIREE_BUILD_TRACY=ON

	# Compile unet_stable_diff_maxf.mlir to a .vmfb for the Vulkan backend with the
	# rdna2-6900xt-linux target triple and 64-bit stream-resource / VM index widths.
	iree-compile --iree-input-type=none --iree-hal-target-backends=vulkan --iree-vulkan-target-triple=rdna2-6900xt-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 unet_stable_diff_maxf.mlir -o unet_stable_diff_maxf.vmfb

	# Benchmark the module's `forward` entry point on the Vulkan device with three f32 inputs.
	iree-benchmark-module --module_file=unet_stable_diff_maxf.vmfb --entry_function=forward --device=vulkan --function_input="2x64x64x4xf32" --function_input="2x320xf32" --function_input="2x77x768xf32"

AmosLewis / sharktunner_debug.txt

Last active October 13, 2022 05:02

	(tuner_venv) chi@alderlake:~/IREE$ python shark-tuner/minilm_example.py -model /home/chi/IREE/stable_diff_tf.mlir -num_iters 10 -result_dir results -device cpu -search_op matmul
	The input mlir type is mhlo
	Searching for [0, 2, 1280, 320]
	Updated op %0 = "mhlo.dot"(%arg0, %arg1) {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[2, 6], [1, 4, 0], [0, 0, 6]]>, translation_info = <CPUDoubleTilingPadExpert>>, name = "dot0"} : (tensor<2x320xf32>, tensor<320x1280xf32>) -> tensor<2x1280xf32>
	Best: 10000000.0 ms Current: 0.077 ms
	Updated op %0 = "mhlo.dot"(%arg0, %arg1) {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[2, 8], [1, 1, 0], [0, 0, 32]]>, translation_info = <CPUDoubleTilingPadExpert>>, name = "dot0"} : (tensor<2x320xf32>, tensor<320x1280xf32>) -> tensor<2x1280xf32>
	Best: 0.077 ms Current: 0.183 ms
	Updated op %0 = "mhlo.dot"(%arg0, %arg1) {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[2, 16], [1, 8, 0

AmosLewis / search_print7173.txt

Last active October 14, 2022 16:38

searchprint

This file has been truncated, but you can view the full file.

	(tuner_venv) chi@alderlake:~/IREE$ python shark-tuner/minilm_example.py -model /home/chi/IREE/stable_diff_linalg.mlir -num_iters 100 -result_dir results -device vulkan -search_op conv
	The input mlir type is linalg
	Found AMD Radeon RX 5000 series device. Using rdna1-5700xt-linux
	Searching for [2, 66, 66, 4, 3, 3, 320, 64, 64, 1, 1, 0]
	Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 16], [0, 4, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
	Best: 10000000.0 ms Current: 587.0 ms
	Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 64], [0, 2, 32, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1,

Chi_Liu AmosLewis