Skip to content

Instantly share code, notes, and snippets.

@AmosLewis
AmosLewis / gpttosa_debug.txt
Created September 29, 2022 00:31
gpttosadebug
This file has been truncated, but you can view the full file.
➜ SHARK git:(gpt) ✗ torch-mlir-opt -pass-pipeline='torch-backend-to-tosa-backend-pipeline' /tmp/_lambda.mlir -mlir-print-ir-after-all -mlir-pretty-debuginfo -mlir-disable-threading
// -----// IR Dump After ConvertTorchToTosa (convert-torch-to-tosa) //----- //
func.func @forward(%arg0: !torch.vtensor<[1,5],si64>) -> !torch.vtensor<[1,5,50257],f32> {
%0 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[1,5],si64> -> tensor<1x5xi64>
%int5 = torch.constant.int 5
%1 = torch_c.to_i64 %int5
%int1 = torch.constant.int 1
%2 = torch_c.to_i64 %int1
%true = torch.constant.bool true
%float0.000000e00 = torch.constant.float 0.000000e+00
@AmosLewis
AmosLewis / ElementwiseWhereSelfModule.mlir
Created October 3, 2022 21:42
ElementwiseWhereSelfModulemlir
#loc0 = loc(unknown)
// IR dump of the module tagged torch.debug_module_name = "ElementwiseWhereSelfModule".
// All tensors here are statically shaped f32/i1 with every dimension equal to 1.
module attributes {torch.debug_module_name = "ElementwiseWhereSelfModule"} {
// @forward takes three f32 tensors of rank 3, 2 and 1 and returns a rank-3 f32 tensor.
func.func @forward(%arg0: tensor<1x1x1xf32> loc(unknown), %arg1: tensor<1x1xf32> loc(unknown), %arg2: tensor<1xf32> loc(unknown)) -> tensor<1x1x1xf32> {
// Splat constant 0.5 with the same type as %arg0.
%cst = arith.constant dense<5.000000e-01> : tensor<1x1x1xf32> loc(#loc1)
// Compare %arg0 against the constant; the result is an i1 tensor of the same shape.
%0 = "tosa.greater"(%arg0, %cst) : (tensor<1x1x1xf32>, tensor<1x1x1xf32>) -> tensor<1x1x1xi1> loc(#loc1)
// NOTE(review): the three tosa.select operands have different ranks (3, 2 and 1);
// confirm whether the TOSA verifier/consumers accept this without explicit reshapes.
// #loc2 is referenced here but is not defined in the visible part of this dump.
%1 = "tosa.select"(%0, %arg1, %arg2) : (tensor<1x1x1xi1>, tensor<1x1xf32>, tensor<1xf32>) -> tensor<1x1x1xf32> loc(#loc2)
return %1 : tensor<1x1x1xf32> loc(#loc0)
} loc(#loc0)
} loc(#loc0)
#loc1 = loc("/home/chi/src/ubuntu20/shark/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir/torch_mlir_e2e_test/test_suite/elementwise.py":150:27)
@AmosLewis
AmosLewis / ElementwiseAtenWhereSelfModule.mlir
Last active October 6, 2022 17:48
ElementwiseAtenWhereSelfModulemlir
#loc0 = loc(unknown)
// IR dump of the module tagged torch.debug_module_name = "ElementwiseAtenWhereSelfModule".
module attributes {torch.debug_module_name = "ElementwiseAtenWhereSelfModule"} {
// @forward takes an i1 tensor (1x1x5x5), an f32 tensor (1x12x5x5) and a rank-1
// f32 tensor with a dynamic dimension, and returns an f32 tensor (1x12x5x5).
func.func @forward(%arg0: tensor<1x1x5x5xi1> loc(unknown), %arg1: tensor<1x12x5x5xf32> loc(unknown), %arg2: tensor<?xf32> loc(unknown)) -> tensor<1x12x5x5xf32> {
// The single op: tosa.select with %arg0 as the predicate and %arg1 / %arg2 as the two value operands.
// NOTE(review): %arg2 is rank 1 with an unknown size while the other operands are
// rank 4 and static; confirm this operand type is what the lowering intended.
%0 = "tosa.select"(%arg0, %arg1, %arg2) : (tensor<1x1x5x5xi1>, tensor<1x12x5x5xf32>, tensor<?xf32>) -> tensor<1x12x5x5xf32> loc(#loc1)
return %0 : tensor<1x12x5x5xf32> loc(#loc0)
} loc(#loc0)
} loc(#loc0)
#loc1 = loc("/home/chi/src/ubuntu20/shark/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir/torch_mlir_e2e_test/test_suite/elementwise.py":150:15)
@AmosLewis
AmosLewis / where.mlir
Created October 6, 2022 17:36
wheremlirrun
// Minimal torch-dialect function containing a single torch.aten.where.self op.
// Operand types: an i1 tensor [1,1,5,5], an f32 tensor [1,12,5,5], and a
// rank-0 f32 tensor ([]); the result is an f32 tensor [1,12,5,5].
func.func @torch.aten.where.self(%arg0: !torch.vtensor<[1,1,5,5],i1>, %arg1: !torch.vtensor<[1,12,5,5],f32>, %arg2: !torch.vtensor<[],f32>) -> !torch.vtensor<[1,12,5,5],f32> {
// %arg0 is the condition operand; %arg1 and %arg2 are the two value operands.
%0 = torch.aten.where.self %arg0, %arg1, %arg2 : !torch.vtensor<[1,1,5,5],i1>, !torch.vtensor<[1,12,5,5],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,12,5,5],f32>
return %0 : !torch.vtensor<[1,12,5,5],f32>
}
@AmosLewis
AmosLewis / select_fp32.mlir
Created October 6, 2022 18:22
tosaselect
// Minimal builtin-tensor function containing a single tosa.select op.
// Operand types: an i1 tensor 1x1x5x5, an f32 tensor 1x12x5x5, and a rank-0
// f32 tensor; the result is an f32 tensor 1x12x5x5.
func.func @select_fp32(%arg0: tensor<1x1x5x5xi1>, %arg1: tensor<1x12x5x5xf32>, %arg2: tensor<f32>) -> tensor<1x12x5x5xf32> {
// NOTE(review): the third operand is rank 0 while the other two are rank 4;
// confirm whether tosa.select accepts mixed-rank operands in the targeted TOSA version.
%0 = "tosa.select"(%arg0, %arg1, %arg2) : (tensor<1x1x5x5xi1>, tensor<1x12x5x5xf32>, tensor<f32>) -> tensor<1x12x5x5xf32>
return %0 : tensor<1x12x5x5xf32>
}
@AmosLewis
AmosLewis / gpt2tosainferenceBug.txt
Created October 7, 2022 16:40
gpt2tosainferenceBug
➜ SHARK git:(gpt) ✗ python tank/pytorch/gpt2/gpt2.py
Torch Golden Result: tensor([[[ -31.8388, -30.9854, -34.4231, ..., -39.7515, -38.6848,
-32.3074],
[ -99.2055, -98.8202, -104.2251, ..., -112.2020, -109.0224,
-100.2584],
[-115.6919, -116.9150, -119.1486, ..., -124.9616, -123.2126,
-116.6671],
[-123.0994, -123.1445, -128.7349, ..., -130.6248, -130.6557,
-125.1285],
[ -80.2680, -81.8277, -89.0646, ..., -94.5047, -96.1721,
@AmosLewis
AmosLewis / torchmlirbuildcmd.txt
Created October 11, 2022 17:49
torchmlirbuildcmd
cmake -GNinja -Bbuild \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DPython3_FIND_VIRTUALENV=ONLY \
-DLLVM_ENABLE_PROJECTS=mlir \
-DLLVM_EXTERNAL_PROJECTS="torch-mlir;torch-mlir-dialects" \
-DLLVM_EXTERNAL_TORCH_MLIR_SOURCE_DIR=`pwd` \
-DLLVM_EXTERNAL_TORCH_MLIR_DIALECTS_SOURCE_DIR=`pwd`/externals/llvm-external-projects/torch-mlir-dialects \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON
cmake -GNinja -B ../iree-build/ -S . -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_INSTALL_PREFIX=/home/chi/IREE/iree-build/install -DIREE_ENABLE_LLD=ON -DIREE_HAL_DRIVER_VULKAN=ON -DIREE_TARGET_BACKEND_CUDA=OFF -DIREE_TARGET_BACKEND_VULKAN_SPIRV=ON -DIREE_TARGET_BACKEND_OPENCL_SPIRV=ON -DIREE_ENABLE_ASSERTIONS=ON -DIREE_BUILD_PYTHON_BINDINGS=ON -DPython3_EXECUTABLE="$(which python)" -DIREE_ENABLE_RUNTIME_TRACING=ON -DIREE_BYTECODE_MODULE_FORCE_LLVM_SYSTEM_LINKER=ON -DIREE_BUILD_TRACY=ON
iree-compile --iree-input-type=none --iree-hal-target-backends=vulkan --iree-vulkan-target-triple=rdna2-6900xt-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 unet_stable_diff_maxf.mlir -o unet_stable_diff_maxf.vmfb
iree-benchmark-module --module_file=unet_stable_diff_maxf.vmfb --entry_function=forward --device=vulkan --function_input="2x64x64x4xf32" --function_input="2x320xf32" --function_input="2x77x768xf32"
(tuner_venv) chi@alderlake:~/IREE$ python shark-tuner/minilm_example.py -model /home/chi/IREE/stable_diff_tf.mlir -num_iters 10 -result_dir results -device cpu -search_op matmul
The input mlir type is mhlo
Searching for [0, 2, 1280, 320]
Updated op %0 = "mhlo.dot"(%arg0, %arg1) {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[2, 6], [1, 4, 0], [0, 0, 6]]>, translation_info = <CPUDoubleTilingPadExpert>>, name = "dot0"} : (tensor<2x320xf32>, tensor<320x1280xf32>) -> tensor<2x1280xf32>
Best: 10000000.0 ms Current: 0.077 ms
Updated op %0 = "mhlo.dot"(%arg0, %arg1) {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[2, 8], [1, 1, 0], [0, 0, 32]]>, translation_info = <CPUDoubleTilingPadExpert>>, name = "dot0"} : (tensor<2x320xf32>, tensor<320x1280xf32>) -> tensor<2x1280xf32>
Best: 0.077 ms Current: 0.183 ms
Updated op %0 = "mhlo.dot"(%arg0, %arg1) {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[2, 16], [1, 8, 0
@AmosLewis
AmosLewis / search_print7173.txt
Last active October 14, 2022 16:38
searchprint
This file has been truncated, but you can view the full file.
(tuner_venv) chi@alderlake:~/IREE$ python shark-tuner/minilm_example.py -model /home/chi/IREE/stable_diff_linalg.mlir -num_iters 100 -result_dir results -device vulkan -search_op conv
The input mlir type is linalg
Found AMD Radeon RX 5000 series device. Using rdna1-5700xt-linux
Searching for [2, 66, 66, 4, 3, 3, 320, 64, 64, 1, 1, 0]
Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 64, 64, 16], [0, 4, 8, 4], [0, 0, 0, 0, 1, 1, 4], [0, 1, 0, 0]]>, translation_info = <SPIRVVectorize>, workgroup_size = [4, 8, 16]>, dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<2x66x66x4xf32>, tensor<3x3x4x320xf32>) outs(%1 : tensor<2x64x64x320xf32>) -> tensor<2x64x64x320xf32>
Best: 10000000.0 ms Current: 587.0 ms
Updated op %2 = linalg.conv_2d_nhwc_hwcf {compilation_info = #iree_codegen.compilation_info<lowering_config = <tile_sizes = [[0, 16, 64, 64], [0, 2, 32, 8], [0, 0, 0, 0, 1, 1, 4], [0, 1,