# Description: This script is used to test the model deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir
# run original model and print ir after failure
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-after-failure |& gh gist create - -d "native_layer_norm ir dump after failure"
# run again with --debug and grep for `(tensor<198xf32>) -> tensor<?x198xf32>` and pass names
# grep patterns (metacharacters escaped so they match literally):
# `(tensor<198xf32>) -> tensor<?x198xf32>`
# `IR Dump After`
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-after-all --debug |& grep -E '\(tensor<198xf32>\) -> tensor<\?x198xf32>|IR Dump After' |& gh gist create - -d "native_layer_norm ir dump after failure with debug"
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-before-all --mlir-print-skip-regions
# find out which pass created the problem
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-after-all &> printirafterall.mlir
cat printirafterall.mlir | grep -E '\(tensor<198xf32>\)|IR Dump'
# output:
# // -----// IR Dump After RaiseSpecialOps (iree-global-opt-raise-special-ops) //----- //
# // -----// IR Dump After DecomposeConcat (iree-global-opt-decompose-concat) //----- //
# // -----// IR Dump After GeneralizeLinalgNamedOps (iree-global-opt-generalize-linalg-named-ops) //----- //
# // -----// IR Dump After FoldUnitExtentDims Failed (iree-flow-fold-unit-extent-dims) //----- //
# %73 = "tensor.expand_shape"(%72) <{reassociation = [[0, 1]]}> : (tensor<198xf32>) -> tensor<?x198xf32>
# %86 = "tensor.expand_shape"(%85) <{reassociation = [[0, 1]]}> : (tensor<198xf32>) -> tensor<?x198xf32>
# stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir:364:36: note: see current operation: %73 = "tensor.expand_shape"(%72) <{reassociation = [[0, 1]]}> : (tensor<198xf32>) -> tensor<?x198xf32>
# -- the problem seems to be with the FoldUnitExtentDims pass
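# Hedged helper (not from the original log): print the first "IR Dump After"
# header under which a regex appears, so the offending pass falls out of one
# awk pass instead of eyeballing the grep output above.
first_pass_with() {  # usage: first_pass_with <ERE> <dumpfile>
  awk -v pat="$1" '
    /IR Dump After/ { pass = $0 }   # remember the most recent dump header
    $0 ~ pat { print pass; exit }   # first match: report the header and stop
  ' "$2"
}
first_pass_with 'tensor<198xf32>\\) -> tensor<\\?x198xf32>' printirafterall.mlir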
# print ir before and after FoldUnitExtentDims
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-before=iree-flow-fold-unit-extent-dims --mlir-print-ir-after=iree-flow-fold-unit-extent-dims |& gh gist create - -d "native_layer_norm ir before and after FoldUnitExtentDims"
# same, but with minimal.mlir
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/minimal.mlir -o minimal.vmfb --mlir-print-debuginfo --mlir-print-ir-before=iree-flow-fold-unit-extent-dims --mlir-print-ir-after=iree-flow-fold-unit-extent-dims |& gh gist create - -d "minimal ir before and after FoldUnitExtentDims"
# print ir before and after FoldUnitExtentDims, but with a stack trace
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-before=iree-flow-fold-unit-extent-dims --mlir-print-ir-after=iree-flow-fold-unit-extent-dims --mlir-print-stacktrace-on-diagnostic |& gh gist create - -d "native_layer_norm ir before and after FoldUnitExtentDims, with stack trace"
### meeting with Stanley
# --pass-pipeline="builtin.module(iree-flow-fold-unit-extent-dims)"
# ┌─[0]─[azureuser@xida-cpu-0]─[~/SHARK-TestSuite/e2eshark/test-onnx/pytorch/models/deit-small-distilled-patch16-224]
# └──╼ $/home/azureuser/iree-build/tools/iree-opt ./even_bigger.mlir --pass-pipeline="builtin.module(iree-flow-fold-unit-extent-dims)"
# /home/azureuser/iree-build/tools/iree-opt: /home/azureuser/miniconda/lib/libtinfo.so.6: no version information available (required by /home/azureuser/iree-build/lib/libIREECompiler.so)
# <unknown>:0: error: unable to schedule pass 'FoldUnitExtentDims' on a PassManager intended to run on 'builtin.module'!
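# Likely fix for the scheduling error above (untested sketch): the pass is
# anchored on function-like ops rather than builtin.module, so it probably
# needs to be nested under the op it actually runs on, e.g.:
# /home/azureuser/iree-build/tools/iree-opt ./even_bigger.mlir --pass-pipeline="builtin.module(func.func(iree-flow-fold-unit-extent-dims))"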
### Trying to find how the 1->? happened
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-after-all &> printirafterall.mlir
cat printirafterall.mlir | grep -E 'ins\(%[0-9]+ : tensor<1x198x384xf32>\) outs\(%[0-9]+ : tensor<\?x198x1xf32>\)|IR Dump'
## stop at linalg
/home/azureuser/iree-build/tools/iree-compile --compile-to=input stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-after-all &> printirafterall-input.mlir
cat printirafterall-input.mlir | grep -E 'ins\(%[0-9]+ : tensor<1x198x384xf32>\) outs\(%[0-9]+ : tensor<\?x198x1xf32>\)|IR Dump'
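# Hedged sketch: diff just the "IR Dump" headers of the two runs to see where
# the full pipeline and the --compile-to=input pipeline diverge.
diff <(grep 'IR Dump' printirafterall.mlir) <(grep 'IR Dump' printirafterall-input.mlir) | head -n 20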
# same but on stripped/minimal.mlir
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/minimal.mlir -o minimal.vmfb --mlir-print-debuginfo --mlir-print-ir-after-all &> printirafterall-minimal.mlir
cat printirafterall-minimal.mlir | grep -E 'ins\(%[0-9]+ : tensor<1x198x384xf32>\) outs\(%[0-9]+ : tensor<\?x198x1xf32>\)|IR Dump'
# print ir before and after convert-elementwise-to-linalg
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-before=convert-elementwise-to-linalg &> convert-elementwise-to-linalg-before.mlir
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-after=convert-elementwise-to-linalg &> convert-elementwise-to-linalg-after.mlir
gh gist create convert-elementwise-to-linalg-before.mlir convert-elementwise-to-linalg-after.mlir -d "convert-elementwise-to-linalg before and after"
# trying to get a better trace
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-after-failure --mlir-print-ir-module-scope --mlir-disable-threading
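# Hedged addition (assuming this iree-compile build exposes MLIR's generic
# reproducer options): write a crash reproducer that captures the failing
# pass pipeline together with its input IR in one file.
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o /tmp/tmp.vmfb --mlir-pass-pipeline-crash-reproducer=/tmp/iree-crash-repro.mlir --mlir-disable-threading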
# save ir after torch-decompose-complex-ops
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu stripped/deit-small-distilled-patch16-224.default.pytorch.torch.stripped.mlir -o deit-small-distilled-patch16-224.default.stripped.vmfb --mlir-print-debuginfo --mlir-print-ir-after=torch-decompose-complex-ops &> torch-decompose-complex-ops.log
# decomposed.mlir contains the resulting mlir
# run with --mlir-print-debuginfo
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu decomposed.mlir -o decomposed.vmfb --mlir-print-debuginfo &> torch-decompose-complex-ops-debuginfo.log
# now try atenrepro.mlir
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu atenrepro.mlir -o atenrepro.vmfb --mlir-print-debuginfo &> atenrepro-debuginfo.log
# try to canonicalize the decomposed.mlir
torch-mlir-opt --canonicalize decomposed.mlir -o decomposed-canonicalized.mlir
# run with --mlir-print-debuginfo
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu decomposed-canonicalized.mlir -o decomposed-canonicalized.vmfb --mlir-print-debuginfo &> torch-decompose-complex-ops-debuginfo.log
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu decomposed-canonicalized.mlir -o /tmp/tmp.vmfb --mlir-print-ir-after=memref-expand &> ./after-memref-expand.mlir
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu after-memref-expand.mlir -o /tmp/tmp.vmfb --mlir-print-ir-after=canonicalize --mlir-print-debuginfo &> rest-of-the-way.mlir
# grep for the bad shapes
cat rest-of-the-way.mlir | grep -E 'ins\(%[0-9]+ : tensor<1x198x384xf32>\) outs\(%[0-9]+ : tensor<\?x198x1xf32>\)|IR Dump'
# Is the linalg.generic coming directly from native_layer_norm or is some intermediate op spawning it?
/home/azureuser/iree-build/tools/iree-compile --iree-input-demote-i64-to-i32 --iree-hal-target-backends=llvm-cpu decomposed.mlir -o /tmp/tmp.vmfb --mlir-print-ir-after-all --mlir-print-debuginfo &> decomposed-all.mlir
# grep for the linalg.generic with the ? -> 1 shape
cat decomposed-all.mlir | grep -E 'ins\(%[0-9]+ : tensor<1x198x384xf32>\) outs\(%[0-9]+ : tensor<\?x198x1xf32>\)|IR Dump'
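# Hedged variant: keep a few lines of leading context so the op that produces
# the ?x198x1 init tensor is visible next to each match.
grep -nE -B3 'outs\(%[0-9]+ : tensor<\?x198x1xf32>\)' decomposed-all.mlir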
# Now debugging onnx.if
# Test onnx/models/KeypointRCNN_vaiq_int8 failed [torch-mlir] (onnx.if)
# Test onnx/models/retinanet_resnet50_fpn_vaiq_int8 failed [torch-mlir] (onnx.if)
cd /home/azureuser/SHARK-TestSuite/e2eshark/
PYTHONPATH="/home/azureuser/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir" HF_TOKEN=<redacted> python run.py --cachedir="~/.cache/" --torchtolinalg --torchmlirbuild "~/torch-mlir/build/" --ireebuild ~/iree-build --mode=onnx --framework=onnx --tests onnx/models/KeypointRCNN_vaiq_int8 --verbose
# strip /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.mlir
torch-mlir-opt --mlir-elide-resource-strings-if-larger=16 /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.mlir -o /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.stripped.mlir
### now debugging the onnx.if issue
cd /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8
# try to lower the stripped model to torch
torch-mlir-opt --convert-torch-onnx-to-torch /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.mlir -o /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.torch.mlir --mlir-print-ir-after-failure --mlir-elide-resource-strings-if-larger=16 &> /tmp/after_failure.log
torch-mlir-opt --convert-torch-onnx-to-torch /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.mlir -o /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.torch.mlir --mlir-print-ir-after-failure --debug &> /tmp/debug.log
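# Hedged helper: pull just the pattern-failure reasons out of the (very large)
# --debug log and rank them by frequency.
grep -E '\*\* Failure|failed to legalize' /tmp/debug.log | sort | uniq -c | sort -rn | head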
cat <<-EOF > ./repro.mlir
func.func @minimal_example(%arg0: !torch.vtensor<[1],i1>, %arg1: !torch.vtensor<[?,4],f32>, %arg2: !torch.vtensor<[1],si64>, %arg3: !torch.vtensor<[?],f32>) -> !torch.vtensor<[?],si64> attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 17 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "1.13.1"} {
%0 = torch.operator "onnx.If"(%arg0) : (!torch.vtensor<[1],i1>) -> !torch.vtensor<[?],si64> {
%1 = torch.operator "onnx.ReduceMax"(%arg1) {torch.onnx.keepdims = 0 : si64} : (!torch.vtensor<[?,4],f32>) -> !torch.vtensor<[],f32>
%2 = torch.operator "onnx.Cast"(%arg2) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],f32>
%3 = torch.operator "onnx.Mul"(%2, %1) : (!torch.vtensor<[1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1],f32>
%4 = torch.operator "onnx.Unsqueeze"(%3, %arg2) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,1],f32>
%5 = torch.operator "onnx.Add"(%arg1, %4) : (!torch.vtensor<[?,4],f32>, !torch.vtensor<[1,1],f32>) -> !torch.vtensor<[?,4],f32>
%6 = torch.operator "onnx.Unsqueeze"(%5, %arg2) : (!torch.vtensor<[?,4],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,?,4],f32>
%7 = torch.operator "onnx.Unsqueeze"(%arg3, %arg2) : (!torch.vtensor<[?],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,?],f32>
%8 = torch.operator "onnx.Unsqueeze"(%7, %arg2) : (!torch.vtensor<[1,?],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,1,?],f32>
%9 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<1> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0.5> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
%11 = torch.operator "onnx.NonMaxSuppression"(%6, %8, %9, %10) : (!torch.vtensor<[1,?,4],f32>, !torch.vtensor<[1,1,?],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[?,3],si64>
%12 = torch.operator "onnx.Gather"(%11, %arg2) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[?,3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1],si64>
%13 = torch.operator "onnx.Squeeze"(%12, %arg2) : (!torch.vtensor<[?,1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?],si64>
torch.operator_terminator %13 : !torch.vtensor<[?],si64>
}, {
%1 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<> : tensor<0xsi64>} : () -> !torch.vtensor<[0],si64>
torch.operator_terminator %1 : !torch.vtensor<[0],si64>
}
return %0 : !torch.vtensor<[?],si64>
}
EOF
torch-mlir-opt --convert-torch-onnx-to-torch repro.mlir -o repro.torch.mlir --debug |& tee repro.log
# //===-------------------------------------------===//
# Legalizing operation : 'torch.operator'(0x55914d9c55f0) {
# %33 = "torch.operator"(%32, %arg2) <{name = "onnx.Unsqueeze"}> : (!torch.vtensor<[?,4],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,?,4],f32>
# * Fold {
# } -> FAILURE : unable to fold
# * Pattern : 'torch.operator -> ()' {
# Trying to match ""
# ** Failure : only support constant int axes values
# : conversion failed to apply: "onnx.Unsqueeze", sinceVersion=13
# ** Failure : no matching versioned converter
# "" result 0
# } -> FAILURE : pattern failed to match
# } -> FAILURE : no matched legalization pattern
# //===-------------------------------------------===//
# repro.mlir:8:14: error: failed to legalize operation 'torch.operator' that was explicitly marked illegal
# %6 = torch.operator "onnx.Unsqueeze"(%5, %arg2) : (!torch.vtensor<[?,4],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,?,4],f32>
# ^
# repro.mlir:8:14: note: see current operation: %33 = "torch.operator"(%32, %arg2) <{name = "onnx.Unsqueeze"}> : (!torch.vtensor<[?,4],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,?,4],f32>
# ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::PreservedAnalyses::AllAnalysesType)
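# Takeaway from the debug log above: the onnx.Unsqueeze converter only
# supports constant integer axes, and %arg2 is a block argument, not a
# constant. Rebuild the repro with the axes supplied by an onnx.Constant
# defined inside the branch: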
cat <<-EOF > ./rcnn_repro.mlir
func.func @minimal_example(%arg0: !torch.vtensor<[1],i1>, %arg1: !torch.vtensor<[?,4],f32>, %arg3: !torch.vtensor<[?],f32>) -> !torch.vtensor<[?],si64> attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 17 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "1.13.1"} {
%0 = torch.operator "onnx.If"(%arg0) : (!torch.vtensor<[1],i1>) -> !torch.vtensor<[?],si64> {
%c0 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1 = torch.operator "onnx.ReduceMax"(%arg1) {torch.onnx.keepdims = 0 : si64} : (!torch.vtensor<[?,4],f32>) -> !torch.vtensor<[],f32>
%2 = torch.operator "onnx.Cast"(%c0) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],f32>
%3 = torch.operator "onnx.Mul"(%2, %1) : (!torch.vtensor<[1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1],f32>
%4 = torch.operator "onnx.Unsqueeze"(%3, %c0) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,1],f32>
%5 = torch.operator "onnx.Add"(%arg1, %4) : (!torch.vtensor<[?,4],f32>, !torch.vtensor<[1,1],f32>) -> !torch.vtensor<[?,4],f32>
%6 = torch.operator "onnx.Unsqueeze"(%5, %c0) : (!torch.vtensor<[?,4],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,?,4],f32>
%7 = torch.operator "onnx.Unsqueeze"(%arg3, %c0) : (!torch.vtensor<[?],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,?],f32>
%8 = torch.operator "onnx.Unsqueeze"(%7, %c0) : (!torch.vtensor<[1,?],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,1,?],f32>
%9 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<1> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0.5> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
%11 = torch.operator "onnx.NonMaxSuppression"(%6, %8, %9, %10) : (!torch.vtensor<[1,?,4],f32>, !torch.vtensor<[1,1,?],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[?,3],si64>
%12 = torch.operator "onnx.Gather"(%11, %c0) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[?,3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1],si64>
%13 = torch.operator "onnx.Squeeze"(%12, %c0) : (!torch.vtensor<[?,1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?],si64>
torch.operator_terminator %13 : !torch.vtensor<[?],si64>
}, {
%1 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<> : tensor<0xsi64>} : () -> !torch.vtensor<[0],si64>
torch.operator_terminator %1 : !torch.vtensor<[0],si64>
}
return %0 : !torch.vtensor<[?],si64>
}
EOF
# simple if test
cat <<-EOF > ./rcnn_repro.mlir
func.func @minimal_example(%arg0: !torch.vtensor<[1],i1>, %arg1: !torch.vtensor<[?,4],f32>, %arg3: !torch.vtensor<[?],f32>) -> !torch.vtensor<[?],si64> attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 17 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "1.13.1"} {
%0 = torch.operator "onnx.If"(%arg0) : (!torch.vtensor<[1],i1>) -> !torch.vtensor<[?],si64> {
%c0 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<0> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
torch.operator_terminator %c0 : !torch.vtensor<[1],si64>
}, {
%1 = torch.operator "onnx.Constant"() {torch.onnx.value = dense<> : tensor<0xsi64>} : () -> !torch.vtensor<[0],si64>
torch.operator_terminator %1 : !torch.vtensor<[0],si64>
}
return %0 : !torch.vtensor<[?],si64>
}
EOF
torch-mlir-opt --convert-torch-onnx-to-torch rcnn_repro.mlir -o rcnn_repro.torch.mlir --debug |& tee rcnn_repro.log
torch-mlir-opt --convert-torch-onnx-to-torch /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.mlir -o /home/azureuser/SHARK-TestSuite/e2eshark/test-run/onnx/models/KeypointRCNN_vaiq_int8/KeypointRCNN_vaiq_int8.default.torch-onnx.torch.mlir --mlir-print-ir-after-failure --mlir-elide-resource-strings-if-larger=16 --debug &> /tmp/after_failure.log
torch-mlir-opt --convert-torch-onnx-to-torch /tmp/afterfailure.mlir -o /tmp/afterfailure.torch.mlir --mlir-print-ir-after-failure
torch-mlir-opt --convert-torch-onnx-to-torch ./repro2.mlir -o /tmp/afterfailure.torch.mlir --mlir-print-ir-after-failure