Running MaskedFillTensorIntValueStaticModule_basic...
*** RUNNING TEST: MaskedScatterStaticBasic_basic ***
Compiling MaskedScatterStaticBasic_basic...
/proj/gdba/shark/chi/src/torch-mlir/mlir_venv/lib/python3.10/site-packages/torch/onnx/symbolic_opset10.py:513: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
return g.op("Constant", value_t=torch.tensor(list_or_value))
====================
ONNX RAW IR
module {
func.func @main_graph(%arg0: !torch.vtensor<[4,4],f32>, %arg1: !torch.vtensor<[4,4],i1>, %arg2: !torch.vtensor<[8,8],f32>) -> !torch.vtensor<[4,4],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.6.0"} {
%none = torch.constant.none
%0 = torch.operator "onnx.Shape"(%arg0) : (!torch.vtensor<[4,4],f32>) -> !torch.vtensor<[2],si64>
%1 = torch.operator "onnx.Expand"(%arg1, %0) : (!torch.vtensor<[4,4],i1>, !torch.vtensor<[2],si64>) -> !torch.vtensor<[4,4],i1>
%2 = torch.operator "onnx.NonZero"(%1) : (!torch.vtensor<[4,4],i1>) -> !torch.vtensor<[2,?],si64>
%3 = torch.operator "onnx.Transpose"(%2) {torch.onnx.perm = [1 : si64, 0 : si64]} : (!torch.vtensor<[2,?],si64>) -> !torch.vtensor<[?,2],si64>
%4 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%5 = torch.operator "onnx.Reshape"(%arg2, %4) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[8,8],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[64],f32>
%6 = torch.operator "onnx.Shape"(%3) : (!torch.vtensor<[?,2],si64>) -> !torch.vtensor<[2],si64>
%7 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__1> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8 = torch.operator "onnx.Gather"(%6, %7) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__2> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__3> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__4> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12 = torch.operator "onnx.Unsqueeze"(%8, %11) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%13 = torch.operator "onnx.Slice"(%5, %10, %12, %9) : (!torch.vtensor<[64],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?],f32>
%14 = torch.operator "onnx.ScatterND"(%arg0, %3, %13) : (!torch.vtensor<[4,4],f32>, !torch.vtensor<[?,2],si64>, !torch.vtensor<[?],f32>) -> !torch.vtensor<[4,4],f32>
return %14 : !torch.vtensor<[4,4],f32>
}
}
{-#
dialect_resources: {
builtin: {
_: "0x08000000FFFFFFFFFFFFFFFF",
__1: "0x080000000000000000000000",
__2: "0x080000000000000000000000",
__3: "0x080000000000000000000000",
__4: "0x080000000000000000000000"
}
}
#-}
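
For orientation, the graph above appears to be the ONNX export of a masked scatter: NonZero turns the (broadcast) boolean mask into coordinates, Reshape/Slice keeps as many leading elements of the flattened source as there are True entries, and ScatterND writes them back into the input. A rough PyTorch sketch of the same computation, using the test's 4x4/8x8 shapes and illustrative names:

import torch

def masked_scatter_ref(inp, mask, source):
    # Coordinates of the True mask entries, row-major (onnx.NonZero + Transpose).
    idx = torch.nonzero(mask)                    # [N, 2]
    # Flattened source, truncated to one value per True entry (Reshape + Slice).
    vals = source.reshape(-1)[: idx.shape[0]]    # [N]
    # Write the values into a copy of the input at those coordinates (ScatterND).
    out = inp.clone()
    out[idx[:, 0], idx[:, 1]] = vals
    return out

inp, mask, source = torch.randn(4, 4), torch.rand(4, 4) > 0.5, torch.randn(8, 8)
assert torch.equal(masked_scatter_ref(inp, mask, source),
                   inp.masked_scatter(mask, source))
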
====================
Torch IR
module {
func.func @main_graph(%arg0: !torch.vtensor<[4,4],f32>, %arg1: !torch.vtensor<[4,4],i1>, %arg2: !torch.vtensor<[8,8],f32>) -> !torch.vtensor<[4,4],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.6.0"} {
%str = torch.constant.str "floor"
%0 = torch.vtensor.literal(dense<1> : tensor<1xsi64>) : !torch.vtensor<[1],si64>
%str_0 = torch.constant.str "sum"
%1 = torch.vtensor.literal(dense<0> : tensor<16xsi64>) : !torch.vtensor<[16],si64>
%int16 = torch.constant.int 16
%none = torch.constant.none
%false = torch.constant.bool false
%2 = torch.vtensor.literal(dense<4> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
%int4 = torch.constant.int 4
%int2 = torch.constant.int 2
%int1 = torch.constant.int 1
%int0 = torch.constant.int 0
%3 = torch.aten.flatten.using_ints %arg1, %int0, %int1 : !torch.vtensor<[4,4],i1>, !torch.int, !torch.int -> !torch.vtensor<[16],i1>
%4 = torch.aten.ne.Scalar %3, %int0 : !torch.vtensor<[16],i1>, !torch.int -> !torch.vtensor<[16],i1>
%5 = torch.aten.to.dtype %4, %int4, %false, %false, %none : !torch.vtensor<[16],i1>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[16],si64>
%6 = torch.aten.cumsum %5, %int0, %none : !torch.vtensor<[16],si64>, !torch.int, !torch.none -> !torch.vtensor<[16],si64>
%7 = torch.aten.sub.Scalar %6, %int1, %int1 : !torch.vtensor<[16],si64>, !torch.int, !torch.int -> !torch.vtensor<[16],si64>
%8 = torch.aten.clamp %7, %int0, %none : !torch.vtensor<[16],si64>, !torch.int, !torch.none -> !torch.vtensor<[16],si64>
%9 = torch.aten.arange.start_step %int0, %int16, %int1, %none, %none, %none, %none : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[16],si64>
%10 = torch.aten.mul.Tensor %9, %5 : !torch.vtensor<[16],si64>, !torch.vtensor<[16],si64> -> !torch.vtensor<[16],si64>
%11 = torch.aten.scatter_reduce.two %1, %int0, %8, %10, %str_0, %false : !torch.vtensor<[16],si64>, !torch.int, !torch.vtensor<[16],si64>, !torch.vtensor<[16],si64>, !torch.str, !torch.bool -> !torch.vtensor<[16],si64>
%12 = torch.aten.sum %5, %none : !torch.vtensor<[16],si64>, !torch.none -> !torch.vtensor<[],si64>
%13 = torch.aten.Int.Tensor %12 : !torch.vtensor<[],si64> -> !torch.int
%14 = torch.aten.slice.Tensor %11, %int0, %int0, %13, %int1 : !torch.vtensor<[16],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?],si64>
%15 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int>
%16 = torch.aten.flip %2, %15 : !torch.vtensor<[2],si64>, !torch.list<int> -> !torch.vtensor<[2],si64>
%17 = torch.aten.cumprod %16, %int0, %none : !torch.vtensor<[2],si64>, !torch.int, !torch.none -> !torch.vtensor<[2],si64>
%18 = torch.aten.flip %17, %15 : !torch.vtensor<[2],si64>, !torch.list<int> -> !torch.vtensor<[2],si64>
%19 = torch.aten.slice.Tensor %18, %int0, %int1, %int2, %int1 : !torch.vtensor<[2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1],si64>
%20 = torch.prim.ListConstruct %19, %0 : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.list<vtensor<[?],si64>>
%21 = torch.aten.cat %20, %int0 : !torch.list<vtensor<[?],si64>>, !torch.int -> !torch.vtensor<[2],si64>
%22 = torch.aten.unsqueeze %14, %int1 : !torch.vtensor<[?],si64>, !torch.int -> !torch.vtensor<[?,1],si64>
%23 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[2],si64>, !torch.int -> !torch.vtensor<[1,2],si64>
%24 = torch.aten.div.Tensor_mode %22, %23, %str : !torch.vtensor<[?,1],si64>, !torch.vtensor<[1,2],si64>, !torch.str -> !torch.vtensor<[?,2],si64>
%25 = torch.aten.remainder.Tensor %24, %2 : !torch.vtensor<[?,2],si64>, !torch.vtensor<[2],si64> -> !torch.vtensor<[2,2],si64>
%26 = torch.aten.transpose.int %25, %int0, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,2],si64>
%27 = torch.aten.flatten.using_ints %arg2, %int0, %int1 : !torch.vtensor<[8,8],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32>
%28 = torch.aten.slice.Tensor %27, %int0, %int0, %int2, %int1 : !torch.vtensor<[64],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],f32>
%29 = torch.aten.slice.Tensor %26, %int1, %int0, %int1, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64>
%30 = torch.aten.lt.Scalar %29, %int0 : !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],i1>
%31 = torch.aten.add.Scalar %29, %int4, %int1 : !torch.vtensor<[2,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64>
%32 = torch.aten.where.self %30, %31, %29 : !torch.vtensor<[2,1],i1>, !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64> -> !torch.vtensor<[2,1],si64>
%33 = torch.aten.slice.Tensor %26, %int1, %int1, %int2, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64>
%34 = torch.aten.lt.Scalar %33, %int0 : !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],i1>
%35 = torch.aten.add.Scalar %33, %int4, %int1 : !torch.vtensor<[2,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64>
%36 = torch.aten.where.self %34, %35, %33 : !torch.vtensor<[2,1],i1>, !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64> -> !torch.vtensor<[2,1],si64>
%37 = torch.aten.add.Tensor %36, %32, %int4 : !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],si64>
%38 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%39 = torch.aten.view %37, %38 : !torch.vtensor<[2,1],si64>, !torch.list<int> -> !torch.vtensor<[2],si64>
%40 = torch.aten.flatten.using_ints %39, %int0, %int0 : !torch.vtensor<[2],si64>, !torch.int, !torch.int -> !torch.vtensor<[2],si64>
%41 = torch.aten.flatten.using_ints %28, %int0, %int0 : !torch.vtensor<[2],f32>, !torch.int, !torch.int -> !torch.vtensor<[2],f32>
%42 = torch.aten.flatten.using_ints %arg0, %int0, %int1 : !torch.vtensor<[4,4],f32>, !torch.int, !torch.int -> !torch.vtensor<[16],f32>
%43 = torch.aten.scatter.src %42, %int0, %40, %41 : !torch.vtensor<[16],f32>, !torch.int, !torch.vtensor<[2],si64>, !torch.vtensor<[2],f32> -> !torch.vtensor<[16],f32>
%44 = torch.prim.ListConstruct %int4, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%45 = torch.aten.unflatten.int %43, %int0, %44 : !torch.vtensor<[16],f32>, !torch.int, !torch.list<int> -> !torch.vtensor<[4,4],f32>
return %45 : !torch.vtensor<[4,4],f32>
}
}
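
The torch-level decomposition above replaces the data-dependent NonZero with pure tensor arithmetic: a cumulative sum over the flattened mask assigns each True entry a destination slot, scatter_reduce compacts the arange of positions into those slots, the result is sliced to mask.sum() entries, and a floor-division/remainder pair unravels the linear indices into (row, col). A small sketch of that index computation (names are illustrative and the shapes are fixed to the 4x4 case):

import torch

mask = torch.tensor([[False, True,  False, True ],
                     [True,  False, False, False],
                     [False, False, True,  False],
                     [False, False, False, False]])

m = mask.flatten().to(torch.int64)            # [16], 0/1
slots = (m.cumsum(0) - 1).clamp(min=0)        # destination slot for each True entry
pos = torch.arange(16) * m                    # linear position, zeroed where mask is False
packed = torch.zeros(16, dtype=torch.int64).scatter_reduce(
    0, slots, pos, reduce="sum", include_self=False)
lin = packed[: int(m.sum())]                  # compacted linear indices of True entries

# Unravel the linear indices into (row, col), as the floor-div / remainder pair does.
coords = torch.stack([lin // 4, lin % 4], dim=1)
assert torch.equal(coords, torch.nonzero(mask))
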
====================
Torch Backend IR
module {
func.func @main_graph(%arg0: !torch.vtensor<[4,4],f32>, %arg1: !torch.vtensor<[4,4],i1>, %arg2: !torch.vtensor<[8,8],f32>) -> !torch.vtensor<[4,4],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.6.0"} {
%str = torch.constant.str "floor"
%0 = torch.vtensor.literal(dense<1> : tensor<1xsi64>) : !torch.vtensor<[1],si64>
%str_0 = torch.constant.str "sum"
%1 = torch.vtensor.literal(dense<0> : tensor<16xsi64>) : !torch.vtensor<[16],si64>
%int16 = torch.constant.int 16
%none = torch.constant.none
%false = torch.constant.bool false
%2 = torch.vtensor.literal(dense<4> : tensor<2xsi64>) : !torch.vtensor<[2],si64>
%int4 = torch.constant.int 4
%int2 = torch.constant.int 2
%int1 = torch.constant.int 1
%int0 = torch.constant.int 0
%3 = torch.aten.flatten.using_ints %arg1, %int0, %int1 : !torch.vtensor<[4,4],i1>, !torch.int, !torch.int -> !torch.vtensor<[16],i1>
%4 = torch.aten.ne.Scalar %3, %int0 : !torch.vtensor<[16],i1>, !torch.int -> !torch.vtensor<[16],i1>
%5 = torch.aten.to.dtype %4, %int4, %false, %false, %none : !torch.vtensor<[16],i1>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[16],si64>
%6 = torch.aten.cumsum %5, %int0, %none : !torch.vtensor<[16],si64>, !torch.int, !torch.none -> !torch.vtensor<[16],si64>
%7 = torch.aten.sub.Scalar %6, %int1, %int1 : !torch.vtensor<[16],si64>, !torch.int, !torch.int -> !torch.vtensor<[16],si64>
%8 = torch.aten.clamp %7, %int0, %none : !torch.vtensor<[16],si64>, !torch.int, !torch.none -> !torch.vtensor<[16],si64>
%9 = torch.aten.arange.start_step %int0, %int16, %int1, %none, %none, %none, %none : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[16],si64>
%10 = torch.aten.mul.Tensor %9, %5 : !torch.vtensor<[16],si64>, !torch.vtensor<[16],si64> -> !torch.vtensor<[16],si64>
%11 = torch.aten.scatter_reduce.two %1, %int0, %8, %10, %str_0, %false : !torch.vtensor<[16],si64>, !torch.int, !torch.vtensor<[16],si64>, !torch.vtensor<[16],si64>, !torch.str, !torch.bool -> !torch.vtensor<[16],si64>
%12 = torch.aten.sum %5, %none : !torch.vtensor<[16],si64>, !torch.none -> !torch.vtensor<[],si64>
%13 = torch.aten.Int.Tensor %12 : !torch.vtensor<[],si64> -> !torch.int
%14 = torch.aten.slice.Tensor %11, %int0, %int0, %13, %int1 : !torch.vtensor<[16],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?],si64>
%15 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int>
%16 = torch.aten.flip %2, %15 : !torch.vtensor<[2],si64>, !torch.list<int> -> !torch.vtensor<[2],si64>
%17 = torch.aten.cumprod %16, %int0, %none : !torch.vtensor<[2],si64>, !torch.int, !torch.none -> !torch.vtensor<[2],si64>
%18 = torch.aten.flip %17, %15 : !torch.vtensor<[2],si64>, !torch.list<int> -> !torch.vtensor<[2],si64>
%19 = torch.aten.slice.Tensor %18, %int0, %int1, %int2, %int1 : !torch.vtensor<[2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1],si64>
%20 = torch.prim.ListConstruct %19, %0 : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.list<vtensor<[?],si64>>
%21 = torch.aten.cat %20, %int0 : !torch.list<vtensor<[?],si64>>, !torch.int -> !torch.vtensor<[2],si64>
%22 = torch.aten.unsqueeze %14, %int1 : !torch.vtensor<[?],si64>, !torch.int -> !torch.vtensor<[?,1],si64>
%23 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[2],si64>, !torch.int -> !torch.vtensor<[1,2],si64>
%24 = torch.aten.div.Tensor_mode %22, %23, %str : !torch.vtensor<[?,1],si64>, !torch.vtensor<[1,2],si64>, !torch.str -> !torch.vtensor<[?,2],si64>
%25 = torch.aten.remainder.Tensor %24, %2 : !torch.vtensor<[?,2],si64>, !torch.vtensor<[2],si64> -> !torch.vtensor<[2,2],si64>
%26 = torch.aten.transpose.int %25, %int0, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,2],si64>
%27 = torch.aten.flatten.using_ints %arg2, %int0, %int1 : !torch.vtensor<[8,8],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32>
%28 = torch.aten.slice.Tensor %27, %int0, %int0, %int2, %int1 : !torch.vtensor<[64],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],f32>
%29 = torch.aten.slice.Tensor %26, %int1, %int0, %int1, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64>
%30 = torch.aten.lt.Scalar %29, %int0 : !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],i1>
%31 = torch.aten.add.Scalar %29, %int4, %int1 : !torch.vtensor<[2,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64>
%32 = torch.aten.where.self %30, %31, %29 : !torch.vtensor<[2,1],i1>, !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64> -> !torch.vtensor<[2,1],si64>
%33 = torch.aten.slice.Tensor %26, %int1, %int1, %int2, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64>
%34 = torch.aten.lt.Scalar %33, %int0 : !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],i1>
%35 = torch.aten.add.Scalar %33, %int4, %int1 : !torch.vtensor<[2,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64>
%36 = torch.aten.where.self %34, %35, %33 : !torch.vtensor<[2,1],i1>, !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64> -> !torch.vtensor<[2,1],si64>
%37 = torch.aten.add.Tensor %36, %32, %int4 : !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],si64>
%38 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int>
%39 = torch.aten.view %37, %38 : !torch.vtensor<[2,1],si64>, !torch.list<int> -> !torch.vtensor<[2],si64>
%40 = torch.aten.flatten.using_ints %39, %int0, %int0 : !torch.vtensor<[2],si64>, !torch.int, !torch.int -> !torch.vtensor<[2],si64>
%41 = torch.aten.flatten.using_ints %28, %int0, %int0 : !torch.vtensor<[2],f32>, !torch.int, !torch.int -> !torch.vtensor<[2],f32>
%42 = torch.aten.flatten.using_ints %arg0, %int0, %int1 : !torch.vtensor<[4,4],f32>, !torch.int, !torch.int -> !torch.vtensor<[16],f32>
%43 = torch.aten.scatter.src %42, %int0, %40, %41 : !torch.vtensor<[16],f32>, !torch.int, !torch.vtensor<[2],si64>, !torch.vtensor<[2],f32> -> !torch.vtensor<[16],f32>
%44 = torch.prim.ListConstruct %int4, %int4 : (!torch.int, !torch.int) -> !torch.list<int>
%45 = torch.aten.unflatten.int %43, %int0, %44 : !torch.vtensor<[16],f32>, !torch.int, !torch.list<int> -> !torch.vtensor<[4,4],f32>
return %45 : !torch.vtensor<[4,4],f32>
}
}
====================
LINALG Backend IR
#map = affine_map<(d0) -> (d0)>
#map1 = affine_map<(d0) -> (d0, 0)>
#map2 = affine_map<(d0) -> ()>
#map3 = affine_map<(d0, d1) -> (d0, 0)>
#map4 = affine_map<(d0, d1) -> (0, d1)>
#map5 = affine_map<(d0, d1) -> (d0, d1)>
#map6 = affine_map<(d0, d1) -> (d1)>
module {
ml_program.global private mutable @global_seed(dense<0> : tensor<i64>) : tensor<i64>
func.func @main_graph(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xi1>, %arg2: tensor<8x8xf32>) -> tensor<4x4xf32> {
%cst = arith.constant dense<0> : tensor<16xi64>
%c0 = arith.constant 0 : index
%c-1 = arith.constant -1 : index
%c1 = arith.constant 1 : index
%cst_0 = arith.constant dense<1> : tensor<1xi64>
%cst_1 = arith.constant dense<4> : tensor<2xi64>
%c4_i64 = arith.constant 4 : i64
%c16 = arith.constant 16 : index
%c0_i64 = arith.constant 0 : i64
%c0_i32 = arith.constant 0 : i32
%c2 = arith.constant 2 : index
%c1_i64 = arith.constant 1 : i64
%cst_2 = arith.constant 0.000000e+00 : f32
%collapsed = tensor.collapse_shape %arg1 [[0, 1]] : tensor<4x4xi1> into tensor<16xi1>
%0 = tensor.empty() : tensor<16xi64>
%1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%collapsed : tensor<16xi1>) outs(%0 : tensor<16xi64>) {
^bb0(%in: i1, %out: i64):
%51 = arith.extui %in : i1 to i64
linalg.yield %51 : i64
} -> tensor<16xi64>
%2 = linalg.fill ins(%c0_i64 : i64) outs(%0 : tensor<16xi64>) -> tensor<16xi64>
%3 = tensor.empty() : tensor<i64>
%4 = linalg.fill ins(%c0_i64 : i64) outs(%3 : tensor<i64>) -> tensor<i64>
%5:2 = tm_tensor.scan dimension(0) inclusive(true) ins(%1 : tensor<16xi64>) outs(%2, %4 : tensor<16xi64>, tensor<i64>) {
^bb0(%arg3: i64, %arg4: i64):
%51 = arith.addi %arg3, %arg4 : i64
tm_tensor.yield %51 : i64
} -> tensor<16xi64>, tensor<i64>
%6 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%5#0 : tensor<16xi64>) outs(%0 : tensor<16xi64>) {
^bb0(%in: i64, %out: i64):
%51 = arith.subi %in, %c1_i64 : i64
linalg.yield %51 : i64
} -> tensor<16xi64>
%7 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%6 : tensor<16xi64>) outs(%0 : tensor<16xi64>) {
^bb0(%in: i64, %out: i64):
%51 = arith.cmpi slt, %in, %c0_i64 : i64
%52 = arith.select %51, %c0_i64, %in : i64
linalg.yield %52 : i64
} -> tensor<16xi64>
%8 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} outs(%0 : tensor<16xi64>) {
^bb0(%out: i64):
%51 = linalg.index 0 : index
%52 = arith.index_cast %51 : index to i64
linalg.yield %52 : i64
} -> tensor<16xi64>
%9 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%8, %1 : tensor<16xi64>, tensor<16xi64>) outs(%0 : tensor<16xi64>) {
^bb0(%in: i64, %in_12: i64, %out: i64):
%51 = arith.muli %in, %in_12 : i64
linalg.yield %51 : i64
} -> tensor<16xi64>
%10 = tensor.empty() : tensor<16x1xi32>
%11 = linalg.fill ins(%c0_i32 : i32) outs(%10 : tensor<16x1xi32>) -> tensor<16x1xi32>
%12:2 = linalg.generic {indexing_maps = [#map1, #map], iterator_types = ["parallel"]} outs(%11, %2 : tensor<16x1xi32>, tensor<16xi64>) {
^bb0(%out: i32, %out_12: i64):
%51 = linalg.index 0 : index
%52 = arith.remsi %51, %c16 : index
%extracted_13 = tensor.extract %7[%52] : tensor<16xi64>
%extracted_14 = tensor.extract %9[%52] : tensor<16xi64>
%53 = arith.trunci %extracted_13 : i64 to i32
linalg.yield %53, %extracted_14 : i32, i64
} -> (tensor<16x1xi32>, tensor<16xi64>)
%13 = tm_tensor.scatter {dimension_map = array<i64: 0>} unique_indices(false) ins(%2, %12#0 : tensor<16xi64>, tensor<16x1xi32>) outs(%cst : tensor<16xi64>) {
^bb0(%arg3: i64, %arg4: i64):
tm_tensor.yield %arg3 : i64
} -> tensor<16xi64>
%14 = tm_tensor.scatter {dimension_map = array<i64: 0>} unique_indices(false) ins(%12#1, %12#0 : tensor<16xi64>, tensor<16x1xi32>) outs(%13 : tensor<16xi64>) {
^bb0(%arg3: i64, %arg4: i64):
%51 = arith.addi %arg3, %arg4 : i64
tm_tensor.yield %51 : i64
} -> tensor<16xi64>
%15 = linalg.generic {indexing_maps = [#map, #map2], iterator_types = ["reduction"]} ins(%1 : tensor<16xi64>) outs(%4 : tensor<i64>) {
^bb0(%in: i64, %out: i64):
%51 = arith.addi %in, %out : i64
linalg.yield %51 : i64
} -> tensor<i64>
%extracted = tensor.extract %15[] : tensor<i64>
%16 = arith.index_cast %extracted : i64 to index
%17 = arith.cmpi slt, %16, %c0 : index
%18 = arith.addi %16, %c16 : index
%19 = arith.select %17, %18, %16 : index
%20 = arith.cmpi slt, %19, %c0 : index
%21 = arith.select %20, %c-1, %19 : index
%22 = arith.cmpi sgt, %21, %c16 : index
%23 = arith.select %22, %c16, %21 : index
%24 = arith.cmpi slt, %23, %c0 : index
%25 = arith.select %24, %c0, %23 : index
%extracted_slice = tensor.extract_slice %14[0] [%25] [1] : tensor<16xi64> to tensor<?xi64>
%26 = tensor.empty() : tensor<2xi64>
%27 = linalg.fill ins(%c0_i64 : i64) outs(%26 : tensor<2xi64>) -> tensor<2xi64>
%28 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%cst_1 : tensor<2xi64>) outs(%27 : tensor<2xi64>) {
^bb0(%in: i64, %out: i64):
linalg.yield %c4_i64 : i64
} -> tensor<2xi64>
%29 = linalg.fill ins(%c1_i64 : i64) outs(%26 : tensor<2xi64>) -> tensor<2xi64>
%30 = linalg.fill ins(%c1_i64 : i64) outs(%3 : tensor<i64>) -> tensor<i64>
%31:2 = tm_tensor.scan dimension(0) inclusive(true) ins(%28 : tensor<2xi64>) outs(%29, %30 : tensor<2xi64>, tensor<i64>) {
^bb0(%arg3: i64, %arg4: i64):
%51 = arith.muli %arg3, %arg4 : i64
tm_tensor.yield %51 : i64
} -> tensor<2xi64>, tensor<i64>
%32 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%31#0 : tensor<2xi64>) outs(%27 : tensor<2xi64>) {
^bb0(%in: i64, %out: i64):
%51 = linalg.index 0 : index
%52 = arith.subi %c1, %51 : index
%extracted_12 = tensor.extract %31#0[%52] : tensor<2xi64>
linalg.yield %extracted_12 : i64
} -> tensor<2xi64>
%extracted_slice_3 = tensor.extract_slice %32[1] [1] [1] : tensor<2xi64> to tensor<1xi64>
%concat = tensor.concat dim(0) %extracted_slice_3, %cst_0 : (tensor<1xi64>, tensor<1xi64>) -> tensor<2xi64>
%expanded = tensor.expand_shape %extracted_slice [[0, 1]] output_shape [%25, 1] : tensor<?xi64> into tensor<?x1xi64>
%expanded_4 = tensor.expand_shape %concat [[0, 1]] output_shape [1, 2] : tensor<2xi64> into tensor<1x2xi64>
%33 = tensor.empty() : tensor<2x2xi64>
%34 = linalg.generic {indexing_maps = [#map3, #map4, #map5], iterator_types = ["parallel", "parallel"]} ins(%expanded, %expanded_4 : tensor<?x1xi64>, tensor<1x2xi64>) outs(%33 : tensor<2x2xi64>) {
^bb0(%in: i64, %in_12: i64, %out: i64):
%51 = arith.sitofp %in : i64 to f64
%52 = arith.sitofp %in_12 : i64 to f64
%53 = arith.divf %51, %52 : f64
%54 = math.floor %53 : f64
%55 = arith.fptosi %54 : f64 to i64
linalg.yield %55 : i64
} -> tensor<2x2xi64>
%35 = linalg.generic {indexing_maps = [#map5, #map6, #map5], iterator_types = ["parallel", "parallel"]} ins(%34, %cst_1 : tensor<2x2xi64>, tensor<2xi64>) outs(%33 : tensor<2x2xi64>) {
^bb0(%in: i64, %in_12: i64, %out: i64):
%51 = arith.remsi %in, %in_12 : i64
%52 = arith.cmpi ne, %51, %c0_i64 : i64
%53 = arith.cmpi slt, %in_12, %c0_i64 : i64
%54 = arith.cmpi slt, %51, %c0_i64 : i64
%55 = arith.xori %53, %54 : i1
%56 = arith.andi %52, %55 : i1
%57 = arith.addi %51, %in_12 : i64
%58 = arith.select %56, %57, %51 : i64
linalg.yield %58 : i64
} -> tensor<2x2xi64>
%transposed = linalg.transpose ins(%35 : tensor<2x2xi64>) outs(%33 : tensor<2x2xi64>) permutation = [1, 0]
%collapsed_5 = tensor.collapse_shape %arg2 [[0, 1]] : tensor<8x8xf32> into tensor<64xf32>
%extracted_slice_6 = tensor.extract_slice %collapsed_5[0] [2] [1] : tensor<64xf32> to tensor<2xf32>
%extracted_slice_7 = tensor.extract_slice %transposed[0, 0] [2, 1] [1, 1] : tensor<2x2xi64> to tensor<2x1xi64>
%36 = tensor.empty() : tensor<2x1xi1>
%37 = linalg.generic {indexing_maps = [#map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice_7 : tensor<2x1xi64>) outs(%36 : tensor<2x1xi1>) {
^bb0(%in: i64, %out: i1):
%51 = arith.cmpi slt, %in, %c0_i64 : i64
linalg.yield %51 : i1
} -> tensor<2x1xi1>
%38 = tensor.empty() : tensor<2x1xi64>
%39 = linalg.generic {indexing_maps = [#map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice_7 : tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) {
^bb0(%in: i64, %out: i64):
%51 = arith.addi %in, %c4_i64 : i64
linalg.yield %51 : i64
} -> tensor<2x1xi64>
%40 = linalg.generic {indexing_maps = [#map3, #map3, #map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%37, %39, %extracted_slice_7 : tensor<2x1xi1>, tensor<2x1xi64>, tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) {
^bb0(%in: i1, %in_12: i64, %in_13: i64, %out: i64):
%51 = arith.select %in, %in_12, %in_13 : i64
linalg.yield %51 : i64
} -> tensor<2x1xi64>
%extracted_slice_8 = tensor.extract_slice %transposed[0, 1] [2, 1] [1, 1] : tensor<2x2xi64> to tensor<2x1xi64>
%41 = linalg.generic {indexing_maps = [#map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice_8 : tensor<2x1xi64>) outs(%36 : tensor<2x1xi1>) {
^bb0(%in: i64, %out: i1):
%51 = arith.cmpi slt, %in, %c0_i64 : i64
linalg.yield %51 : i1
} -> tensor<2x1xi1>
%42 = linalg.generic {indexing_maps = [#map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice_8 : tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) {
^bb0(%in: i64, %out: i64):
%51 = arith.addi %in, %c4_i64 : i64
linalg.yield %51 : i64
} -> tensor<2x1xi64>
%43 = linalg.generic {indexing_maps = [#map3, #map3, #map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%41, %42, %extracted_slice_8 : tensor<2x1xi1>, tensor<2x1xi64>, tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) {
^bb0(%in: i1, %in_12: i64, %in_13: i64, %out: i64):
%51 = arith.select %in, %in_12, %in_13 : i64
linalg.yield %51 : i64
} -> tensor<2x1xi64>
%44 = linalg.generic {indexing_maps = [#map3, #map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%43, %40 : tensor<2x1xi64>, tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) {
^bb0(%in: i64, %in_12: i64, %out: i64):
%51 = arith.muli %in_12, %c4_i64 : i64
%52 = arith.addi %in, %51 : i64
linalg.yield %52 : i64
} -> tensor<2x1xi64>
%collapsed_9 = tensor.collapse_shape %44 [[0, 1]] : tensor<2x1xi64> into tensor<2xi64>
%collapsed_10 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<4x4xf32> into tensor<16xf32>
%45 = tensor.empty() : tensor<2x1xi32>
%46 = linalg.fill ins(%c0_i32 : i32) outs(%45 : tensor<2x1xi32>) -> tensor<2x1xi32>
%47 = tensor.empty() : tensor<2xf32>
%48 = linalg.fill ins(%cst_2 : f32) outs(%47 : tensor<2xf32>) -> tensor<2xf32>
%49:2 = linalg.generic {indexing_maps = [#map1, #map], iterator_types = ["parallel"]} outs(%46, %48 : tensor<2x1xi32>, tensor<2xf32>) {
^bb0(%out: i32, %out_12: f32):
%51 = linalg.index 0 : index
%52 = arith.remsi %51, %c2 : index
%extracted_13 = tensor.extract %collapsed_9[%52] : tensor<2xi64>
%extracted_14 = tensor.extract %extracted_slice_6[%52] : tensor<2xf32>
%53 = arith.trunci %extracted_13 : i64 to i32
linalg.yield %53, %extracted_14 : i32, f32
} -> (tensor<2x1xi32>, tensor<2xf32>)
%50 = tm_tensor.scatter {dimension_map = array<i64: 0>} unique_indices(false) ins(%49#1, %49#0 : tensor<2xf32>, tensor<2x1xi32>) outs(%collapsed_10 : tensor<16xf32>) {
^bb0(%arg3: f32, %arg4: f32):
tm_tensor.yield %arg3 : f32
} -> tensor<16xf32>
%expanded_11 = tensor.expand_shape %50 [[0, 1]] output_shape [4, 4] : tensor<16xf32> into tensor<4x4xf32>
return %expanded_11 : tensor<4x4xf32>
}
}
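
In the linalg lowering, the 2-D scatter is rewritten as a 1-D one: each (row, col) coordinate is wrapped to a non-negative value, folded into a linear offset row * 4 + col, and the selected source values are scattered into the flattened 4x4 input, which is then expanded back to 4x4. A sketch of that folding step under the same shapes (illustrative names, not taken from the IR):

import torch

def scatter_2d_as_1d(inp, coords, vals):
    rows, cols = coords[:, 0], coords[:, 1]
    # Wrap negative coordinates, as the lt/add/where triples in the IR do.
    rows = torch.where(rows < 0, rows + 4, rows)
    cols = torch.where(cols < 0, cols + 4, cols)
    # Fold (row, col) into a linear offset into the flattened destination.
    lin = rows * 4 + cols
    flat = inp.flatten().scatter(0, lin, vals)
    return flat.reshape(4, 4)
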
Running MaskedScatterStaticBasic_basic...
ERROR: Runtime op verification failed
"memref.store"(%589, %550, %552, %554) <{nontemporal = false}> : (i64, memref<2x2xi64>, index, index) -> ()
^ out-of-bounds access
Location: loc("/Transpose")
./build_tools/ci/test_posix.sh: line 12: 156377 Aborted (core dumped) python -m e2e_testing.main --config=onnx -v -s