Last active
November 20, 2024 00:33
-
-
Save AmosLewis/b748824331849257f76925380fc0169f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Running MaskedFillTensorIntValueStaticModule_basic... | |
*** RUNNING TEST: MaskedScatterStaticBasic_basic *** | |
Compiling MaskedScatterStaticBasic_basic... | |
/proj/gdba/shark/chi/src/torch-mlir/mlir_venv/lib/python3.10/site-packages/torch/onnx/symbolic_opset10.py:513: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). | |
return g.op("Constant", value_t=torch.tensor(list_or_value)) | |
==================== | |
ONNX RAW IR | |
module { | |
func.func @main_graph(%arg0: !torch.vtensor<[4,4],f32>, %arg1: !torch.vtensor<[4,4],i1>, %arg2: !torch.vtensor<[8,8],f32>) -> !torch.vtensor<[4,4],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.6.0"} { | |
%none = torch.constant.none | |
%0 = torch.operator "onnx.Shape"(%arg0) : (!torch.vtensor<[4,4],f32>) -> !torch.vtensor<[2],si64> | |
%1 = torch.operator "onnx.Expand"(%arg1, %0) : (!torch.vtensor<[4,4],i1>, !torch.vtensor<[2],si64>) -> !torch.vtensor<[4,4],i1> | |
%2 = torch.operator "onnx.NonZero"(%1) : (!torch.vtensor<[4,4],i1>) -> !torch.vtensor<[2,?],si64> | |
%3 = torch.operator "onnx.Transpose"(%2) {torch.onnx.perm = [1 : si64, 0 : si64]} : (!torch.vtensor<[2,?],si64>) -> !torch.vtensor<[?,2],si64> | |
%4 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> | |
%5 = torch.operator "onnx.Reshape"(%arg2, %4) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[8,8],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[64],f32> | |
%6 = torch.operator "onnx.Shape"(%3) : (!torch.vtensor<[?,2],si64>) -> !torch.vtensor<[2],si64> | |
%7 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__1> : tensor<si64>} : () -> !torch.vtensor<[],si64> | |
%8 = torch.operator "onnx.Gather"(%6, %7) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> | |
%9 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__2> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> | |
%10 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__3> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> | |
%11 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__4> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> | |
%12 = torch.operator "onnx.Unsqueeze"(%8, %11) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> | |
%13 = torch.operator "onnx.Slice"(%5, %10, %12, %9) : (!torch.vtensor<[64],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?],f32> | |
%14 = torch.operator "onnx.ScatterND"(%arg0, %3, %13) : (!torch.vtensor<[4,4],f32>, !torch.vtensor<[?,2],si64>, !torch.vtensor<[?],f32>) -> !torch.vtensor<[4,4],f32> | |
return %14 : !torch.vtensor<[4,4],f32> | |
} | |
} | |
{-# | |
dialect_resources: { | |
builtin: { | |
_: "0x08000000FFFFFFFFFFFFFFFF", | |
__1: "0x080000000000000000000000", | |
__2: "0x080000000000000000000000", | |
__3: "0x080000000000000000000000", | |
__4: "0x080000000000000000000000" | |
} | |
} | |
#-} | |
==================== | |
Torch IR | |
module { | |
func.func @main_graph(%arg0: !torch.vtensor<[4,4],f32>, %arg1: !torch.vtensor<[4,4],i1>, %arg2: !torch.vtensor<[8,8],f32>) -> !torch.vtensor<[4,4],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.6.0"} { | |
%str = torch.constant.str "floor" | |
%0 = torch.vtensor.literal(dense<1> : tensor<1xsi64>) : !torch.vtensor<[1],si64> | |
%str_0 = torch.constant.str "sum" | |
%1 = torch.vtensor.literal(dense<0> : tensor<16xsi64>) : !torch.vtensor<[16],si64> | |
%int16 = torch.constant.int 16 | |
%none = torch.constant.none | |
%false = torch.constant.bool false | |
%2 = torch.vtensor.literal(dense<4> : tensor<2xsi64>) : !torch.vtensor<[2],si64> | |
%int4 = torch.constant.int 4 | |
%int2 = torch.constant.int 2 | |
%int1 = torch.constant.int 1 | |
%int0 = torch.constant.int 0 | |
%3 = torch.aten.flatten.using_ints %arg1, %int0, %int1 : !torch.vtensor<[4,4],i1>, !torch.int, !torch.int -> !torch.vtensor<[16],i1> | |
%4 = torch.aten.ne.Scalar %3, %int0 : !torch.vtensor<[16],i1>, !torch.int -> !torch.vtensor<[16],i1> | |
%5 = torch.aten.to.dtype %4, %int4, %false, %false, %none : !torch.vtensor<[16],i1>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[16],si64> | |
%6 = torch.aten.cumsum %5, %int0, %none : !torch.vtensor<[16],si64>, !torch.int, !torch.none -> !torch.vtensor<[16],si64> | |
%7 = torch.aten.sub.Scalar %6, %int1, %int1 : !torch.vtensor<[16],si64>, !torch.int, !torch.int -> !torch.vtensor<[16],si64> | |
%8 = torch.aten.clamp %7, %int0, %none : !torch.vtensor<[16],si64>, !torch.int, !torch.none -> !torch.vtensor<[16],si64> | |
%9 = torch.aten.arange.start_step %int0, %int16, %int1, %none, %none, %none, %none : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[16],si64> | |
%10 = torch.aten.mul.Tensor %9, %5 : !torch.vtensor<[16],si64>, !torch.vtensor<[16],si64> -> !torch.vtensor<[16],si64> | |
%11 = torch.aten.scatter_reduce.two %1, %int0, %8, %10, %str_0, %false : !torch.vtensor<[16],si64>, !torch.int, !torch.vtensor<[16],si64>, !torch.vtensor<[16],si64>, !torch.str, !torch.bool -> !torch.vtensor<[16],si64> | |
%12 = torch.aten.sum %5, %none : !torch.vtensor<[16],si64>, !torch.none -> !torch.vtensor<[],si64> | |
%13 = torch.aten.Int.Tensor %12 : !torch.vtensor<[],si64> -> !torch.int | |
%14 = torch.aten.slice.Tensor %11, %int0, %int0, %13, %int1 : !torch.vtensor<[16],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?],si64> | |
%15 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%16 = torch.aten.flip %2, %15 : !torch.vtensor<[2],si64>, !torch.list<int> -> !torch.vtensor<[2],si64> | |
%17 = torch.aten.cumprod %16, %int0, %none : !torch.vtensor<[2],si64>, !torch.int, !torch.none -> !torch.vtensor<[2],si64> | |
%18 = torch.aten.flip %17, %15 : !torch.vtensor<[2],si64>, !torch.list<int> -> !torch.vtensor<[2],si64> | |
%19 = torch.aten.slice.Tensor %18, %int0, %int1, %int2, %int1 : !torch.vtensor<[2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1],si64> | |
%20 = torch.prim.ListConstruct %19, %0 : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.list<vtensor<[?],si64>> | |
%21 = torch.aten.cat %20, %int0 : !torch.list<vtensor<[?],si64>>, !torch.int -> !torch.vtensor<[2],si64> | |
%22 = torch.aten.unsqueeze %14, %int1 : !torch.vtensor<[?],si64>, !torch.int -> !torch.vtensor<[?,1],si64> | |
%23 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[2],si64>, !torch.int -> !torch.vtensor<[1,2],si64> | |
%24 = torch.aten.div.Tensor_mode %22, %23, %str : !torch.vtensor<[?,1],si64>, !torch.vtensor<[1,2],si64>, !torch.str -> !torch.vtensor<[?,2],si64> | |
%25 = torch.aten.remainder.Tensor %24, %2 : !torch.vtensor<[?,2],si64>, !torch.vtensor<[2],si64> -> !torch.vtensor<[2,2],si64> | |
%26 = torch.aten.transpose.int %25, %int0, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,2],si64> | |
%27 = torch.aten.flatten.using_ints %arg2, %int0, %int1 : !torch.vtensor<[8,8],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%28 = torch.aten.slice.Tensor %27, %int0, %int0, %int2, %int1 : !torch.vtensor<[64],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],f32> | |
%29 = torch.aten.slice.Tensor %26, %int1, %int0, %int1, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64> | |
%30 = torch.aten.lt.Scalar %29, %int0 : !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],i1> | |
%31 = torch.aten.add.Scalar %29, %int4, %int1 : !torch.vtensor<[2,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64> | |
%32 = torch.aten.where.self %30, %31, %29 : !torch.vtensor<[2,1],i1>, !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64> -> !torch.vtensor<[2,1],si64> | |
%33 = torch.aten.slice.Tensor %26, %int1, %int1, %int2, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64> | |
%34 = torch.aten.lt.Scalar %33, %int0 : !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],i1> | |
%35 = torch.aten.add.Scalar %33, %int4, %int1 : !torch.vtensor<[2,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64> | |
%36 = torch.aten.where.self %34, %35, %33 : !torch.vtensor<[2,1],i1>, !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64> -> !torch.vtensor<[2,1],si64> | |
%37 = torch.aten.add.Tensor %36, %32, %int4 : !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],si64> | |
%38 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%39 = torch.aten.view %37, %38 : !torch.vtensor<[2,1],si64>, !torch.list<int> -> !torch.vtensor<[2],si64> | |
%40 = torch.aten.flatten.using_ints %39, %int0, %int0 : !torch.vtensor<[2],si64>, !torch.int, !torch.int -> !torch.vtensor<[2],si64> | |
%41 = torch.aten.flatten.using_ints %28, %int0, %int0 : !torch.vtensor<[2],f32>, !torch.int, !torch.int -> !torch.vtensor<[2],f32> | |
%42 = torch.aten.flatten.using_ints %arg0, %int0, %int1 : !torch.vtensor<[4,4],f32>, !torch.int, !torch.int -> !torch.vtensor<[16],f32> | |
%43 = torch.aten.scatter.src %42, %int0, %40, %41 : !torch.vtensor<[16],f32>, !torch.int, !torch.vtensor<[2],si64>, !torch.vtensor<[2],f32> -> !torch.vtensor<[16],f32> | |
%44 = torch.prim.ListConstruct %int4, %int4 : (!torch.int, !torch.int) -> !torch.list<int> | |
%45 = torch.aten.unflatten.int %43, %int0, %44 : !torch.vtensor<[16],f32>, !torch.int, !torch.list<int> -> !torch.vtensor<[4,4],f32> | |
return %45 : !torch.vtensor<[4,4],f32> | |
} | |
} | |
==================== | |
Torch Backend IR | |
module { | |
func.func @main_graph(%arg0: !torch.vtensor<[4,4],f32>, %arg1: !torch.vtensor<[4,4],i1>, %arg2: !torch.vtensor<[8,8],f32>) -> !torch.vtensor<[4,4],f32> attributes {torch.onnx_meta.ir_version = 9 : si64, torch.onnx_meta.opset_version = 20 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.6.0"} { | |
%str = torch.constant.str "floor" | |
%0 = torch.vtensor.literal(dense<1> : tensor<1xsi64>) : !torch.vtensor<[1],si64> | |
%str_0 = torch.constant.str "sum" | |
%1 = torch.vtensor.literal(dense<0> : tensor<16xsi64>) : !torch.vtensor<[16],si64> | |
%int16 = torch.constant.int 16 | |
%none = torch.constant.none | |
%false = torch.constant.bool false | |
%2 = torch.vtensor.literal(dense<4> : tensor<2xsi64>) : !torch.vtensor<[2],si64> | |
%int4 = torch.constant.int 4 | |
%int2 = torch.constant.int 2 | |
%int1 = torch.constant.int 1 | |
%int0 = torch.constant.int 0 | |
%3 = torch.aten.flatten.using_ints %arg1, %int0, %int1 : !torch.vtensor<[4,4],i1>, !torch.int, !torch.int -> !torch.vtensor<[16],i1> | |
%4 = torch.aten.ne.Scalar %3, %int0 : !torch.vtensor<[16],i1>, !torch.int -> !torch.vtensor<[16],i1> | |
%5 = torch.aten.to.dtype %4, %int4, %false, %false, %none : !torch.vtensor<[16],i1>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[16],si64> | |
%6 = torch.aten.cumsum %5, %int0, %none : !torch.vtensor<[16],si64>, !torch.int, !torch.none -> !torch.vtensor<[16],si64> | |
%7 = torch.aten.sub.Scalar %6, %int1, %int1 : !torch.vtensor<[16],si64>, !torch.int, !torch.int -> !torch.vtensor<[16],si64> | |
%8 = torch.aten.clamp %7, %int0, %none : !torch.vtensor<[16],si64>, !torch.int, !torch.none -> !torch.vtensor<[16],si64> | |
%9 = torch.aten.arange.start_step %int0, %int16, %int1, %none, %none, %none, %none : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.none, !torch.none -> !torch.vtensor<[16],si64> | |
%10 = torch.aten.mul.Tensor %9, %5 : !torch.vtensor<[16],si64>, !torch.vtensor<[16],si64> -> !torch.vtensor<[16],si64> | |
%11 = torch.aten.scatter_reduce.two %1, %int0, %8, %10, %str_0, %false : !torch.vtensor<[16],si64>, !torch.int, !torch.vtensor<[16],si64>, !torch.vtensor<[16],si64>, !torch.str, !torch.bool -> !torch.vtensor<[16],si64> | |
%12 = torch.aten.sum %5, %none : !torch.vtensor<[16],si64>, !torch.none -> !torch.vtensor<[],si64> | |
%13 = torch.aten.Int.Tensor %12 : !torch.vtensor<[],si64> -> !torch.int | |
%14 = torch.aten.slice.Tensor %11, %int0, %int0, %13, %int1 : !torch.vtensor<[16],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?],si64> | |
%15 = torch.prim.ListConstruct %int0 : (!torch.int) -> !torch.list<int> | |
%16 = torch.aten.flip %2, %15 : !torch.vtensor<[2],si64>, !torch.list<int> -> !torch.vtensor<[2],si64> | |
%17 = torch.aten.cumprod %16, %int0, %none : !torch.vtensor<[2],si64>, !torch.int, !torch.none -> !torch.vtensor<[2],si64> | |
%18 = torch.aten.flip %17, %15 : !torch.vtensor<[2],si64>, !torch.list<int> -> !torch.vtensor<[2],si64> | |
%19 = torch.aten.slice.Tensor %18, %int0, %int1, %int2, %int1 : !torch.vtensor<[2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1],si64> | |
%20 = torch.prim.ListConstruct %19, %0 : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.list<vtensor<[?],si64>> | |
%21 = torch.aten.cat %20, %int0 : !torch.list<vtensor<[?],si64>>, !torch.int -> !torch.vtensor<[2],si64> | |
%22 = torch.aten.unsqueeze %14, %int1 : !torch.vtensor<[?],si64>, !torch.int -> !torch.vtensor<[?,1],si64> | |
%23 = torch.aten.unsqueeze %21, %int0 : !torch.vtensor<[2],si64>, !torch.int -> !torch.vtensor<[1,2],si64> | |
%24 = torch.aten.div.Tensor_mode %22, %23, %str : !torch.vtensor<[?,1],si64>, !torch.vtensor<[1,2],si64>, !torch.str -> !torch.vtensor<[?,2],si64> | |
%25 = torch.aten.remainder.Tensor %24, %2 : !torch.vtensor<[?,2],si64>, !torch.vtensor<[2],si64> -> !torch.vtensor<[2,2],si64> | |
%26 = torch.aten.transpose.int %25, %int0, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,2],si64> | |
%27 = torch.aten.flatten.using_ints %arg2, %int0, %int1 : !torch.vtensor<[8,8],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%28 = torch.aten.slice.Tensor %27, %int0, %int0, %int2, %int1 : !torch.vtensor<[64],f32>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2],f32> | |
%29 = torch.aten.slice.Tensor %26, %int1, %int0, %int1, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64> | |
%30 = torch.aten.lt.Scalar %29, %int0 : !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],i1> | |
%31 = torch.aten.add.Scalar %29, %int4, %int1 : !torch.vtensor<[2,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64> | |
%32 = torch.aten.where.self %30, %31, %29 : !torch.vtensor<[2,1],i1>, !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64> -> !torch.vtensor<[2,1],si64> | |
%33 = torch.aten.slice.Tensor %26, %int1, %int1, %int2, %int1 : !torch.vtensor<[2,2],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64> | |
%34 = torch.aten.lt.Scalar %33, %int0 : !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],i1> | |
%35 = torch.aten.add.Scalar %33, %int4, %int1 : !torch.vtensor<[2,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[2,1],si64> | |
%36 = torch.aten.where.self %34, %35, %33 : !torch.vtensor<[2,1],i1>, !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64> -> !torch.vtensor<[2,1],si64> | |
%37 = torch.aten.add.Tensor %36, %32, %int4 : !torch.vtensor<[2,1],si64>, !torch.vtensor<[2,1],si64>, !torch.int -> !torch.vtensor<[2,1],si64> | |
%38 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%39 = torch.aten.view %37, %38 : !torch.vtensor<[2,1],si64>, !torch.list<int> -> !torch.vtensor<[2],si64> | |
%40 = torch.aten.flatten.using_ints %39, %int0, %int0 : !torch.vtensor<[2],si64>, !torch.int, !torch.int -> !torch.vtensor<[2],si64> | |
%41 = torch.aten.flatten.using_ints %28, %int0, %int0 : !torch.vtensor<[2],f32>, !torch.int, !torch.int -> !torch.vtensor<[2],f32> | |
%42 = torch.aten.flatten.using_ints %arg0, %int0, %int1 : !torch.vtensor<[4,4],f32>, !torch.int, !torch.int -> !torch.vtensor<[16],f32> | |
%43 = torch.aten.scatter.src %42, %int0, %40, %41 : !torch.vtensor<[16],f32>, !torch.int, !torch.vtensor<[2],si64>, !torch.vtensor<[2],f32> -> !torch.vtensor<[16],f32> | |
%44 = torch.prim.ListConstruct %int4, %int4 : (!torch.int, !torch.int) -> !torch.list<int> | |
%45 = torch.aten.unflatten.int %43, %int0, %44 : !torch.vtensor<[16],f32>, !torch.int, !torch.list<int> -> !torch.vtensor<[4,4],f32> | |
return %45 : !torch.vtensor<[4,4],f32> | |
} | |
} | |
==================== | |
LINALG Backend IR | |
#map = affine_map<(d0) -> (d0)> | |
#map1 = affine_map<(d0) -> (d0, 0)> | |
#map2 = affine_map<(d0) -> ()> | |
#map3 = affine_map<(d0, d1) -> (d0, 0)> | |
#map4 = affine_map<(d0, d1) -> (0, d1)> | |
#map5 = affine_map<(d0, d1) -> (d0, d1)> | |
#map6 = affine_map<(d0, d1) -> (d1)> | |
module { | |
ml_program.global private mutable @global_seed(dense<0> : tensor<i64>) : tensor<i64> | |
func.func @main_graph(%arg0: tensor<4x4xf32>, %arg1: tensor<4x4xi1>, %arg2: tensor<8x8xf32>) -> tensor<4x4xf32> { | |
%cst = arith.constant dense<0> : tensor<16xi64> | |
%c0 = arith.constant 0 : index | |
%c-1 = arith.constant -1 : index | |
%c1 = arith.constant 1 : index | |
%cst_0 = arith.constant dense<1> : tensor<1xi64> | |
%cst_1 = arith.constant dense<4> : tensor<2xi64> | |
%c4_i64 = arith.constant 4 : i64 | |
%c16 = arith.constant 16 : index | |
%c0_i64 = arith.constant 0 : i64 | |
%c0_i32 = arith.constant 0 : i32 | |
%c2 = arith.constant 2 : index | |
%c1_i64 = arith.constant 1 : i64 | |
%cst_2 = arith.constant 0.000000e+00 : f32 | |
%collapsed = tensor.collapse_shape %arg1 [[0, 1]] : tensor<4x4xi1> into tensor<16xi1> | |
%0 = tensor.empty() : tensor<16xi64> | |
%1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%collapsed : tensor<16xi1>) outs(%0 : tensor<16xi64>) { | |
^bb0(%in: i1, %out: i64): | |
%51 = arith.extui %in : i1 to i64 | |
linalg.yield %51 : i64 | |
} -> tensor<16xi64> | |
%2 = linalg.fill ins(%c0_i64 : i64) outs(%0 : tensor<16xi64>) -> tensor<16xi64> | |
%3 = tensor.empty() : tensor<i64> | |
%4 = linalg.fill ins(%c0_i64 : i64) outs(%3 : tensor<i64>) -> tensor<i64> | |
%5:2 = tm_tensor.scan dimension(0) inclusive(true) ins(%1 : tensor<16xi64>) outs(%2, %4 : tensor<16xi64>, tensor<i64>) { | |
^bb0(%arg3: i64, %arg4: i64): | |
%51 = arith.addi %arg3, %arg4 : i64 | |
tm_tensor.yield %51 : i64 | |
} -> tensor<16xi64>, tensor<i64> | |
%6 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%5#0 : tensor<16xi64>) outs(%0 : tensor<16xi64>) { | |
^bb0(%in: i64, %out: i64): | |
%51 = arith.subi %in, %c1_i64 : i64 | |
linalg.yield %51 : i64 | |
} -> tensor<16xi64> | |
%7 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%6 : tensor<16xi64>) outs(%0 : tensor<16xi64>) { | |
^bb0(%in: i64, %out: i64): | |
%51 = arith.cmpi slt, %in, %c0_i64 : i64 | |
%52 = arith.select %51, %c0_i64, %in : i64 | |
linalg.yield %52 : i64 | |
} -> tensor<16xi64> | |
%8 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} outs(%0 : tensor<16xi64>) { | |
^bb0(%out: i64): | |
%51 = linalg.index 0 : index | |
%52 = arith.index_cast %51 : index to i64 | |
linalg.yield %52 : i64 | |
} -> tensor<16xi64> | |
%9 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%8, %1 : tensor<16xi64>, tensor<16xi64>) outs(%0 : tensor<16xi64>) { | |
^bb0(%in: i64, %in_12: i64, %out: i64): | |
%51 = arith.muli %in, %in_12 : i64 | |
linalg.yield %51 : i64 | |
} -> tensor<16xi64> | |
%10 = tensor.empty() : tensor<16x1xi32> | |
%11 = linalg.fill ins(%c0_i32 : i32) outs(%10 : tensor<16x1xi32>) -> tensor<16x1xi32> | |
%12:2 = linalg.generic {indexing_maps = [#map1, #map], iterator_types = ["parallel"]} outs(%11, %2 : tensor<16x1xi32>, tensor<16xi64>) { | |
^bb0(%out: i32, %out_12: i64): | |
%51 = linalg.index 0 : index | |
%52 = arith.remsi %51, %c16 : index | |
%extracted_13 = tensor.extract %7[%52] : tensor<16xi64> | |
%extracted_14 = tensor.extract %9[%52] : tensor<16xi64> | |
%53 = arith.trunci %extracted_13 : i64 to i32 | |
linalg.yield %53, %extracted_14 : i32, i64 | |
} -> (tensor<16x1xi32>, tensor<16xi64>) | |
%13 = tm_tensor.scatter {dimension_map = array<i64: 0>} unique_indices(false) ins(%2, %12#0 : tensor<16xi64>, tensor<16x1xi32>) outs(%cst : tensor<16xi64>) { | |
^bb0(%arg3: i64, %arg4: i64): | |
tm_tensor.yield %arg3 : i64 | |
} -> tensor<16xi64> | |
%14 = tm_tensor.scatter {dimension_map = array<i64: 0>} unique_indices(false) ins(%12#1, %12#0 : tensor<16xi64>, tensor<16x1xi32>) outs(%13 : tensor<16xi64>) { | |
^bb0(%arg3: i64, %arg4: i64): | |
%51 = arith.addi %arg3, %arg4 : i64 | |
tm_tensor.yield %51 : i64 | |
} -> tensor<16xi64> | |
%15 = linalg.generic {indexing_maps = [#map, #map2], iterator_types = ["reduction"]} ins(%1 : tensor<16xi64>) outs(%4 : tensor<i64>) { | |
^bb0(%in: i64, %out: i64): | |
%51 = arith.addi %in, %out : i64 | |
linalg.yield %51 : i64 | |
} -> tensor<i64> | |
%extracted = tensor.extract %15[] : tensor<i64> | |
%16 = arith.index_cast %extracted : i64 to index | |
%17 = arith.cmpi slt, %16, %c0 : index | |
%18 = arith.addi %16, %c16 : index | |
%19 = arith.select %17, %18, %16 : index | |
%20 = arith.cmpi slt, %19, %c0 : index | |
%21 = arith.select %20, %c-1, %19 : index | |
%22 = arith.cmpi sgt, %21, %c16 : index | |
%23 = arith.select %22, %c16, %21 : index | |
%24 = arith.cmpi slt, %23, %c0 : index | |
%25 = arith.select %24, %c0, %23 : index | |
%extracted_slice = tensor.extract_slice %14[0] [%25] [1] : tensor<16xi64> to tensor<?xi64> | |
%26 = tensor.empty() : tensor<2xi64> | |
%27 = linalg.fill ins(%c0_i64 : i64) outs(%26 : tensor<2xi64>) -> tensor<2xi64> | |
%28 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%cst_1 : tensor<2xi64>) outs(%27 : tensor<2xi64>) { | |
^bb0(%in: i64, %out: i64): | |
linalg.yield %c4_i64 : i64 | |
} -> tensor<2xi64> | |
%29 = linalg.fill ins(%c1_i64 : i64) outs(%26 : tensor<2xi64>) -> tensor<2xi64> | |
%30 = linalg.fill ins(%c1_i64 : i64) outs(%3 : tensor<i64>) -> tensor<i64> | |
%31:2 = tm_tensor.scan dimension(0) inclusive(true) ins(%28 : tensor<2xi64>) outs(%29, %30 : tensor<2xi64>, tensor<i64>) { | |
^bb0(%arg3: i64, %arg4: i64): | |
%51 = arith.muli %arg3, %arg4 : i64 | |
tm_tensor.yield %51 : i64 | |
} -> tensor<2xi64>, tensor<i64> | |
%32 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%31#0 : tensor<2xi64>) outs(%27 : tensor<2xi64>) { | |
^bb0(%in: i64, %out: i64): | |
%51 = linalg.index 0 : index | |
%52 = arith.subi %c1, %51 : index | |
%extracted_12 = tensor.extract %31#0[%52] : tensor<2xi64> | |
linalg.yield %extracted_12 : i64 | |
} -> tensor<2xi64> | |
%extracted_slice_3 = tensor.extract_slice %32[1] [1] [1] : tensor<2xi64> to tensor<1xi64> | |
%concat = tensor.concat dim(0) %extracted_slice_3, %cst_0 : (tensor<1xi64>, tensor<1xi64>) -> tensor<2xi64> | |
%expanded = tensor.expand_shape %extracted_slice [[0, 1]] output_shape [%25, 1] : tensor<?xi64> into tensor<?x1xi64> | |
%expanded_4 = tensor.expand_shape %concat [[0, 1]] output_shape [1, 2] : tensor<2xi64> into tensor<1x2xi64> | |
%33 = tensor.empty() : tensor<2x2xi64> | |
%34 = linalg.generic {indexing_maps = [#map3, #map4, #map5], iterator_types = ["parallel", "parallel"]} ins(%expanded, %expanded_4 : tensor<?x1xi64>, tensor<1x2xi64>) outs(%33 : tensor<2x2xi64>) { | |
^bb0(%in: i64, %in_12: i64, %out: i64): | |
%51 = arith.sitofp %in : i64 to f64 | |
%52 = arith.sitofp %in_12 : i64 to f64 | |
%53 = arith.divf %51, %52 : f64 | |
%54 = math.floor %53 : f64 | |
%55 = arith.fptosi %54 : f64 to i64 | |
linalg.yield %55 : i64 | |
} -> tensor<2x2xi64> | |
%35 = linalg.generic {indexing_maps = [#map5, #map6, #map5], iterator_types = ["parallel", "parallel"]} ins(%34, %cst_1 : tensor<2x2xi64>, tensor<2xi64>) outs(%33 : tensor<2x2xi64>) { | |
^bb0(%in: i64, %in_12: i64, %out: i64): | |
%51 = arith.remsi %in, %in_12 : i64 | |
%52 = arith.cmpi ne, %51, %c0_i64 : i64 | |
%53 = arith.cmpi slt, %in_12, %c0_i64 : i64 | |
%54 = arith.cmpi slt, %51, %c0_i64 : i64 | |
%55 = arith.xori %53, %54 : i1 | |
%56 = arith.andi %52, %55 : i1 | |
%57 = arith.addi %51, %in_12 : i64 | |
%58 = arith.select %56, %57, %51 : i64 | |
linalg.yield %58 : i64 | |
} -> tensor<2x2xi64> | |
%transposed = linalg.transpose ins(%35 : tensor<2x2xi64>) outs(%33 : tensor<2x2xi64>) permutation = [1, 0] | |
%collapsed_5 = tensor.collapse_shape %arg2 [[0, 1]] : tensor<8x8xf32> into tensor<64xf32> | |
%extracted_slice_6 = tensor.extract_slice %collapsed_5[0] [2] [1] : tensor<64xf32> to tensor<2xf32> | |
%extracted_slice_7 = tensor.extract_slice %transposed[0, 0] [2, 1] [1, 1] : tensor<2x2xi64> to tensor<2x1xi64> | |
%36 = tensor.empty() : tensor<2x1xi1> | |
%37 = linalg.generic {indexing_maps = [#map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice_7 : tensor<2x1xi64>) outs(%36 : tensor<2x1xi1>) { | |
^bb0(%in: i64, %out: i1): | |
%51 = arith.cmpi slt, %in, %c0_i64 : i64 | |
linalg.yield %51 : i1 | |
} -> tensor<2x1xi1> | |
%38 = tensor.empty() : tensor<2x1xi64> | |
%39 = linalg.generic {indexing_maps = [#map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice_7 : tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) { | |
^bb0(%in: i64, %out: i64): | |
%51 = arith.addi %in, %c4_i64 : i64 | |
linalg.yield %51 : i64 | |
} -> tensor<2x1xi64> | |
%40 = linalg.generic {indexing_maps = [#map3, #map3, #map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%37, %39, %extracted_slice_7 : tensor<2x1xi1>, tensor<2x1xi64>, tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) { | |
^bb0(%in: i1, %in_12: i64, %in_13: i64, %out: i64): | |
%51 = arith.select %in, %in_12, %in_13 : i64 | |
linalg.yield %51 : i64 | |
} -> tensor<2x1xi64> | |
%extracted_slice_8 = tensor.extract_slice %transposed[0, 1] [2, 1] [1, 1] : tensor<2x2xi64> to tensor<2x1xi64> | |
%41 = linalg.generic {indexing_maps = [#map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice_8 : tensor<2x1xi64>) outs(%36 : tensor<2x1xi1>) { | |
^bb0(%in: i64, %out: i1): | |
%51 = arith.cmpi slt, %in, %c0_i64 : i64 | |
linalg.yield %51 : i1 | |
} -> tensor<2x1xi1> | |
%42 = linalg.generic {indexing_maps = [#map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%extracted_slice_8 : tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) { | |
^bb0(%in: i64, %out: i64): | |
%51 = arith.addi %in, %c4_i64 : i64 | |
linalg.yield %51 : i64 | |
} -> tensor<2x1xi64> | |
%43 = linalg.generic {indexing_maps = [#map3, #map3, #map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%41, %42, %extracted_slice_8 : tensor<2x1xi1>, tensor<2x1xi64>, tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) { | |
^bb0(%in: i1, %in_12: i64, %in_13: i64, %out: i64): | |
%51 = arith.select %in, %in_12, %in_13 : i64 | |
linalg.yield %51 : i64 | |
} -> tensor<2x1xi64> | |
%44 = linalg.generic {indexing_maps = [#map3, #map3, #map5], iterator_types = ["parallel", "parallel"]} ins(%43, %40 : tensor<2x1xi64>, tensor<2x1xi64>) outs(%38 : tensor<2x1xi64>) { | |
^bb0(%in: i64, %in_12: i64, %out: i64): | |
%51 = arith.muli %in_12, %c4_i64 : i64 | |
%52 = arith.addi %in, %51 : i64 | |
linalg.yield %52 : i64 | |
} -> tensor<2x1xi64> | |
%collapsed_9 = tensor.collapse_shape %44 [[0, 1]] : tensor<2x1xi64> into tensor<2xi64> | |
%collapsed_10 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<4x4xf32> into tensor<16xf32> | |
%45 = tensor.empty() : tensor<2x1xi32> | |
%46 = linalg.fill ins(%c0_i32 : i32) outs(%45 : tensor<2x1xi32>) -> tensor<2x1xi32> | |
%47 = tensor.empty() : tensor<2xf32> | |
%48 = linalg.fill ins(%cst_2 : f32) outs(%47 : tensor<2xf32>) -> tensor<2xf32> | |
%49:2 = linalg.generic {indexing_maps = [#map1, #map], iterator_types = ["parallel"]} outs(%46, %48 : tensor<2x1xi32>, tensor<2xf32>) { | |
^bb0(%out: i32, %out_12: f32): | |
%51 = linalg.index 0 : index | |
%52 = arith.remsi %51, %c2 : index | |
%extracted_13 = tensor.extract %collapsed_9[%52] : tensor<2xi64> | |
%extracted_14 = tensor.extract %extracted_slice_6[%52] : tensor<2xf32> | |
%53 = arith.trunci %extracted_13 : i64 to i32 | |
linalg.yield %53, %extracted_14 : i32, f32 | |
} -> (tensor<2x1xi32>, tensor<2xf32>) | |
%50 = tm_tensor.scatter {dimension_map = array<i64: 0>} unique_indices(false) ins(%49#1, %49#0 : tensor<2xf32>, tensor<2x1xi32>) outs(%collapsed_10 : tensor<16xf32>) { | |
^bb0(%arg3: f32, %arg4: f32): | |
tm_tensor.yield %arg3 : f32 | |
} -> tensor<16xf32> | |
%expanded_11 = tensor.expand_shape %50 [[0, 1]] output_shape [4, 4] : tensor<16xf32> into tensor<4x4xf32> | |
return %expanded_11 : tensor<4x4xf32> | |
} | |
} | |
Running MaskedScatterStaticBasic_basic... | |
ERROR: Runtime op verification failed | |
"memref.store"(%589, %550, %552, %554) <{nontemporal = false}> : (i64, memref<2x2xi64>, index, index) -> () | |
^ out-of-bounds access | |
Location: loc("/Transpose") | |
./build_tools/ci/test_posix.sh: line 12: 156377 Aborted (core dumped) python -m e2e_testing.main --config=onnx -v -s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment