Skip to content

Instantly share code, notes, and snippets.

@pashu123
Created February 22, 2023 17:07
Show Gist options
  • Save pashu123/a4f7b1cc30a81da85ecc35df3c99ac53 to your computer and use it in GitHub Desktop.
Save pashu123/a4f7b1cc30a81da85ecc35df3c99ac53 to your computer and use it in GitHub Desktop.
Args: {'output_path': '.', 'quant_device': 'cuda', 'batch_size': 2, 'bit_width': 8, 'conv_group_size': 16, 'linear_group_size': 16, 'layer_type': 'conv', 'weight_quant': 'group_scale_param', 'input_quant': 'group_dynamic_sym', 'float16': True, 'inline_ts_graph': False, 'onnx_export': False, 'make_fx': True, 'quantize': True}
Moving model to cuda...
Run model quantization...
Quantize conv2d: conv
Tracing to FX functional representation...
FX graph model...
graph():
%arg0_1 : [#users=2] = placeholder[target=arg0_1]
%_tensor_constant0 : [#users=1] = get_attr[target=_tensor_constant0]
%lift_fresh_copy : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant0,), kwargs = {})
%view : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg0_1, [2, 2, 16, 5, 5]), kwargs = {})
%abs_1 : [#users=1] = call_function[target=torch.ops.aten.abs](args = (%view,), kwargs = {})
%max_1 : [#users=2] = call_function[target=torch.ops.aten.max](args = (%abs_1, 2, True), kwargs = {})
%getitem : [#users=1] = call_function[target=operator.getitem](args = (%max_1, 0), kwargs = {})
%getitem_1 : [#users=0] = call_function[target=operator.getitem](args = (%max_1, 1), kwargs = {})
%view_1 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%getitem, [2, 2, 1, 5, 5]), kwargs = {})
%expand : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_1, [2, 2, 16, 5, 5]), kwargs = {})
%clone : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%expand,), kwargs = {memory_format: torch.contiguous_format})
%view_2 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%clone, [2, 32, 5, 5]), kwargs = {})
%div : [#users=2] = call_function[target=torch.ops.aten.div](args = (%view_2, 128), kwargs = {})
%_tensor_constant1 : [#users=1] = get_attr[target=_tensor_constant1]
%detach : [#users=1] = call_function[target=torch.ops.aten.detach](args = (%_tensor_constant1,), kwargs = {})
%detach_1 : [#users=2] = call_function[target=torch.ops.aten.detach](args = (%detach,), kwargs = {})
%div_1 : [#users=1] = call_function[target=torch.ops.aten.div](args = (%arg0_1, %div), kwargs = {})
%add : [#users=1] = call_function[target=torch.ops.aten.add](args = (%div_1, %detach_1), kwargs = {})
%round_1 : [#users=1] = call_function[target=torch.ops.aten.round](args = (%add,), kwargs = {})
%clamp : [#users=1] = call_function[target=torch.ops.aten.clamp](args = (%round_1, -128, 127), kwargs = {})
%sub : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%clamp, %detach_1), kwargs = {})
%mul : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub, %div), kwargs = {})
%_tensor_constant2 : [#users=1] = get_attr[target=_tensor_constant2]
%lift_fresh_copy_1 : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant2,), kwargs = {})
%_tensor_constant3 : [#users=1] = get_attr[target=_tensor_constant3]
%lift_fresh_copy_2 : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant3,), kwargs = {})
%_tensor_constant4 : [#users=1] = get_attr[target=_tensor_constant4]
%lift_fresh_copy_3 : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant4,), kwargs = {})
%_tensor_constant5 : [#users=1] = get_attr[target=_tensor_constant5]
%expand_1 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%_tensor_constant5, [2, 2, 16, 3, 3]), kwargs = {})
%clone_1 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%expand_1,), kwargs = {memory_format: torch.contiguous_format})
%view_3 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%clone_1, [2, 32, 3, 3]), kwargs = {})
%_tensor_constant6 : [#users=1] = get_attr[target=_tensor_constant6]
%mul_1 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%_tensor_constant6, %view_3), kwargs = {})
%_tensor_constant7 : [#users=1] = get_attr[target=_tensor_constant7]
%lift_fresh_copy_4 : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant7,), kwargs = {})
%_tensor_constant8 : [#users=1] = get_attr[target=_tensor_constant8]
%lift_fresh_copy_5 : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant8,), kwargs = {})
%_tensor_constant9 : [#users=1] = get_attr[target=_tensor_constant9]
%lift_fresh_copy_6 : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant9,), kwargs = {})
%_param_constant0 : [#users=1] = get_attr[target=_param_constant0]
%convolution : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%mul, %mul_1, %_param_constant0, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {})
%_tensor_constant10 : [#users=1] = get_attr[target=_tensor_constant10]
%lift_fresh_copy_7 : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant10,), kwargs = {})
%_tensor_constant11 : [#users=1] = get_attr[target=_tensor_constant11]
%lift_fresh_copy_8 : [#users=0] = call_function[target=torch.ops.aten.lift_fresh_copy](args = (%_tensor_constant11,), kwargs = {})
return convolution
# Linalg IR
// Linalg-on-tensors lowering of a group-wise dynamically-quantized conv2d
// (batch=2, C_in=32 split into 2 groups of 16, C_out=2, 3x3 kernel).
// Pipeline visible below:
//   1. input: per-group abs-max -> scale = max/128 (dynamic symmetric int8 fake-quant)
//   2. fake-quantize input: div by scale, add zero-point (0), roundeven, clamp [-128, 127]
//   3. dequantize: sub zero-point, mul scale
//   4. weights: stored int8 (%cst_0), dequantized with per-group f16 scales (%cst_1)
//   5. conv_2d_nchw_fchw with per-output-channel bias (%cst) broadcast as the init tensor
// Identity map over the 5-D grouped view (b, group, ch-in-group, h, w).
#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
// Same as #map but pinned to index 0 on d2: used for the reduction over the
// group-channel dimension and for broadcasting the reduced max back out.
#map1 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, 0, d3, d4)>
// Identity map over the collapsed 4-D NCHW view.
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// Scalar (rank-0) broadcast, used for the zero-point tensor<f16>.
#map3 = affine_map<(d0, d1, d2, d3) -> ()>
// Broadcast a per-output-channel vector (indexed by d1) over NCHW output.
#map4 = affine_map<(d0, d1, d2, d3) -> (d1)>
module attributes {torch.debug_module_name = "_lambda"} {
// RNG seed state emitted by the torch-mlir lowering; not read in this function.
ml_program.global private mutable @global_seed(dense<0> : tensor<i64>) : tensor<i64>
func.func @forward(%arg0: tensor<2x32x5x5xf16>) -> tensor<2x2x3x3xf16> {
// Conv bias, one f16 per output channel (broadcast via #map4 into the conv init).
%cst = arith.constant dense<[4.827880e-02, 6.393430e-03]> : tensor<2xf16>
// Quantized conv weights as raw int8 values (2x32x3x3).
%cst_0 = arith.constant dense<"0xD0285B67B6D9816E05B2B2AFDEADBC320D42510BF670057FA034A01AEFFE9666C02F2F4038CEE2E5E79C2F49400E467F8B7F775C187F8949FF81A009C4591DAD33C20FF7D356852E5CE1C863C80A81BB98B653D575158688471D25EE05D41EEEF81B108181FD716948E3B8253A436CA26CE0690437B089E6ED9CB7DAFF0FA1684BDD2BF70D12F5F25DB09DB77489812D50122CE520178481C0757FE17C075FBD9CEF8B47245CCA6721950E301C7F33CC727F88FBA7D3FB65BA7F6510C1D4A6972CAD53587181415FD3240E5E5D4A344678D70A3A8438639AEE75428A7FE143B86BF1B3C7E365DE25F5DA195C4D0E696E9A683A4A821D782926742C4414ECBAE18E20484E2B832DA3C294DCE2FFBC7F841F5E9251C0B703A8A2D681D3503B5A3BCD94983D7F7F988EF20FED3DAF8C07A54C0455053E4D8B5F757F98091C0495BF7BAE3E466B93ED81745C72C2F756ED04FB681FFA57FA828A942F39E6D9117FF3A842AF2454A9C4E5567FFF67E4446163F614EBCDEBDDE819F9B91FE47FF4B1701382FF35BE7E704DCC7FEC033C9FDBEA7FAD5451787F7A043D9CCCE9DC5828D4B1B9DA1F38492630E66923D72FE582BF4271F7230481818123DCD844B3C06B68A4DC708133C93F3C7BAC0F3D044AD130130FDE174881E01C843991042D2A69A8A8C456BCD089914810EE4C852DD43DE0937667C181DBF4C6DA3CC57FD10A18A4447F74241C2618CACD912DE2D80967E3FB8959838128FCFB61F8392B578D76DC7FF1E3B10BB839A52F9C8C32D11FFFF6761646B7BC0E0739FE93D52DFB0A40299F"> : tensor<2x32x3x3xi8>
// Per-group weight dequantization scales (group dim d2 has extent 1,
// broadcast to the 16 channels of each group via #map1 below).
%cst_1 = arith.constant dense<[[[[[4.537110e-04, 4.565720e-04, 4.570480e-04], [4.465580e-04, 4.248620e-04, 4.377370e-04], [4.544260e-04, 4.503730e-04, 4.482270e-04]]], [[[4.372600e-04, 4.158020e-04, 4.422660e-04], [4.563330e-04, 4.177090e-04, 4.560950e-04], [4.410740e-04, 4.377370e-04, 4.374980e-04]]]], [[[[4.606250e-04, 4.541870e-04, 4.444120e-04], [4.489420e-04, 4.236700e-04, 3.855230e-04], [4.563330e-04, 4.637240e-04, 4.191400e-04]]], [[[4.277230e-04, 3.893380e-04, 4.627700e-04], [4.460810e-04, 4.467960e-04, 4.429820e-04], [4.401210e-04, 4.448890e-04, 4.282000e-04]]]]]> : tensor<2x2x1x3x3xf16>
// Zero-point = 0 (symmetric quantization; added then subtracted below).
%cst_2 = arith.constant dense<0.000000e+00> : tensor<f16>
// -65504 = most-negative finite f16; init value for the max reduction.
%cst_3 = arith.constant -6.550400e+04 : f16
// Scale divisor: abs-max / 128 gives the per-group input scale.
%cst_4 = arith.constant 1.280000e+02 : f16
// int8 clamp bounds.
%cst_5 = arith.constant -1.280000e+02 : f16
%cst_6 = arith.constant 1.270000e+02 : f16
%c0_i64 = arith.constant 0 : i64
// Reshape NCHW input into (batch, group, ch-in-group, h, w) = 2x2x16x5x5.
%expanded = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4]] : tensor<2x32x5x5xf16> into tensor<2x2x16x5x5xf16>
%0 = tensor.empty() : tensor<2x2x16x5x5xf16>
// |x| elementwise (aten.abs).
%1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expanded : tensor<2x2x16x5x5xf16>) outs(%0 : tensor<2x2x16x5x5xf16>) {
^bb0(%in: f16, %out: f16):
%23 = math.absf %in : f16
linalg.yield %23 : f16
} -> tensor<2x2x16x5x5xf16>
%2 = tensor.empty() : tensor<2x2x1x5x5xi64>
%3 = linalg.fill ins(%c0_i64 : i64) outs(%2 : tensor<2x2x1x5x5xi64>) -> tensor<2x2x1x5x5xi64>
%4 = tensor.empty() : tensor<2x2x1x5x5xf16>
%5 = linalg.fill ins(%cst_3 : f16) outs(%4 : tensor<2x2x1x5x5xf16>) -> tensor<2x2x1x5x5xf16>
// aten.max(dim=2, keepdim=True): reduces the 16-channel group dim, yielding
// both the max value (%6#0) and its index (%6#1); the index result is unused.
%6:2 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel"]} ins(%1 : tensor<2x2x16x5x5xf16>) outs(%5, %3 : tensor<2x2x1x5x5xf16>, tensor<2x2x1x5x5xi64>) {
^bb0(%in: f16, %out: f16, %out_8: i64):
%23 = linalg.index 2 : index
%24 = arith.index_cast %23 : index to i64
%25 = arith.maxf %in, %out : f16
%26 = arith.cmpf ogt, %in, %out : f16
%27 = arith.select %26, %24, %out_8 : i64
linalg.yield %25, %27 : f16, i64
} -> (tensor<2x2x1x5x5xf16>, tensor<2x2x1x5x5xi64>)
// Broadcast the per-group max back over the 16 group channels (aten.expand).
%7 = linalg.generic {indexing_maps = [#map1, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%6#0 : tensor<2x2x1x5x5xf16>) outs(%0 : tensor<2x2x16x5x5xf16>) {
^bb0(%in: f16, %out: f16):
linalg.yield %in : f16
} -> tensor<2x2x16x5x5xf16>
// Back to the flat NCHW layout.
%collapsed = tensor.collapse_shape %7 [[0], [1, 2], [3], [4]] : tensor<2x2x16x5x5xf16> into tensor<2x32x5x5xf16>
%8 = tensor.empty() : tensor<2x32x5x5xf16>
// Input scale = abs-max / 128; reused both to quantize (%10) and dequantize (%15).
%9 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%collapsed : tensor<2x32x5x5xf16>) outs(%8 : tensor<2x32x5x5xf16>) {
^bb0(%in: f16, %out: f16):
%23 = arith.divf %in, %cst_4 : f16
linalg.yield %23 : f16
} -> tensor<2x32x5x5xf16>
// x / scale.
%10 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0, %9 : tensor<2x32x5x5xf16>, tensor<2x32x5x5xf16>) outs(%8 : tensor<2x32x5x5xf16>) {
^bb0(%in: f16, %in_8: f16, %out: f16):
%23 = arith.divf %in, %in_8 : f16
linalg.yield %23 : f16
} -> tensor<2x32x5x5xf16>
// + zero-point (0).
%11 = linalg.generic {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%10, %cst_2 : tensor<2x32x5x5xf16>, tensor<f16>) outs(%8 : tensor<2x32x5x5xf16>) {
^bb0(%in: f16, %in_8: f16, %out: f16):
%23 = arith.addf %in, %in_8 : f16
linalg.yield %23 : f16
} -> tensor<2x32x5x5xf16>
// Round to nearest even (aten.round).
%12 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%11 : tensor<2x32x5x5xf16>) outs(%8 : tensor<2x32x5x5xf16>) {
^bb0(%in: f16, %out: f16):
%23 = math.roundeven %in : f16
linalg.yield %23 : f16
} -> tensor<2x32x5x5xf16>
// clamp(x, -128, 127): max with -128 then min with 127.
%13 = linalg.generic {indexing_maps = [#map2, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%12 : tensor<2x32x5x5xf16>) outs(%8 : tensor<2x32x5x5xf16>) {
^bb0(%in: f16, %out: f16):
%23 = arith.cmpf ult, %in, %cst_5 : f16
%24 = arith.select %23, %cst_5, %in : f16
%25 = arith.cmpf ugt, %24, %cst_6 : f16
%26 = arith.select %25, %cst_6, %24 : f16
linalg.yield %26 : f16
} -> tensor<2x32x5x5xf16>
// - zero-point (0): dequantization step 1.
%14 = linalg.generic {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%13, %cst_2 : tensor<2x32x5x5xf16>, tensor<f16>) outs(%8 : tensor<2x32x5x5xf16>) {
^bb0(%in: f16, %in_8: f16, %out: f16):
%23 = arith.subf %in, %in_8 : f16
linalg.yield %23 : f16
} -> tensor<2x32x5x5xf16>
// * scale: dequantization step 2 -> fake-quantized input for the conv.
%15 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%14, %9 : tensor<2x32x5x5xf16>, tensor<2x32x5x5xf16>) outs(%8 : tensor<2x32x5x5xf16>) {
^bb0(%in: f16, %in_8: f16, %out: f16):
%23 = arith.mulf %in, %in_8 : f16
linalg.yield %23 : f16
} -> tensor<2x32x5x5xf16>
%16 = tensor.empty() : tensor<2x2x16x3x3xf16>
// Broadcast the per-group weight scales over the 16 channels of each group.
%17 = linalg.generic {indexing_maps = [#map1, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%cst_1 : tensor<2x2x1x3x3xf16>) outs(%16 : tensor<2x2x16x3x3xf16>) {
^bb0(%in: f16, %out: f16):
linalg.yield %in : f16
} -> tensor<2x2x16x3x3xf16>
%collapsed_7 = tensor.collapse_shape %17 [[0], [1, 2], [3], [4]] : tensor<2x2x16x3x3xf16> into tensor<2x32x3x3xf16>
%18 = tensor.empty() : tensor<2x32x3x3xf16>
// Dequantize int8 weights: sitofp then multiply by the broadcast scales.
%19 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_0, %collapsed_7 : tensor<2x32x3x3xi8>, tensor<2x32x3x3xf16>) outs(%18 : tensor<2x32x3x3xf16>) {
^bb0(%in: i8, %in_8: f16, %out: f16):
%23 = arith.sitofp %in : i8 to f16
%24 = arith.mulf %23, %in_8 : f16
linalg.yield %24 : f16
} -> tensor<2x32x3x3xf16>
%20 = tensor.empty() : tensor<2x2x3x3xf16>
// Materialize the conv init as the bias broadcast over (n, h, w);
// conv_2d_nchw_fchw accumulates into its outs, so this applies the bias.
%21 = linalg.generic {indexing_maps = [#map4, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst : tensor<2xf16>) outs(%20 : tensor<2x2x3x3xf16>) {
^bb0(%in: f16, %out: f16):
linalg.yield %in : f16
} -> tensor<2x2x3x3xf16>
// Stride-1, dilation-1 NCHW convolution: 2x32x5x5 * 2x32x3x3 -> 2x2x3x3.
%22 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%15, %19 : tensor<2x32x5x5xf16>, tensor<2x32x3x3xf16>) outs(%21 : tensor<2x2x3x3xf16>) -> tensor<2x2x3x3xf16>
return %22 : tensor<2x2x3x3xf16>
}
}
PyTorch result:
tensor([[[[ 0.0169, 0.9688, 0.1151],
[ 0.5601, -0.8652, 1.0654],
[ 0.3960, -0.8550, -0.6348]],
[[-0.4011, -0.2404, 0.0652],
[ 0.3621, -0.6089, -0.0455],
[ 0.6440, 0.1541, 0.9761]]],
[[[ 0.4189, 0.2705, -0.2681],
[-0.7920, -0.5820, 0.1659],
[-0.5610, 0.0285, 0.2966]],
[[-0.1343, -1.5732, -0.1599],
[-0.5366, 0.6650, 0.1564],
[ 1.2070, -0.4768, 0.0576]]]], device='cuda:0', dtype=torch.float16,
grad_fn=<ConvolutionBackward0>)
Results: Refbackend torch-mlir
[[[[ 0.01743 0.967 0.1157 ]
[ 0.561 -0.863 1.067 ]
[ 0.3943 -0.854 -0.637 ]]
[[-0.4004 -0.2391 0.0644 ]
[ 0.3618 -0.608 -0.0454 ]
[ 0.645 0.1539 0.9746 ]]]
[[[ 0.4197 0.2705 -0.266 ]
[-0.792 -0.5776 0.1655 ]
[-0.562 0.02806 0.2966 ]]
[[-0.133 -1.575 -0.1598 ]
[-0.5366 0.666 0.1559 ]
[ 1.21 -0.475 0.05792]]]]
Results: IREE-CPU
Target triple found: x86_64-linux-gnu
[[[[ 0.01678 0.969 0.1163 ]
[ 0.5596 -0.8657 1.063 ]
[ 0.3967 -0.8545 -0.6377 ]]
[[-0.4004 -0.2394 0.067 ]
[ 0.3616 -0.6094 -0.0459 ]
[ 0.646 0.154 0.974 ]]]
[[[ 0.4175 0.2703 -0.2666 ]
[-0.795 -0.5854 0.165 ]
[-0.5605 0.02707 0.2961 ]]
[[-0.1333 -1.576 -0.1599 ]
[-0.5366 0.6685 0.1552 ]
[ 1.208 -0.477 0.0583 ]]]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment