Skip to content

Instantly share code, notes, and snippets.

fn (%input: Tensor[(2, 2, 3), float32], %v38: Tensor[(16, 3), float32], %v44: Tensor[(16), float32], %v45: Tensor[(16), float32], %v50: Tensor[(16, 4), float32], %v56: Tensor[(16), float32], %v57: Tensor[(16), float32], %v78: Tensor[(4), float32], %v79: Tensor[(4), float32], %states: List[(Tensor[(2, 4), float32], Tensor[(2, 4), float32])], %v109: Tensor[(16, 4), float32], %v115: Tensor[(16), float32], %v116: Tensor[(16), float32], %v121: Tensor[(16, 4), float32], %v127: Tensor[(16), float32], %v128: Tensor[(16), float32], %v149: Tensor[(4), float32], %v150: Tensor[(4), float32]) -> (Tensor[(?, 2, 4), float32], List[(Tensor[(2, 4), float32], Tensor[(2, 4), float32])]) {
%0 = Nil /* ty=List[Tensor[(2, 4), float32]] */;
%1 = @nth(%states, 1 /* ty=int32 */) /* ty=(Tensor[(2, 4), float32], Tensor[(2, 4), float32]) */;
%83 = (
let %while_loop: fn (int32, List[Tensor[(2, 4), float32]], (Tensor[(2, 4), float32], Tensor[(2, 4), float32])) -> (int32, List[Tensor[(2, 4), float32]], (Tensor[(2, 4), float32], T
import numpy as np
import tvm
from tvm import relay
from tvm.relay.ty import TupleType, TensorType
from tvm.relay.prelude import Prelude
def _get_relay_input_vars(input_shapes, prelude):
def _is_int_seq(seq):
graph(%self : __torch__.custom_lstms.LSTMLayer,
%input.1 : Tensor,
%state.1 : (Tensor, Tensor)):
%3 : bool = prim::Constant[value=1]() # /home/masa/projects/dev/torchscript-to-tvm/custom_lstms.py:84:8
%4 : int = prim::Constant[value=0]() # /home/masa/projects/dev/torchscript-to-tvm/custom_lstms.py:84:34
%outputs.1 : Tensor[] = prim::ListConstruct()
%6 : int = aten::size(%input.1, %4) # /home/masa/projects/dev/torchscript-to-tvm/custom_lstms.py:84:23
%outputs : Tensor[], %state : (Tensor, Tensor) = prim::Loop(%6, %3, %outputs.1, %state.1) # /home/masa/projects/dev/torchscript-to-tvm/custom_lstms.py:84:8
block0(%i.1 : int, %outputs.6 : Tensor[], %state.6 : (Tensor, Tensor)):
%12 : __torch__.custom_lstms.LSTMCell = prim::GetAttr[name="cell"](%self)
{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 2048, 8, 8], "int16"], ["TENSOR", [192, 2048, 1, 1], "int16"], [1, 1], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 18989, "code_hash": null, "entity": [["tile_co", "sp", [-1, 8]], ["tile_oh", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 8]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], ["ann_reduce", "an", ["unroll", "none"]], ["ann_spatial", "an", ["unroll", "none", "vec"]]]}, "result": [[0.0015915348], 0, 3.0337789058685303, 1585830810.6958284], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 2048, 8, 8], "int16"], ["TENSOR", [448, 2048, 1, 1], "int16"], [1, 1], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 10210, "code_hash": null, "entity": [["tile_co", "sp", [-1, 8]], ["tile_oh", "sp", [-1, 2]], ["tile_ow", "sp", [-1, 4]
{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 1024, 14, 14], "int16"], ["TENSOR", [2048, 1024, 1, 1], "int16"], [2, 2], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 4011, "code_hash": null, "entity": [["tile_co", "sp", [-1, 8]], ["tile_oh", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 7]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 9, 7, 8]], ["ann_reduce", "an", ["none", "unroll"]], ["ann_spatial", "an", ["unroll", "none", "vec"]]]}, "result": [[0.00672183], 0, 2.449126720428467, 1585788869.0401723], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 512, 28, 28], "int16"], ["TENSOR", [1024, 512, 1, 1], "int16"], [2, 2], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 19595, "code_hash": null, "entity": [["tile_co", "sp", [-1, 16]], ["tile_oh", "sp", [-1, 2]], ["tile_ow", "sp", [-1, 2
{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 256, 14, 14], "int16"], ["TENSOR", [512, 256, 1, 1], "int16"], [2, 2], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 806, "code_hash": null, "entity": [["tile_co", "sp", [-1, 64]], ["tile_oh", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 1]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], ["ann_reduce", "an", ["unroll", "none"]], ["ann_spatial", "an", ["none", "none", "vec"]]]}, "result": [[0.0004633984], 0, 2.3286285400390625, 1585768016.7444756], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 128, 28, 28], "int16"], ["TENSOR", [256, 128, 1, 1], "int16"], [2, 2], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 11631, "code_hash": null, "entity": [["tile_co", "sp", [-1, 8]], ["tile_oh", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 14]],
{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 832, 7, 7], "int16"], ["TENSOR", [48, 832, 1, 1], "int16"], [1, 1], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 4253, "code_hash": null, "entity": [["tile_co", "sp", [-1, 4]], ["tile_oh", "sp", [-1, 7]], ["tile_ow", "sp", [-1, 1]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], ["ann_reduce", "an", ["none", "unroll"]], ["ann_spatial", "an", ["unroll", "unroll", "vec"]]]}, "result": [[0.0002178164], 0, 4.340655565261841, 1585708126.5301793], "version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_winograd.arm_cpu", [["TENSOR", [1, 48, 9, 9], "int16"], ["TENSOR", [128, 48, 3, 3], "int16"], [1, 1], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 1901, "code_hash": null, "entity": [["tile_p", "sp", [-1, 4]], ["tile_k", "sp", [-1, 8]], ["tile_c", "sp", [-1, 12]], ["ann_reduc
DEBUG:autotvm:No: 404 GFLOPS: 0.00/21.78 result: MeasureResult(costs=(InstantiationError(['Too large factor for unrolling'],),), error_no=1, all_cost=0.014428138732910156, timestamp=1585706401.8644567) [('tile_co', [-1, 32]), ('tile_oh', [-1, 56]), ('tile_ow', [-1, 1]), ('reorder_0', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('ann_reduce', ['none', 'none']), ('ann_spatial', ['none', 'vec', 'unroll'])],None,39261
DEBUG:autotvm:No: 405 GFLOPS: 0.00/21.78 result: MeasureResult(costs=(InstantiationError(['Too large factor for unrolling', 'Too large factor for unrolling'],),), error_no=1, all_cost=0.01387786865234375, timestamp=1585706401.8645122) [('tile_co', [-1, 64]), ('tile_oh', [-1, 112]), ('tile_ow', [-1, 1]), ('reorder_0', [0, 1, 2, 3, 4, 5, 6, 9, 7, 8]), ('ann_reduce', ['unroll', 'none']), ('ann_spatial', ['unroll', 'unroll', 'unroll'])],None,70769
DEBUG:autotvm:No: 406 GFLOPS: 0.00/21.78 result: MeasureResult(costs=('',), error_no=7, all_cost=10, timestamp=1585706522.077638) [('tile_co', [-1, 2]), ('tile_oh', [-1,
Working on resnet18, per channel quantization
File /home/masa/.tvm_test_data/data/elephant-299.jpg exists, skip.
build finished
TVM elapsed ms: 1.2536034989999998
Torch elapsed ms: 4.9253363609313965
Working on resnet50, per channel quantization
File /home/masa/.tvm_test_data/data/elephant-299.jpg exists, skip.
build finished
TVM elapsed ms: 3.142418917
resnet 18
TVM AVX512 elapsed ms: 2.6329094399999997
TVM AVX512 + VNNI elapsed ms: 2.0363589199999996
Torch elapsed ms: 5.305776596069336
resnet 50
TVM AVX512 elapsed ms: 5.464826759999999
TVM AVX512 + VNNI elapsed ms: 4.63109352
Torch elapsed ms: 10.141160488128662