masahi’s gists

masahi / stacked_lstm.txt

Last active April 9, 2020 22:52

	fn (%input: Tensor[(2, 2, 3), float32], %v38: Tensor[(16, 3), float32], %v44: Tensor[(16), float32], %v45: Tensor[(16), float32], %v50: Tensor[(16, 4), float32], %v56: Tensor[(16), float32], %v57: Tensor[(16), float32], %v78: Tensor[(4), float32], %v79: Tensor[(4), float32], %states: List[(Tensor[(2, 4), float32], Tensor[(2, 4), float32])], %v109: Tensor[(16, 4), float32], %v115: Tensor[(16), float32], %v116: Tensor[(16), float32], %v121: Tensor[(16, 4), float32], %v127: Tensor[(16), float32], %v128: Tensor[(16), float32], %v149: Tensor[(4), float32], %v150: Tensor[(4), float32]) -> (Tensor[(?, 2, 4), float32], List[(Tensor[(2, 4), float32], Tensor[(2, 4), float32])]) {
	%0 = Nil /* ty=List[Tensor[(2, 4), float32]] */;
	%1 = @nth(%states, 1 /* ty=int32 */) /* ty=(Tensor[(2, 4), float32], Tensor[(2, 4), float32]) */;
	%83 = (
	let %while_loop: fn (int32, List[Tensor[(2, 4), float32]], (Tensor[(2, 4), float32], Tensor[(2, 4), float32])) -> (int32, List[Tensor[(2, 4), float32]], (Tensor[(2, 4), float32], T

masahi / vm_bug.py

Last active April 9, 2020 17:30

	import numpy as np

	import tvm
	from tvm import relay
	from tvm.relay.ty import TupleType, TensorType
	from tvm.relay.prelude import Prelude


	def _get_relay_input_vars(input_shapes, prelude):
	def _is_int_seq(seq):

masahi / rnn_ir.txt

Created April 5, 2020 23:56

	graph(%self : __torch__.custom_lstms.LSTMLayer,
	%input.1 : Tensor,
	%state.1 : (Tensor, Tensor)):
	%3 : bool = prim::Constant[value=1]() # /home/masa/projects/dev/torchscript-to-tvm/custom_lstms.py:84:8
	%4 : int = prim::Constant[value=0]() # /home/masa/projects/dev/torchscript-to-tvm/custom_lstms.py:84:34
	%outputs.1 : Tensor[] = prim::ListConstruct()
	%6 : int = aten::size(%input.1, %4) # /home/masa/projects/dev/torchscript-to-tvm/custom_lstms.py:84:23
	%outputs : Tensor[], %state : (Tensor, Tensor) = prim::Loop(%6, %3, %outputs.1, %state.1) # /home/masa/projects/dev/torchscript-to-tvm/custom_lstms.py:84:8
	block0(%i.1 : int, %outputs.6 : Tensor[], %state.6 : (Tensor, Tensor)):
	%12 : __torch__.custom_lstms.LSTMCell = prim::GetAttr[name="cell"](%self)

masahi / inceptionv3.log

Created April 3, 2020 03:45

{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 2048, 8, 8], "int16"], ["TENSOR", [192, 2048, 1, 1], "int16"], [1, 1], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 18989, "code_hash": null, "entity": [["tile_co", "sp", [-1, 8]], ["tile_oh", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 8]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], ["ann_reduce", "an", ["unroll", "none"]], ["ann_spatial", "an", ["unroll", "none", "vec"]]]}, "result": [[0.0015915348], 0, 3.0337789058685303, 1585830810.6958284], "version": 0.2, "tvm_version": "0.7.dev1"}

{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 2048, 8, 8], "int16"], ["TENSOR", [448, 2048, 1, 1], "int16"], [1, 1], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 10210, "code_hash": null, "entity": [["tile_co", "sp", [-1, 8]], ["tile_oh", "sp", [-1, 2]], ["tile_ow", "sp", [-1, 4]

masahi / resnet50.log

Created April 3, 2020 03:44

{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 1024, 14, 14], "int16"], ["TENSOR", [2048, 1024, 1, 1], "int16"], [2, 2], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 4011, "code_hash": null, "entity": [["tile_co", "sp", [-1, 8]], ["tile_oh", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 7]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 9, 7, 8]], ["ann_reduce", "an", ["none", "unroll"]], ["ann_spatial", "an", ["unroll", "none", "vec"]]]}, "result": [[0.00672183], 0, 2.449126720428467, 1585788869.0401723], "version": 0.2, "tvm_version": "0.7.dev1"}

{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 512, 28, 28], "int16"], ["TENSOR", [1024, 512, 1, 1], "int16"], [2, 2], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 19595, "code_hash": null, "entity": [["tile_co", "sp", [-1, 16]], ["tile_oh", "sp", [-1, 2]], ["tile_ow", "sp", [-1, 2

masahi / autotvm_resnet18

Created April 2, 2020 00:38

{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 256, 14, 14], "int16"], ["TENSOR", [512, 256, 1, 1], "int16"], [2, 2], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 806, "code_hash": null, "entity": [["tile_co", "sp", [-1, 64]], ["tile_oh", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 1]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], ["ann_reduce", "an", ["unroll", "none"]], ["ann_spatial", "an", ["none", "none", "vec"]]]}, "result": [[0.0004633984], 0, 2.3286285400390625, 1585768016.7444756], "version": 0.2, "tvm_version": "0.7.dev1"}

{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 128, 28, 28], "int16"], ["TENSOR", [256, 128, 1, 1], "int16"], [2, 2], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 11631, "code_hash": null, "entity": [["tile_co", "sp", [-1, 8]], ["tile_oh", "sp", [-1, 1]], ["tile_ow", "sp", [-1, 14]],

masahi / googlenet.log

Created April 1, 2020 19:07

{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_spatial_pack.arm_cpu", [["TENSOR", [1, 832, 7, 7], "int16"], ["TENSOR", [48, 832, 1, 1], "int16"], [1, 1], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 4253, "code_hash": null, "entity": [["tile_co", "sp", [-1, 4]], ["tile_oh", "sp", [-1, 7]], ["tile_ow", "sp", [-1, 1]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], ["ann_reduce", "an", ["none", "unroll"]], ["ann_spatial", "an", ["unroll", "unroll", "vec"]]]}, "result": [[0.0002178164], 0, 4.340655565261841, 1585708126.5301793], "version": 0.2, "tvm_version": "0.7.dev1"}

{"input": ["llvm -device=arm_cpu -target=aarch64-unknown-linux-gnu -mattr=+neon", "conv2d_nchw_winograd.arm_cpu", [["TENSOR", [1, 48, 9, 9], "int16"], ["TENSOR", [128, 48, 3, 3], "int16"], [1, 1], [0, 0, 0, 0], [1, 1], "int32"], {}], "config": {"index": 1901, "code_hash": null, "entity": [["tile_p", "sp", [-1, 4]], ["tile_k", "sp", [-1, 8]], ["tile_c", "sp", [-1, 12]], ["ann_reduc

masahi / autotvm_log_resnet18

Created April 1, 2020 02:21

	DEBUG:autotvm:No: 404 GFLOPS: 0.00/21.78 result: MeasureResult(costs=(InstantiationError(['Too large factor for unrolling'],),), error_no=1, all_cost=0.014428138732910156, timestamp=1585706401.8644567) [('tile_co', [-1, 32]), ('tile_oh', [-1, 56]), ('tile_ow', [-1, 1]), ('reorder_0', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ('ann_reduce', ['none', 'none']), ('ann_spatial', ['none', 'vec', 'unroll'])],None,39261
	DEBUG:autotvm:No: 405 GFLOPS: 0.00/21.78 result: MeasureResult(costs=(InstantiationError(['Too large factor for unrolling', 'Too large factor for unrolling'],),), error_no=1, all_cost=0.01387786865234375, timestamp=1585706401.8645122) [('tile_co', [-1, 64]), ('tile_oh', [-1, 112]), ('tile_ow', [-1, 1]), ('reorder_0', [0, 1, 2, 3, 4, 5, 6, 9, 7, 8]), ('ann_reduce', ['unroll', 'none']), ('ann_spatial', ['unroll', 'unroll', 'unroll'])],None,70769
	DEBUG:autotvm:No: 406 GFLOPS: 0.00/21.78 result: MeasureResult(costs=('',), error_no=7, all_cost=10, timestamp=1585706522.077638) [('tile_co', [-1, 2]), ('tile_oh', [-1,

masahi / cascadelake_updated.txt

Created March 28, 2020 13:04

	Working on resnet18, per channel quantization
	File /home/masa/.tvm_test_data/data/elephant-299.jpg exists, skip.
	build finished
	TVM elapsed ms: 1.2536034989999998
	Torch elapsed ms: 4.9253363609313965

	Working on resnet50, per channel quantization
	File /home/masa/.tvm_test_data/data/elephant-299.jpg exists, skip.
	build finished
	TVM elapsed ms: 3.142418917

masahi / perf_vs_torch.py

Created March 17, 2020 03:19

	resnet 18
	TVM AVX512 elapsed ms: 2.6329094399999997
	TVM AVX512 + VNNI elapsed ms: 2.0363589199999996
	Torch elapsed ms: 5.305776596069336

	resnet 50
	TVM AVX512 elapsed ms: 5.464826759999999
	TVM AVX512 + VNNI elapsed ms: 4.63109352
	Torch elapsed ms: 10.141160488128662