Yizhi Liu (yzhliu)

  • Boson AI
  • Bay Area, United States
@yzhliu
yzhliu / conv2d_nchw_simple.py
Created January 5, 2018 18:46
The simplest version of conv2d on Skylake
import numpy as np
import tvm
import topi
from tvm.contrib.pickle_memoize import memoize
from topi.util import get_const_tuple
from topi import tag
def schedule_conv2d(outs):
    """Create schedule for tensors."""
    print('Run in x86 sch ...')
@yzhliu
yzhliu / conv2d_nchw_rasp.py
Created January 5, 2018 19:31
Raspberry Pi version of conv2d-nchw
import numpy as np
import tvm
import topi
from tvm.contrib.pickle_memoize import memoize
from topi.util import get_const_tuple
from topi.nn.conv2d import SpatialPack, Im2ColPack, _WORKLOADS
from topi.nn.conv2d import _get_workload
from topi.nn.util import infer_pad, infer_stride
from topi import tag
from topi.nn import pad
def compute_conv2d(A, W, stride, padding):
    batch_size, in_channel, height, width = A.shape
    out_channel, _ = W.shape  # 1x1 kernel: weight is (out_channel, in_channel)
    kh = 1
    kw = 1
    out_height = (height + 2 * padding - kh) // stride + 1
This file has been truncated.
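As a quick sanity check of the output-size formula above (a worked example, not part of the gist):

# out_height = (height + 2 * padding - kh) // stride + 1
# e.g. a 7x7 input with a 1x1 kernel, no padding, stride 1 stays 7x7:
height, padding, kh, stride = 7, 0, 1, 1
assert (height + 2 * padding - kh) // stride + 1 == 7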
208 [main] INFO MXNetJVM - Try loading mxnet-scala from native path.
209 [main] INFO MXNetJVM - Try loading mxnet-scala-osx-x86_64-gpu from native path.
209 [main] INFO MXNetJVM - Try loading mxnet-scala-osx-x86_64-cpu from native path.
210 [main] WARN MXNetJVM - MXNet Scala native library not found in path. Copying native library from the archive. Consider installing the library somewhere in the path (for Windows: PATH, for Linux: LD_LIBRARY_PATH), or specifying by Java cmd option -Djava.library.path=[lib path].
224 [main] INFO org.apache.mxnet.util.NativeLibraryLoader - Replaced .dylib with .jnilib
228 [main] INFO org.apache.mxnet.util.NativeLibraryLoader - Loading libmxnet-scala.jnilib from /lib/native/ copying to mxnet-scala
661 [main] INFO org.apache.mxnet.WarnIfNotDisposed - Leaked object tracing is enabled (property mxnet.traceLeakedObjects is set)
[22:30:36] src/io/iter_mnist.cc:110: MNISTIter: load 60000 images, shuffle=1, shape=(128,784)
[22:30:37] src/io/iter_mnist.cc:110: MNISTIter: load 10000 ima
diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc
index 322d77b6..12146cca 100644
--- a/nnvm/src/top/nn/nn.cc
+++ b/nnvm/src/top/nn/nn.cc
@@ -641,6 +641,31 @@ inline bool LayoutTransformInferShape(const NodeAttrs& attrs,
return true;
}
+inline Array<Expr> layout_transform_func(const std::string& src, const std::string& dst, const Array<Var>& dst_indices) {
+ Layout src_layout(src);
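The added helper maps destination-layout indices back to the source layout. As a hedged numpy illustration of the same idea for the common NCHW -> NCHW16c packing (the names and shapes here are assumptions, not the NNVM API):

import numpy as np

# Split channel C into (C // 16, 16) blocks and move the inner factor to the
# last axis; this is the index mapping an NCHW -> NCHW16c transform computes.
bn = 16
x = np.random.rand(1, 32, 7, 7).astype('float32')                    # NCHW
n, c, h, w = x.shape
x_packed = x.reshape(n, c // bn, bn, h, w).transpose(0, 1, 3, 4, 2)  # NCHW16c
assert x_packed.shape == (1, 2, 7, 7, 16)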
import os
import numpy as np
import nnvm.testing
import nnvm.compiler
import tvm
from tvm import autotvm
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.contrib.util import tempdir
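A hedged sketch of how these imports are usually wired together in the NNVM-era autotvm flow; the workload, trial counts, and file names below are assumptions, not the gist's exact script:

# Assumed workload; the gist's real network and shapes are not shown.
net, params = nnvm.testing.resnet.get_workload(num_layers=18, batch_size=1)
target = tvm.target.create('llvm -mcpu=skylake-avx512')

tasks = autotvm.task.extract_from_graph(
    net, target=target, shape={'data': (1, 3, 224, 224)}, dtype='float32',
    symbols=(nnvm.sym.conv2d,))

measure_option = autotvm.measure_option(
    builder=autotvm.LocalBuilder(),
    runner=autotvm.LocalRunner(number=10))

for i, task in enumerate(tasks):
    tuner = XGBTuner(task, loss_type='rank')
    tuner.tune(n_trial=1000,
               measure_option=measure_option,
               callbacks=[autotvm.callback.progress_bar(1000, prefix='[Task %2d] ' % (i + 1)),
                          autotvm.callback.log_to_file('tuning.log')])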
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], "NCHW", "float32"], {"i": 416, "c": null, "e": [["tile_ic", "sp", [2, 512]], ["tile_oc", "sp", [64, 16]], ["tile_ow", "sp", [1, 7]], ["tile_oh", "ot", 2]], "t": "direct"}], "r": [[0.0012855558738853504], 0, 1.3173747062683105, 1541097220.10334], "v": 0.1}
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], "float32"], {"i": 354673, "c": null, "e": [["tile_co", "sp", [1024, 1]], ["tile_oh", "sp", [1, 7]], ["tile_ow", "sp", [1, 7]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8]], ["reorder_1", "re", [0, 1, 2, 3, 6, 4, 5]], ["a
Extract tasks...
Tuning...
[Task 1/19] Current/Best: 0.00/ 79.93 GFLOPS | Progress: (484/1000) | 2325.08 s Done.
[Task 2/19] Current/Best: 7.72/ 13.22 GFLOPS | Progress: (756/1000) | 1381.61 s Done.
[Task 3/19] Current/Best: 10.29/ 80.49 GFLOPS | Progress: (440/1000) | 1910.56 s Done.
[Task 4/19] Current/Best: 2.18/ 6.38 GFLOPS | Progress: (1000/1000) | 1627.55 s Done.
[Task 5/19] Current/Best: 13.69/ 116.10 GFLOPS | Progress: (504/1000) | 1762.37 s Done.
[Task 6/19] Current/Best: 8.73/ 8.73 GFLOPS | Progress: (36/1000) | 66.31 s
LLVM ERROR: Cannot select: 0x56385e2c8368: i32 = X86ISD::CMP 0x56385e2ca1e8, 0x56385e2c9550
0x56385e2ca1e8: v16i1 = and 0x56385e202548, 0x56385e213948
0x56385e202548: v16i1 = bitcast 0x56385e202a28
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], "NCHW", "float32"], {"i": 417, "c": null, "e": [["tile_ic", "sp", [1, 1024]], ["tile_oc", "sp", [64, 16]], ["tile_ow", "sp", [1, 7]], ["tile_oh", "ot", 2]], "t": "direct"}], "r": [[0.00010386519659715739], 0, 1.2961008548736572, 1541133775.742406], "v": 0.1}
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], "float32"], {"i": 777271, "c": null, "e": [["tile_co", "sp", [1024, 1]], ["tile_oh", "sp", [1, 7]], ["tile_ow", "sp", [7, 1]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 8, 6, 7]], ["reorder_1", "re", [0, 1, 2, 3, 4, 5, 6]],
diff --git a/src/arithmetic/const_fold.h b/src/arithmetic/const_fold.h
index fbf8fe7e..1c397f40 100644
--- a/src/arithmetic/const_fold.h
+++ b/src/arithmetic/const_fold.h
@@ -101,33 +101,28 @@ inline bool IsIndexType(const Type& type) {
// specialization of constant folders.
template<>
inline Expr TryConstFold<ir::Add>(Expr a, Expr b) {
- TVM_ARITH_CONST_PROPAGATION({
+ TVM_INDEX_CONST_PROPAGATION({
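For context on the patch: TryConstFold<ir::Add> folds an addition when both operands are compile-time constants, and the macro swap appears to restrict that propagation to index (integer) types, consistent with the IsIndexType check above. A hedged Python analogue of the folding idea (not the TVM macros themselves):

# Fold Add(a, b) only when both operands are known integer constants,
# mirroring index-type constant propagation; otherwise return None so the
# expression stays symbolic.
def try_const_fold_add(a, b):
    if isinstance(a, int) and isinstance(b, int):
        return a + b
    return None

assert try_const_fold_add(2, 3) == 5
assert try_const_fold_add(2, 'x') is None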