This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import torch | |
pt_dtype_mappings = { | |
"float": torch.float, | |
"half": torch.half, | |
"float16": torch.float16, | |
"bfloat16": torch.bfloat16, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Licensed to the Apache Software Foundation (ASF) under one | |
# or more contributor license agreements. See the NOTICE file | |
# distributed with this work for additional information | |
# regarding copyright ownership. The ASF licenses this file | |
# to you under the Apache License, Version 2.0 (the | |
# "License"); you may not use this file except in compliance | |
# with the License. You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import tvm | |
from tvm.contrib import graph_runtime | |
from tvm.contrib.util import tempdir | |
from tvm import autotvm | |
from tvm import relay | |
import tvm.relay.testing | |
#import mxnet |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern "C" __global__ void tvmop_kernel0( float* __restrict__ buffer, float* __restrict__ buffer1, float* __restrict__ buffer2, int tindex, int tindex1, int tindex2, int stride, int stride1, int stride2, int stride3, int stride4, int stride5, int stride6, int stride7, int stride8) { | |
if (((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) / tindex1) < tindex2) { | |
if (((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) % tindex1) < tindex1) { | |
if ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex) < tindex) { | |
if ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) < (tindex2 * tindex1)) { | |
if (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) < ((tindex2 * tindex1) * tindex)) { | |
if (((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) / tindex1) < tindex2) { | |
if (0 <= ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) % tindex1)) { | |
if (((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex) % ti |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern "C" __global__ void tvmop_kernel0( float* __restrict__ buffer, float* __restrict__ buffer1, float* __restrict__ buffer2, int tindex, int tindex1, int tindex2, int stride, int stride1, int stride2, int stride3, int stride4, int stride5, int stride6, int stride7, int stride8) { | |
if (((int)blockIdx.x) < (((tindex * tindex1) * tindex2) >> 6)) { | |
if (((((tindex1 >= 0) && ((((((tindex2 >= 0) && ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex2) >= 0)) || ((tindex2 < 0) && ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex2) <= 0))) ? (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex2) : ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex2) - 1)) % tindex1) >= 0)) || ((tindex1 < 0) && ((((((tindex2 >= 0) && ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex2) >= 0)) || ((tindex2 < 0) && ((((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) % tindex2) <= 0))) ? (((((int)blockIdx.x) * 64) + ((int)threadIdx.x)) / tindex2) : ((((((int)blockIdx.x) * 64) + ((int)threadIdx. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tvm | |
import topi | |
from topi.util import get_const_tuple | |
import numpy as np | |
from topi.nn.pad import pad | |
# on a2: python3 -m tvm.exec.rpc_server --port=8499 | |
# target = 'llvm -mcpu=core-avx2' | |
# target = 'llvm -device=arm_cpu -target=aarch64-linux-gnu -mattr=+v8.4a,+fp16fml,+fullfp16' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/arithmetic/const_fold.h b/src/arithmetic/const_fold.h | |
index fbf8fe7e..1c397f40 100644 | |
--- a/src/arithmetic/const_fold.h | |
+++ b/src/arithmetic/const_fold.h | |
@@ -101,33 +101,28 @@ inline bool IsIndexType(const Type& type) { | |
// specialization of constant folders. | |
template<> | |
inline Expr TryConstFold<ir::Add>(Expr a, Expr b) { | |
- TVM_ARITH_CONST_PROPAGATION({ | |
+ TVM_INDEX_CONST_PROPAGATION({ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], "NCHW", "float32"], {"i": 417, "c": null, "e": [["tile_ic", "sp", [1, 1024]], ["tile_oc", "sp", [64, 16]], ["tile_ow", "sp", [1, 7]], ["tile_oh", "ot", 2]], "t": "direct"}], "r": [[0.00010386519659715739], 0, 1.2961008548736572, 1541133775.742406], "v": 0.1} | |
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], "float32"], {"i": 777271, "c": null, "e": [["tile_co", "sp", [1024, 1]], ["tile_oh", "sp", [1, 7]], ["tile_ow", "sp", [7, 1]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 8, 6, 7]], ["reorder_1", "re", [0, 1, 2, 3, 4, 5, 6]], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Extract tasks... | |
Tuning... | |
[Task 1/19] Current/Best: 0.00/ 79.93 GFLOPS | Progress: (484/1000) | 2325.08 s Done. | |
[Task 2/19] Current/Best: 7.72/ 13.22 GFLOPS | Progress: (756/1000) | 1381.61 s Done. | |
[Task 3/19] Current/Best: 10.29/ 80.49 GFLOPS | Progress: (440/1000) | 1910.56 s Done. | |
[Task 4/19] Current/Best: 2.18/ 6.38 GFLOPS | Progress: (1000/1000) | 1627.55 s Done. | |
[Task 5/19] Current/Best: 13.69/ 116.10 GFLOPS | Progress: (504/1000) | 1762.37 s Done. | |
[Task 6/19] Current/Best: 8.73/ 8.73 GFLOPS | Progress: (36/1000) | 66.31 sLLVM ERROR: Cannot select: 0x56385e2c8368: i32 = X86ISD::CMP 0x56385e2ca1e8, 0x56385e2c9550 | |
0x56385e2ca1e8: v16i1 = and 0x56385e202548, 0x56385e213948 | |
0x56385e202548: v16i1 = bitcast 0x56385e202a28 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_conv2d", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1024, 1, 1], "float32"], [1, 1], [0, 0], "NCHW", "float32"], {}, ["conv2d", [1, 1024, 7, 7, "float32"], [1024, 1024, 1, 1, "float32"], [1, 1], [0, 0], "NCHW", "float32"], {"i": 416, "c": null, "e": [["tile_ic", "sp", [2, 512]], ["tile_oc", "sp", [64, 16]], ["tile_ow", "sp", [1, 7]], ["tile_oh", "ot", 2]], "t": "direct"}], "r": [[0.0012855558738853504], 0, 1.3173747062683105, 1541097220.10334], "v": 0.1} | |
{"i": ["llvm -mcpu=skylake-avx512", "topi_nn_depthwise_conv2d_nchw", [["TENSOR", [1, 1024, 7, 7], "float32"], ["TENSOR", [1024, 1, 3, 3], "float32"], [1, 1], [1, 1], "float32"], {}, ["depthwise_conv2d_nchw", [1, 1024, 7, 7, "float32"], [1024, 1, 3, 3, "float32"], [1, 1], [1, 1], "float32"], {"i": 354673, "c": null, "e": [["tile_co", "sp", [1024, 1]], ["tile_oh", "sp", [1, 7]], ["tile_ow", "sp", [1, 7]], ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 6, 7, 8]], ["reorder_1", "re", [0, 1, 2, 3, 6, 4, 5]], ["a |
NewerOlder