Lorien: A Hyper-Automated Tuning System for Tensor Operators

Lorien is a system built on top of TVM to massively explore and benchmark the best schedule configurations for TOPI schedules.

Motivation

Although TVM already ships TOPI (TVM Operator Inventory) with algorithm and schedule implementations for commonly used operators such as conv2d and dense, there is a challenge that makes TOPI hard to improve efficiently.

The snippets below use the last conv2d layer in ResNet as the running workload:

import numpy as np
import tvm
from tvm import auto_scheduler, te, topi
from tvm.te import schedule

# The last layer in ResNet
H, W, CO, CI, KH, KW, strides, padding = 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)

def conv2d(N, H, W, CO, CI, KH, KW, stride, padding):
    # The original snippet breaks off here; the body below follows the
    # standard TOPI conv2d workload definition (assumption).
    data = te.placeholder((N, CI, H, W), name="data")
    kernel = te.placeholder((CO, CI, KH, KW), name="kernel")
    conv = topi.nn.conv2d_nchw(data, kernel, stride, padding, dilation=1)
    return [data, kernel, conv]
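To make "exploring and benchmarking schedule configs" concrete, here is a minimal sketch of tuning the workload above with TVM's auto_scheduler. It is not part of the original gist and not Lorien's own API; it assumes the tvm.auto_scheduler interface (SearchTask, TuningOptions, apply_best), and the log file name "conv2d.json", the trial budget, and N=1 are placeholders.

# Minimal tuning sketch (assumption), not part of the original gist.
target = tvm.target.Target("llvm")

# Register the workload so auto_scheduler can reconstruct it from its key.
auto_scheduler.register_workload("conv2d", f=conv2d)

task = auto_scheduler.SearchTask(
    func="conv2d",
    args=(1, H, W, CO, CI, KH, KW, strides, padding),  # N=1
    target=target,
)
tune_option = auto_scheduler.TuningOptions(
    num_measure_trials=64,  # tiny budget, for illustration only
    measure_callbacks=[auto_scheduler.RecordToFile("conv2d.json")],
)
task.tune(tune_option)
sch, args = task.apply_best("conv2d.json")
func = tvm.build(sch, args, target)

Lorien's role, per the description above, is to run this kind of exploration at scale across many workloads and targets and to manage the resulting schedule configurations.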

"""BYOC Demo using TensorRT."""
# pylint: disable=invalid-name,redefined-outer-name,missing-function-docstring
#
# Build TVM with the following settings in config.cmake:
#   set(USE_TENSORRT_CODEGEN ON)
#   set(USE_TENSORRT_RUNTIME ON)
# Add TensorRT to LD_LIBRARY_PATH if you installed it from the tarball:
#   export LD_LIBRARY_PATH=/path/to/tensorrt/lib:$LD_LIBRARY_PATH
import numpy as np
import tvm
from tvm import relay
from tvm.runtime.vm import VirtualMachine

target = "cuda"
data_shape = (relay.Any(), 3, 224, 224)  # dynamic batch dimension
weight_shape = (32, 3, 3, 3)
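The demo file is cut off after these definitions. As a rough sketch of where it is headed, the snippet below builds a one-convolution Relay module with a dynamic batch size, partitions it for TensorRT, and runs it on the Relay VM. The partition_for_tensorrt call and its (mod, config) return value follow the TVM 0.8-era BYOC tutorial and may differ in other TVM versions; the network, weights, and batch size are illustrative assumptions.

from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt

# A toy network: a single conv2d matching data_shape/weight_shape above (assumption).
data = relay.var("data", shape=data_shape, dtype="float32")
weight = relay.var("weight", shape=weight_shape, dtype="float32")
out = relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))
params = {"weight": np.random.uniform(size=weight_shape).astype("float32")}

# Offload supported operators to TensorRT (TVM 0.8-era API; assumption).
mod, config = partition_for_tensorrt(mod, params)
with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
    vm_exec = relay.vm.compile(mod, target=target, params=params)

vm = VirtualMachine(vm_exec, tvm.cuda(0))
x = np.random.uniform(size=(1, 3, 224, 224)).astype("float32")
print(vm.run(x).shape)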
import time
import logging

import numpy as np
import tvm
from tvm import relay, te, topi, transform, auto_scheduler
from tvm.contrib import graph_runtime
from tvm.relay.backend import compile_engine

# logging.basicConfig(level=logging.INFO)

# The lines below are an excerpt of an auto_scheduler-generated TE schedule for
# the conv2d gradient workload (most likely printed from a tuned state via
# ComputeDAG). The surrounding code that defines the stages (extracted_reduction,
# pad_temp, compute_kernel_grad, ...) and the schedule `s` is not part of the excerpt.
extracted_reduction_ax0, extracted_reduction_ax1, extracted_reduction_ax2, extracted_reduction_ax3, extracted_reduction_n0_n0_k1_shifted_shifted, extracted_reduction_n1_n1_k2_shifted_shifted, extracted_reduction_n2_n2_k3_shifted_shifted = tuple(extracted_reduction.op.axis) + tuple(extracted_reduction.op.reduce_axis)
pad_temp_data_grad_ax0, pad_temp_data_grad_ax1, pad_temp_data_grad_ax2, pad_temp_data_grad_ax3 = tuple(pad_temp_data_grad.op.axis) + tuple(pad_temp_data_grad.op.reduce_axis)
pad_temp_i0, pad_temp_i1, pad_temp_i2, pad_temp_i3 = tuple(pad_temp.op.axis) + tuple(pad_temp.op.reduce_axis)
compute_kernel_grad_ax0, compute_kernel_grad_ax1, compute_kernel_grad_ax2, compute_kernel_grad_ax3, compute_kernel_grad_n0_n0_k0_shifted_shifted, compute_kernel_grad_n1_n1_k2_shifted_shifted, compute_kernel_grad_n2_n2_k3_shifted_shifted = tuple(compute_kernel_grad.op.axis) + tuple(compute_kernel_grad.op.reduce_axis)
compute_kernel_grad_local, = s.cache_write([compute_kernel_grad], "local")
# The original text breaks off mid-statement here; the unpacking below is
# reconstructed by analogy with the compute_kernel_grad line above (assumption).
compute_kernel_grad_local_ax0, compute_kernel_grad_local_ax1, compute_kernel_grad_local_ax2, compute_kernel_grad_local_ax3, compute_kernel_grad_local_n0_n0_k0_shifted_shifted, compute_kernel_grad_local_n1_n1_k2_shifted_shifted, compute_kernel_grad_local_n2_n2_k3_shifted_shifted = tuple(compute_kernel_grad_local.op.axis) + tuple(compute_kernel_grad_local.op.reduce_axis)
import os
import logging

import numpy as np
import tvm
from tvm import auto_scheduler, te, topi
from tvm.topi.nn.util import get_pad_tuple
from tvm.auto_scheduler.compute_dag import ComputeDAG

# The last layer in ResNet
H, W, CO, CI, KH, KW, strides, padding = 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)

def conv2d_diff(N, H, W, CO, CI, KH, KW, stride, padding):
    data = te.placeholder((N, CI, H, W), name="data")
    # The original snippet is truncated after the line above; the rest of the
    # body is an assumed reconstruction: declare the forward conv2d and
    # differentiate it with te.gradient to obtain the *_grad stages that the
    # generated schedule above refers to.
    kernel = te.placeholder((CO, CI, KH, KW), name="kernel")
    conv = topi.nn.conv2d_nchw(data, kernel, stride, padding, dilation=1)
    [data_grad, kernel_grad] = te.gradient(conv, [data, kernel])
    return [data, kernel, data_grad, kernel_grad]
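Given the ComputeDAG import above, a natural next step is to inspect the differentiated compute directly. A minimal sketch, assuming that intent (N=1 and the printing calls are illustrative, not from the original file):

# Build the auto_scheduler ComputeDAG for the gradient workload and print
# its compute definition plus the naive initial schedule state.
tensors = conv2d_diff(1, H, W, CO, CI, KH, KW, strides, padding)
dag = ComputeDAG(tensors)
print(dag)                   # fused compute, including the *_grad stages
print(dag.get_init_state())  # the schedule state auto_scheduler starts from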
# Excerpt: graph-level tuning of a PyTorch/TorchVision model with autotvm's
# DPTuner (the body of this snippet is truncated).
import numpy as np
import tvm
from tvm import relay
from tvm.autotvm.graph_tuner import DPTuner
from tvm.contrib import graph_runtime
import torch
import torchvision

# Excerpt: compiling a Gluon CV model (the body of this snippet is truncated).
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay.backend import compile_engine
import gluoncv as gcv

model_name = 'MobileNet1.0'
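The Gluon CV excerpt stops right after naming the model. For completeness, here is a minimal sketch of the usual next steps: import the model into Relay via the MXNet frontend, build it, and run one inference with the graph runtime. The input shape, CPU target, and random input are illustrative assumptions, not from the original file.

# Load the pretrained Gluon CV model and convert it to Relay.
net = gcv.model_zoo.get_model(model_name, pretrained=True)
input_shape = (1, 3, 224, 224)  # assumed NCHW input shape for MobileNet1.0
mod, params = relay.frontend.from_mxnet(net, shape={"data": input_shape})

# Compile and run one inference on CPU with the graph runtime.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)
dev = tvm.cpu(0)
module = graph_runtime.GraphModule(lib["default"](dev))
module.set_input("data", np.random.uniform(size=input_shape).astype("float32"))
module.run()
print(module.get_output(0).shape)  # (1, 1000) class scores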