tiandiao123 · January 23, 2022 22:45
diff --git a/tvm_load_test.py b/tvm_load_test.py
 import tvm
 from tvm import relay
 from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
 from tvm.contrib import graph_executor
 import numpy as np

 # Path of the compiled TVM shared library (.so); change it to your own.
 my_lib_saved_path = "/data00/cuiqing.li/xperf_workplace/xperf_tools/xperf_tools/xperf_pipeline/bytetuner/rh2_tvm_lab.vulgar.pipeline_1.4/deploy_batch_id_32.so"
 lib = tvm.runtime.load_module(my_lib_saved_path)

 # Device handle for CUDA device 0.
 target = "cuda"
 ctx = tvm.device(target, 0)

 # The library's "default" entry is called with the device and the result
 # is wrapped in a GraphModule, which is what the rest of the script drives.
 create_default = lib["default"]
 module = graph_executor.GraphModule(create_default(ctx))

 # Description of the model inputs: names, shapes and dtypes, one entry per
 # input. Change these to match your own model.
 input_names = ["data"]
 input_shapes = [[32, 3, 224, 224]]
 dtypes = ["float32"]

 # The leading dimension of the first input shape is used as the batch size.
 batch_size = int(input_shapes[0][0])

 # Bind a uniformly random array of the declared shape/dtype to each input.
 for idx, name in enumerate(input_names):
     shape, dtype = input_shapes[idx], dtypes[idx]
     random_input = np.random.uniform(size=shape).astype(dtype)
     module.set_input(name, tvm.nd.array(random_input, ctx))

 # Execute the graph once and fetch output 0.
 module.run()
 out = module.get_output(0)

 # Ten additional untimed runs before the measurement starts.
 print("warming up ... ")
 for _ in range(10):
     module.run()

 print("Evaluate inference time cost...")
 # Time the underlying module's "run" function on the device.
 ftimer = module.module.time_evaluator("run", ctx, repeat=10, min_repeat_ms=500)
 timing = ftimer()
 prof_res = np.array(timing.results) * 1e3  # scale to milliseconds

 mean_ms = np.mean(prof_res)
 std_ms = np.std(prof_res)
 message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (mean_ms, std_ms)
 print(message)

 # QPS = batch size divided by the mean latency expressed in seconds.
 ms_to_s = mean_ms * 0.001
 print("the QPS of tvm model is {} ".format(str(batch_size / ms_to_s)))
	import tvm
	from tvm import relay
	from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
	from tvm.contrib import graph_executor
	import numpy as np

	# Location of the compiled TVM shared library (.so); point it at your own file.
	my_lib_saved_path = "/data00/cuiqing.li/xperf_workplace/xperf_tools/xperf_tools/xperf_pipeline/bytetuner/rh2_tvm_lab.vulgar.pipeline_1.4/deploy_batch_id_32.so"
	lib = tvm.runtime.load_module(my_lib_saved_path)

	# Device handle for CUDA device 0.
	target = "cuda"
	ctx = tvm.device(target, 0)

	# Call the library's "default" entry with the device, then wrap the
	# result in a GraphModule for setting inputs and running.
	default_factory = lib["default"]
	module = graph_executor.GraphModule(default_factory(ctx))

	# Description of the model inputs: names, shapes and dtypes, one entry per
	# input. Change these to match your own model.
	input_names = ["data"]
	input_shapes = [[32, 3, 224, 224]]
	dtypes = ["float32"]

	# The leading dimension of the first input shape is used as the batch size.
	batch_size = int(input_shapes[0][0])

	# Bind a uniformly random array of the declared shape/dtype to each input.
	# The loop body must be indented under the `for`; the flattened paste had
	# lost that indentation, which is a syntax error in Python.
	for i, input_name in enumerate(input_names):
	    input_shape = input_shapes[i]
	    dtype = dtypes[i]
	    data_tvm = tvm.nd.array(np.random.uniform(size=input_shape).astype(dtype), ctx)
	    module.set_input(input_name, data_tvm)

	# Execute the graph once and fetch output 0.
	module.run()
	out = module.get_output(0)

	# Ten additional untimed runs before the measurement starts. The call is
	# indented under the `for`; the flattened paste had lost that indentation,
	# which is a syntax error in Python.
	print("warming up ... ")
	for _ in range(10):
	    module.run()

	print("Evaluate inference time cost...")
	# Time the underlying module's "run" function on the device.
	ftimer = module.module.time_evaluator("run", ctx, repeat=10, min_repeat_ms=500)
	prof_res = np.array(ftimer().results) * 1e3  # scale to milliseconds

	# Compute the mean once and reuse it for both the report and the QPS.
	mean_ms = np.mean(prof_res)
	message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (mean_ms, np.std(prof_res))
	print(message)

	# QPS = batch size divided by the mean latency expressed in seconds.
	ms_to_s = mean_ms * 0.001
	print("the QPS of tvm model is {} ".format(str(batch_size / ms_to_s)))