import timeit

import numpy as np
import torch
import tvm
from tvm import auto_scheduler

import mnm
from mnm.testing.utils import ir_fusion, ir_simplify, get_vm_executor, get_vm_profiler
from mnm.utils.tuner import run_tuning
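
# NOTE (comment added for clarity; assumptions are flagged as such): this script
# appears to micro-benchmark individual ResNet-50 training subgraphs with mnm (Meta).
# Each workload function below returns an (mnm.Model, inputs) pair, and profile_wkl
# runs it on the mnm VM to report per-kernel and end-to-end latency.
# "sch_resnet_50.json" is presumably a TVM auto-scheduler tuning log produced by an
# earlier run of run_tuning, which is imported but never called in this file.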


def randn(
    shape, *, ctx="cuda", dtype="float32", std=1.0, mean=0.0, requires_grad=False, positive=False
):
    """Generate a random tensor of the given shape as both an mnm.array and a torch.Tensor."""
    if positive:
        x = np.abs(np.random.randn(*shape)) * std + mean
    else:
        x = np.random.randn(*shape) * std + mean
    if not isinstance(x, np.ndarray):
        x = np.array(x)
    assert list(x.shape) == list(shape)
    x = x.astype(dtype)
    m_x = mnm.array(x, ctx=ctx)
    if requires_grad:
        m_x.requires_grad = True
    t_x = torch.tensor(x, requires_grad=requires_grad, device=ctx)  # pylint: disable=not-callable
    return m_x, t_x
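

# Workload definitions: each builds an mnm.Model for one small subgraph and returns
# (model, inputs). The "Task N" docstrings presumably refer to task IDs in the
# auto-scheduler tuning log referenced above.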


def mul_conv2d_dw_add(ctx):
    """Task 83: conv2d weight gradient fused with a momentum-style update (new_v = momentum * v + dw)."""

    class Model(mnm.Model):
        def build(self, shape, stride, padding, dilation, groups, momentum):
            self.shape = shape
            self.stride = stride
            self.padding = padding
            self.dilation = dilation
            self.groups = groups
            self.momentum = mnm.array(momentum, dtype="float32", ctx="cpu")

        @mnm.model.trace
        def forward(self, x, y, dy, v):
            dw = mnm.conv2d_dw(
                x,
                y,
                dy,
                shape=self.shape,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
            )
            new_v = mnm.add(mnm.multiply(self.momentum, v), dw)
            return new_v

    # x_shape = (32, 3, 32, 32)
    # y_shape = (32, 64, 32, 32)
    # dy_shape = (32, 64, 32, 32)
    # shape = (64, 32, 3, 3)
    x_shape = (32, 256, 32, 32)
    y_shape = (32, 128, 32, 32)
    dy_shape = (32, 128, 32, 32)
    shape = (128, 256, 1, 1)
    stride = 1
    padding = 0
    dilation = 1
    groups = 1
    m_model = Model(shape, stride, padding, dilation, groups, momentum=0.01)
    m_model.to(ctx=ctx)
    m_x, t_x = randn(x_shape, ctx=ctx, requires_grad=True)
    m_y, t_y = randn(y_shape, ctx=ctx, requires_grad=True)
    m_dy, t_dy = randn(dy_shape, ctx=ctx)
    m_v, t_v = randn(shape, ctx=ctx)
    return m_model, [m_x, m_y, m_dy, m_v]


def conv2d_dx_relu_dx(ctx):
    class Model(mnm.Model):
        # fwd = x -> relu --(y_relu)-> conv -> y_conv
        # bwd = conv_dx -> relu_dx
        def build(self, shape, stride, padding, dilation, groups):
            self.shape = shape
            self.stride = stride
            self.padding = padding
            self.dilation = dilation
            self.groups = groups

        @mnm.model.trace
        def forward(self, x, y_relu, y_conv, w, dy_conv):
            dy_relu = mnm.conv2d_dx(
                w,
                y_conv,
                dy_conv,
                shape=self.shape,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
            )
            dx = mnm.relu_dx(x, y_relu, dy_relu)
            return dx

    w_shape = (256, 256, 3, 3)
    y_shape = (32, 256, 8, 8)
    dy_shape = (32, 256, 8, 8)
    shape = (32, 256, 16, 16)
    x_shape = shape
    stride = 2
    padding = 1
    dilation = 1
    groups = 1
    m_model = Model(shape, stride, padding, dilation, groups)
    m_model.to(ctx=ctx)
    x, _ = randn(x_shape, ctx=ctx)
    y_relu, _ = randn(x_shape, ctx=ctx)  # y_relu = mnm.relu(x)
    w, _ = randn(w_shape, ctx=ctx)
    y_conv, _ = randn(y_shape, ctx=ctx)  # y_conv = mnm.conv2d(y_relu, w)
    dy_conv, _ = randn(dy_shape, ctx=ctx)
    return m_model, [x, y_relu, y_conv, w, dy_conv]


def conv2d(ctx):
    """Task 7"""

    class Model(mnm.Model):
        def build(self, stride, padding, dilation, groups):
            self.stride = stride
            self.padding = padding
            self.dilation = dilation
            self.groups = groups

        @mnm.model.trace
        def forward(self, x, y):
            out = mnm.conv2d(
                x,
                y,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups,
            )
            return out

    x_shape = (32, 256, 32, 32)
    y_shape = (128, 256, 1, 1)
    stride = 1
    padding = 0
    dilation = 1
    groups = 1
    m_model = Model(stride, padding, dilation, groups)
    m_model.to(ctx=ctx)
    m_x, _ = randn(x_shape, ctx=ctx, requires_grad=True)
    m_y, _ = randn(y_shape, ctx=ctx, requires_grad=True)
    return m_model, [m_x, m_y]


def batch_norm(ctx):
    """Task ?"""

    class Model(mnm.Model):
        def build(self):
            pass

        @mnm.model.trace
        def forward(self, m_x, m_m, m_v, m_w, m_b):
            out = mnm.batch_norm_infer(m_x, m_m, m_v, m_w, m_b, 0.1, 1e-5)
            return out

    shape = (32, 128, 32, 32)
    s_shape = (shape[1],)
    m_m, _ = randn(s_shape, ctx=ctx)
    m_v, _ = randn(s_shape, ctx=ctx, positive=True)
    m_x, _ = randn(shape, ctx=ctx)
    m_w, _ = randn(s_shape, ctx=ctx)
    m_b, _ = randn(s_shape, ctx=ctx)
    m_model = Model()
    m_model.to(ctx=ctx)
    return m_model, [m_x, m_m, m_v, m_w, m_b]


def batch_norm_train_dwxwb(ctx):
    """Task 66"""

    class Model(mnm.Model):
        def build(self):
            pass

        @mnm.model.trace
        def forward(self, m_y, m_x, m_w, m_b):
            out = mnm.batch_norm_train_dxwb(m_y, m_x, m_w, m_b, 1e-5)
            return out

    shape = (32, 128, 32, 32)
    s_shape = (shape[1],)
    m_y, _ = randn(shape, ctx=ctx)
    m_x, _ = randn(shape, ctx=ctx)
    m_w, _ = randn(s_shape, ctx=ctx)
    m_b, _ = randn(s_shape, ctx=ctx)
    m_model = Model()
    m_model.to(ctx=ctx)
    return m_model, [m_y, m_x, m_w, m_b]
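

# Profiling driver: optionally applies simplify + fusion to the workload IR, then
# measures per-kernel latency with the VM profiler and end-to-end latency with timeit.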


def profile_wkl(wkl_func, ctx, fuse, log_file, number=100, warmup=100):
    name = wkl_func.__qualname__
    m_model, inputs = wkl_func(ctx)

    def ir_optimizer(func):
        func = ir_simplify(func)
        if fuse:
            func = ir_fusion(func)
        return func

    print("Profiling workload %s with %s" % (name, "fused_tvmjit" if fuse else "unfused_cudnn"))

    print("Individual latency")

    def m_profiler():
        profiler, vm_inputs = get_vm_profiler(m_model, ctx, inputs, ir_optimizer)
        vm = profiler.make_executor(sch_file=log_file)
        # skip first several executions
        for _ in range(warmup):
            vm(*vm_inputs)
        profiler.reset()
        for _ in range(number):
            vm(*vm_inputs)
        print("\n{}".format(profiler.get_stat()))

    m_profiler()

    print("End-to-end latency")
    executor, vm_inputs = get_vm_executor(m_model, ctx, inputs, ir_optimizer, sch_file=log_file)

    def m_setup():
        # skip first several executions
        for _ in range(warmup):
            executor(*vm_inputs)

    def m_stmt():
        executor(*vm_inputs)

    m_time = (
        timeit.Timer(
            stmt="m_stmt()", setup="m_setup();", globals={"m_stmt": m_stmt, "m_setup": m_setup}
        ).timeit(number)
        / number
        * 1e3
    )
    print("Latency: %.2f ms" % m_time)


if __name__ == "__main__":
    for fuse in [False]:
        profile_wkl(mul_conv2d_dw_add, "cuda", fuse, "sch_resnet_50.json")
        # profile_wkl(conv2d, "cuda", fuse, "sch_resnet_50.json")
        # profile_wkl(conv2d_dx_relu_dx, "cuda", fuse, "sch_resnet_50.json")
        # profile_wkl(batch_norm_train_dwxwb, "cuda", fuse, "sch_resnet_50.json")
        # profile_wkl(batch_norm, "cuda", fuse, "sch_resnet_50.json")
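
# Usage note (an assumption, not part of the original run): to compare the unfused
# cuDNN path against fused TVM kernels from the same schedule log, iterate over
# [False, True] in the loop above; as written, only the unfused path is measured.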