braun-steven · January 20, 2022 06:12
diff --git a/results.txt b/results.txt
 [---- F.softmax(x, dim) -----]
             |   cpu   |  cuda
 1 threads: -------------------
      dim=0  |  197.1  |   9.1
      dim=1  |  193.6  |  11.9
      dim=2  |  210.2  |  11.5
      dim=3  |  228.8  |   3.4

 Times are in milliseconds (ms).

 [---- torch.sum(x, dim) ----]
             |  cpu   |  cuda
 1 threads: ------------------
      dim=0  |  24.6  |  1.6
      dim=1  |  23.2  |  1.6
      dim=2  |  19.4  |  1.6
      dim=3  |  14.6  |  2.6

 Times are in milliseconds (ms).

 [---- torch.min(x, dim) -----]
             |   cpu   |  cuda
 1 threads: -------------------
      dim=0  |  113.6  |  1.7
      dim=1  |  108.9  |  1.7
      dim=2  |  120.2  |  1.7
      dim=3  |   78.3  |  4.4

 Times are in milliseconds (ms).
diff --git a/torch_dimension_performance.py b/torch_dimension_performance.py
 #!/usr/bin/env python3

 import torch.nn.functional as F
 import torch.utils.benchmark as benchmark
 import torch

 # Benchmark softmax / sum / min along every dimension of a 4-D tensor on the
 # CPU and, when one is usable, on CUDA, then print a comparison table.
 results = []

 # Example tensor
 x = torch.randn(100, 100, 100, 100)

 # Only benchmark CUDA when a device is actually usable, so the script still
 # produces the CPU column on machines without a GPU instead of crashing.
 devices = ["cpu"]
 if torch.cuda.is_available():
    devices.append("cuda")

 # Move the tensor to each device once up front; repeating the transfer of
 # this 100**4-element tensor for every (dim, stmt) pair is wasted setup work.
 inputs = {device: x.to(device) for device in devices}

 # Evaluate the function over each dimension
 for i in range(x.dim()):
    # Define different functions
    for stmt in ["F.softmax(x, dim=i)", "torch.sum(x, dim=i)", "torch.min(x, dim=i)"]:
        # Evaluate for cpu/cuda
        for device in devices:
            timer = benchmark.Timer(
                stmt=stmt,
                globals={
                    "x": inputs[device],
                    "i": i,
                    "torch": torch,
                    "F": F,
                },
                # Drop "=i" so every dimension of one function shares a label.
                label=stmt.replace("=i", ""),
                sub_label=f"dim={i}",
                description=device,
            )
            results.append(timer.blocked_autorange(min_run_time=1))

 # Summarize
 compare = benchmark.Compare(results)
 compare.colorize()
 compare.print()
	[---- F.softmax(x, dim) -----]
	             |   cpu   |  cuda
	1 threads: -------------------
	      dim=0  |  197.1  |   9.1
	      dim=1  |  193.6  |  11.9
	      dim=2  |  210.2  |  11.5
	      dim=3  |  228.8  |   3.4

	Times are in milliseconds (ms).

	[---- torch.sum(x, dim) ----]
	             |  cpu   |  cuda
	1 threads: ------------------
	      dim=0  |  24.6  |  1.6
	      dim=1  |  23.2  |  1.6
	      dim=2  |  19.4  |  1.6
	      dim=3  |  14.6  |  2.6

	Times are in milliseconds (ms).

	[---- torch.min(x, dim) -----]
	             |   cpu   |  cuda
	1 threads: -------------------
	      dim=0  |  113.6  |  1.7
	      dim=1  |  108.9  |  1.7
	      dim=2  |  120.2  |  1.7
	      dim=3  |   78.3  |  4.4

	Times are in milliseconds (ms).
	#!/usr/bin/env python3

	import torch.nn.functional as F
	import torch.utils.benchmark as benchmark
	import torch

	# Benchmark softmax / sum / min along every dimension of a 4-D tensor on the
	# CPU and, when one is usable, on CUDA, then print a comparison table.
	results = []

	# Example tensor
	x = torch.randn(100, 100, 100, 100)

	# Only benchmark CUDA when a device is actually usable, so the script still
	# produces the CPU column on machines without a GPU instead of crashing.
	devices = ["cpu"]
	if torch.cuda.is_available():
	    devices.append("cuda")

	# Move the tensor to each device once up front; repeating the transfer of
	# this 100**4-element tensor for every (dim, stmt) pair is wasted setup work.
	inputs = {device: x.to(device) for device in devices}

	# Evaluate the function over each dimension
	for i in range(x.dim()):
	    # Define different functions
	    for stmt in ["F.softmax(x, dim=i)", "torch.sum(x, dim=i)", "torch.min(x, dim=i)"]:
	        # Evaluate for cpu/cuda
	        for device in devices:
	            timer = benchmark.Timer(
	                stmt=stmt,
	                globals={
	                    "x": inputs[device],
	                    "i": i,
	                    "torch": torch,
	                    "F": F,
	                },
	                # Drop "=i" so every dimension of one function shares a label.
	                label=stmt.replace("=i", ""),
	                sub_label=f"dim={i}",
	                description=device,
	            )
	            results.append(timer.blocked_autorange(min_run_time=1))

	# Summarize
	compare = benchmark.Compare(results)
	compare.colorize()
	compare.print()