vfdev-5 · October 25, 2021 09:25
diff --git a/b1.log b/b1.log
 Torch config: PyTorch built with:
  - GCC 7.3
  - C++ Version: 201402
  - Intel(R) oneAPI Math Kernel Library Version 2021.3-Product Build 20210617 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.1
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
  - CuDNN 8.0.5
  - Magma 2.5.2
  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.11.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, 

 Num threads: 1
 Torch version: 1.11.0.dev20211013
 Torchvision version: 0.12.0a0
 PIL version: 8.3.2
 [----------------------------- Resize -----------------------------]
                         |  Transform on PIL  |  Transform on Tensor
 1 threads: ---------------------------------------------------------
      torch.uint8 / RGB  |       1752.1       |         1839.4      
      torch.float32 / F  |        902.9       |          887.2      

 Times are in microseconds (us).

 [---------------------- RandomHorizontalFlip ----------------------]
                         |  Transform on PIL  |  Transform on Tensor
 1 threads: ---------------------------------------------------------
      torch.uint8 / RGB  |        62.9        |         390.2       
      torch.float32 / F  |        63.1        |          54.2       

 Times are in microseconds (us).

 [----------------------- RandomResizedCrop ------------------------]
                         |  Transform on PIL  |  Transform on Tensor
 1 threads: ---------------------------------------------------------
      torch.uint8 / RGB  |       1092.5       |         1396.3      
      torch.float32 / F  |        606.9       |          362.1      

 Times are in microseconds (us).

 [-------------------------- RandAugment ---------------------------]
                         |  Transform on PIL  |  Transform on Tensor
 1 threads: ---------------------------------------------------------
      torch.uint8 / RGB  |        1.0         |          2.0        

 Times are in milliseconds (ms).

 [------------------------- ImageNet train -------------------------]
                         |  Transform on PIL  |  Transform on Tensor
 1 threads: ---------------------------------------------------------
      torch.uint8 / RGB  |        2.5         |          2.0        

 Times are in milliseconds (ms).

diff --git a/main.py b/main.py
 from collections import namedtuple

 import PIL
 from PIL import Image

 import torch
 import torch.utils.benchmark as benchmark
 import torchvision
 import torchvision.transforms as T
 import torchvision.transforms.functional as F


 # One benchmark case:
 #   op               - the callable transform that gets timed
 #   input_size       - spatial dims appended after the channel dim of the
 #                      randomly generated input image
 #   name             - label shown in the report; None falls back to the
 #                      class name of `op`
 #   supported_dtypes - dtypes to benchmark; None means every dtype is run
 BTransform = namedtuple(
     "BTransform",
     ["op", "input_size", "name", "supported_dtypes"],
 )


 transforms = [
     BTransform(
         op=T.Resize([256, 256], interpolation=T.InterpolationMode.BILINEAR),
         input_size=[500, 500],
         name=None,
         supported_dtypes=None,
     ),
     BTransform(
         # p=1.0 so the flip is applied on every timed call
         op=T.RandomHorizontalFlip(p=1.0),
         input_size=[256, 256],
         name=None,
         supported_dtypes=None,
     ),
     BTransform(
         op=T.RandomResizedCrop(224),
         input_size=[500, 500],
         name=None,
         supported_dtypes=None,
     ),
     BTransform(
         op=T.autoaugment.RandAugment(),
         input_size=[224, 224],
         name=None,
         supported_dtypes=[torch.uint8],
     ),

     # ImageNet train preset:
     BTransform(
         op=T.Compose([
             T.RandomResizedCrop(224),
             T.RandomHorizontalFlip(p=0.5),
             # PIL inputs are converted to tensors here; tensor inputs pass through
             lambda x: F.pil_to_tensor(x) if isinstance(x, PIL.Image.Image) else x,
             T.ConvertImageDtype(torch.float),
             T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
         ]),
         input_size=[500, 500],
         name="ImageNet train",
         supported_dtypes=[torch.uint8],
     ),
 ]


 def run_bench(t, min_run_time=2):
     """Time one transform on a PIL image and on the equivalent tensor.

     For every dtype in (torch.uint8, torch.float32) allowed by
     ``t.supported_dtypes`` (``None`` allows both), a random image is
     generated and ``t.op`` is timed twice: once on a ``PIL.Image`` and once
     on the ``torch.Tensor`` it was built from.

     Args:
         t: benchmark case exposing ``op``, ``input_size``, ``name`` and
             ``supported_dtypes`` (a ``BTransform``).
         min_run_time: value forwarded to ``Timer.blocked_autorange`` for
             each measurement. Defaults to 2, the value previously
             hard-coded here.

     Returns:
         list: the objects returned by ``blocked_autorange``, ordered PIL
         first, then tensor, for each benchmarked dtype.
     """
     transform = t.op
     # Fall back to the transform's class name when no explicit label is given.
     label = transform.__class__.__name__ if t.name is None else t.name
     # Queried once so every timer in this call reports the same thread count.
     num_threads = torch.get_num_threads()

     results = []
     for dtype in [torch.uint8, torch.float32]:

         if t.supported_dtypes is not None and dtype not in t.supported_dtypes:
             continue

         # uint8 inputs are 3-channel "RGB"; float32 inputs are 1-channel "F".
         is_float = dtype == torch.float32
         c, mode = (1, "F") if is_float else (3, "RGB")

         # Unpacking accepts any sequence for input_size, not only a list.
         size = [c, *t.input_size]
         tensor = torch.randint(0, 256, size=size, dtype=dtype)
         # Channels-first tensor -> channels-last array for Pillow.
         data = tensor.permute(1, 2, 0).contiguous().cpu().numpy()
         # The single-channel float image is handed to Pillow as a 2D array.
         pil_img = Image.fromarray(data[..., 0] if is_float else data, mode=mode)

         sub_label = f"{dtype} / {mode}"

         # Same statement and labels for both inputs; only `x` and the column
         # description differ. PIL is measured first, then the tensor.
         cases = (
             ("Transform on PIL", pil_img),
             ("Transform on Tensor", tensor),
         )
         for description, x in cases:
             timer = benchmark.Timer(
                 stmt="t(x)",
                 globals={
                     "x": x,
                     "t": transform,
                 },
                 num_threads=num_threads,
                 label=label,
                 sub_label=sub_label,
                 description=description,
             )
             results.append(timer.blocked_autorange(min_run_time=min_run_time))

     return results


 def main():
     """Benchmark every entry of ``transforms`` and print the comparison table."""
     # Flatten the per-transform measurement lists, keeping their order.
     measurements = [m for case in transforms for m in run_bench(case)]
     benchmark.Compare(measurements).print()




 if __name__ == "__main__":

     # Report the build and library versions ahead of the timings so the
     # numbers can be tied to a specific environment.
     environment = (
         f"Torch config: {torch.__config__.show()}",
         f"Num threads: {torch.get_num_threads()}",
         f"Torch version: {torch.__version__}",
         f"Torchvision version: {torchvision.__version__}",
         f"PIL version: {PIL.__version__}",
     )
     for line in environment:
         print(line)

     main()
	Torch config: PyTorch built with:
	- GCC 7.3
	- C++ Version: 201402
	- Intel(R) oneAPI Math Kernel Library Version 2021.3-Product Build 20210617 for Intel(R) 64 architecture applications
	- Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
	- OpenMP 201511 (a.k.a. OpenMP 4.5)
	- LAPACK is enabled (usually provided by MKL)
	- NNPACK is enabled
	- CPU capability usage: AVX2
	- CUDA Runtime 11.1
	- NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
	- CuDNN 8.0.5
	- Magma 2.5.2
	- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.11.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,

	Num threads: 1
	Torch version: 1.11.0.dev20211013
	Torchvision version: 0.12.0a0
	PIL version: 8.3.2
	[----------------------------- Resize -----------------------------]
	                        |  Transform on PIL  |  Transform on Tensor
	1 threads: ---------------------------------------------------------
	     torch.uint8 / RGB  |       1752.1       |         1839.4
	     torch.float32 / F  |        902.9       |          887.2

	Times are in microseconds (us).

	[---------------------- RandomHorizontalFlip ----------------------]
	                        |  Transform on PIL  |  Transform on Tensor
	1 threads: ---------------------------------------------------------
	     torch.uint8 / RGB  |        62.9        |         390.2
	     torch.float32 / F  |        63.1        |          54.2

	Times are in microseconds (us).

	[----------------------- RandomResizedCrop ------------------------]
	                        |  Transform on PIL  |  Transform on Tensor
	1 threads: ---------------------------------------------------------
	     torch.uint8 / RGB  |       1092.5       |         1396.3
	     torch.float32 / F  |        606.9       |          362.1

	Times are in microseconds (us).

	[-------------------------- RandAugment ---------------------------]
	                        |  Transform on PIL  |  Transform on Tensor
	1 threads: ---------------------------------------------------------
	     torch.uint8 / RGB  |        1.0         |          2.0

	Times are in milliseconds (ms).

	[------------------------- ImageNet train -------------------------]
	                        |  Transform on PIL  |  Transform on Tensor
	1 threads: ---------------------------------------------------------
	     torch.uint8 / RGB  |        2.5         |          2.0

	Times are in milliseconds (ms).
	from collections import namedtuple

	import PIL
	from PIL import Image

	import torch
	import torch.utils.benchmark as benchmark
	import torchvision
	import torchvision.transforms as T
	import torchvision.transforms.functional as F


	# One benchmark case:
	#   op               - the callable transform that gets timed
	#   input_size       - spatial dims appended after the channel dim of the
	#                      randomly generated input image
	#   name             - label shown in the report; None falls back to the
	#                      class name of `op`
	#   supported_dtypes - dtypes to benchmark; None means every dtype is run
	BTransform = namedtuple(
	    "BTransform",
	    ["op", "input_size", "name", "supported_dtypes"],
	)


	transforms = [
	    BTransform(
	        op=T.Resize([256, 256], interpolation=T.InterpolationMode.BILINEAR),
	        input_size=[500, 500],
	        name=None,
	        supported_dtypes=None,
	    ),
	    BTransform(
	        # p=1.0 so the flip is applied on every timed call
	        op=T.RandomHorizontalFlip(p=1.0),
	        input_size=[256, 256],
	        name=None,
	        supported_dtypes=None,
	    ),
	    BTransform(
	        op=T.RandomResizedCrop(224),
	        input_size=[500, 500],
	        name=None,
	        supported_dtypes=None,
	    ),
	    BTransform(
	        op=T.autoaugment.RandAugment(),
	        input_size=[224, 224],
	        name=None,
	        supported_dtypes=[torch.uint8],
	    ),

	    # ImageNet train preset:
	    BTransform(
	        op=T.Compose([
	            T.RandomResizedCrop(224),
	            T.RandomHorizontalFlip(p=0.5),
	            # PIL inputs are converted to tensors here; tensor inputs pass through
	            lambda x: F.pil_to_tensor(x) if isinstance(x, PIL.Image.Image) else x,
	            T.ConvertImageDtype(torch.float),
	            T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
	        ]),
	        input_size=[500, 500],
	        name="ImageNet train",
	        supported_dtypes=[torch.uint8],
	    ),
	]


	def run_bench(t, min_run_time=2):
	    """Time one transform on a PIL image and on the equivalent tensor.

	    For every dtype in (torch.uint8, torch.float32) allowed by
	    ``t.supported_dtypes`` (``None`` allows both), a random image is
	    generated and ``t.op`` is timed twice: once on a ``PIL.Image`` and once
	    on the ``torch.Tensor`` it was built from.

	    Args:
	        t: benchmark case exposing ``op``, ``input_size``, ``name`` and
	            ``supported_dtypes`` (a ``BTransform``).
	        min_run_time: value forwarded to ``Timer.blocked_autorange`` for
	            each measurement. Defaults to 2, the value previously
	            hard-coded here.

	    Returns:
	        list: the objects returned by ``blocked_autorange``, ordered PIL
	        first, then tensor, for each benchmarked dtype.
	    """
	    transform = t.op
	    # Fall back to the transform's class name when no explicit label is given.
	    label = transform.__class__.__name__ if t.name is None else t.name
	    # Queried once so every timer in this call reports the same thread count.
	    num_threads = torch.get_num_threads()

	    results = []
	    for dtype in [torch.uint8, torch.float32]:

	        if t.supported_dtypes is not None and dtype not in t.supported_dtypes:
	            continue

	        # uint8 inputs are 3-channel "RGB"; float32 inputs are 1-channel "F".
	        is_float = dtype == torch.float32
	        c, mode = (1, "F") if is_float else (3, "RGB")

	        # Unpacking accepts any sequence for input_size, not only a list.
	        size = [c, *t.input_size]
	        tensor = torch.randint(0, 256, size=size, dtype=dtype)
	        # Channels-first tensor -> channels-last array for Pillow.
	        data = tensor.permute(1, 2, 0).contiguous().cpu().numpy()
	        # The single-channel float image is handed to Pillow as a 2D array.
	        pil_img = Image.fromarray(data[..., 0] if is_float else data, mode=mode)

	        sub_label = f"{dtype} / {mode}"

	        # Same statement and labels for both inputs; only `x` and the column
	        # description differ. PIL is measured first, then the tensor.
	        cases = (
	            ("Transform on PIL", pil_img),
	            ("Transform on Tensor", tensor),
	        )
	        for description, x in cases:
	            timer = benchmark.Timer(
	                stmt="t(x)",
	                globals={
	                    "x": x,
	                    "t": transform,
	                },
	                num_threads=num_threads,
	                label=label,
	                sub_label=sub_label,
	                description=description,
	            )
	            results.append(timer.blocked_autorange(min_run_time=min_run_time))

	    return results


	def main():
	    """Benchmark every entry of ``transforms`` and print the comparison table."""
	    # Collect the measurements of all transforms in declaration order.
	    all_results = []
	    for t in transforms:
	        all_results += run_bench(t)
	    compare = benchmark.Compare(all_results)
	    compare.print()




	if __name__ == "__main__":

	    # Report the build and library versions ahead of the timings so the
	    # numbers can be tied to a specific environment.
	    print(f"Torch config: {torch.__config__.show()}")
	    print(f"Num threads: {torch.get_num_threads()}")
	    print(f"Torch version: {torch.__version__}")
	    print(f"Torchvision version: {torchvision.__version__}")
	    print(f"PIL version: {PIL.__version__}")

	    main()