ptrblck

Deep Learning Frameworks @NVIDIA

ptrblck / profile_conv3d

Created August 16, 2019 14:57

	import time
	import torch
	import torch.nn as nn



	def test(cudnn, benchmark, dtype):
	print('cudnn {}, benchmark {}, dtype {}'.format(cudnn, benchmark, dtype))
	torch.backends.cudnn.enabled = cudnn
	torch.backends.cudnn.benchmark = benchmark

ptrblck / layernorm_vs_fused

Created August 30, 2019 15:10

	import torch
	import torch.nn as nn

	torch.backends.cudnn.benchmark = True

	from apex.normalization import FusedLayerNorm

	import time

ptrblck / hessian_vector.nvprof

Last active September 23, 2019 17:37

This file has been truncated, but you can view the full file.

View raw

ptrblck / pytorch_full_update_vs_frozen

Created October 6, 2019 19:01

	import torch
	import torch.nn as nn
	import torchvision.models as models
	import time

	# Create dummy data: one random input tensor of shape (1, 3, 224, 224) and
	# one integer target drawn from [0, 100), both created directly on the
	# 'cuda' device.
	data = torch.randn(1, 3, 224, 224, device='cuda')
	target = torch.randint(0, 100, (1,), device='cuda')

	# ResNet-152 built with torchvision's default constructor arguments.
	model = models.resnet152()

ptrblck / gist:331d0e5087b3aef199020c32cba2f3c4

Created November 25, 2019 08:14

pytorch_cuda_pow_test

	import torch
	import torch.nn as nn

	import time

	# Enable the cuDNN benchmark flag. The attribute name was previously
	# misspelled as `benachmark`, which never set the intended flag.
	torch.backends.cudnn.benchmark = True

	# Two random operands of shape (1024, 1024, 10), moved to the CUDA device.
	a = torch.randn(1024, 1024, 10).cuda()
	b = torch.randn(1024, 1024, 10).cuda()

ptrblck / pytorch_conv3d_smallkernel_profile

Created January 26, 2020 05:27

	import torch
	import torch.nn.functional as F
	import time


	def test(input, kernel, target):
	# Warmup
	for _ in range(50):
	output = F.conv3d(input, kernel)

ptrblck / pytorch_lstm_ddp

Created February 23, 2020 02:18

ptrblck / sync_bn_vanilla_apex

Created April 8, 2020 10:27

	import torch
	import torch.nn as nn
	from torch.nn.parallel import DistributedDataParallel as DDP
	from apex.parallel import SyncBatchNorm as ApexSyncBatchNorm

	import argparse

	# Command-line options:
	#   --local_rank  integer, defaults to 0
	#   --apex        boolean switch, False unless passed
	# NOTE(review): --apex presumably selects ApexSyncBatchNorm over the native
	# implementation -- confirm against the rest of the script.
	parser = argparse.ArgumentParser()
	parser.add_argument('--local_rank', type=int, default=0)
	parser.add_argument('--apex', action='store_true')

ptrblck / layer_norm_nvfuser.py

Created July 5, 2022 23:23

	import time
	import torch
	import torch.nn as nn
	import torch.nn.functional as F


	class LayerNorm(nn.Module):
	def __init__(self, normalized_shape, eps=1e-6):
	super().__init__()
	self.weight = nn.Parameter(torch.ones(normalized_shape))

ptrblck / scatte_reduce_example

Created July 23, 2022 21:00

	# for https://twitter.com/francoisfleuret/status/1550886362815012865

	import torch

	# setup: problem sizes used to shape the example tensors below
	N, Q, R = 5, 20, 10

	# U: (N, Q) tensor of standard-normal samples.
	U = torch.randn(N, Q)
	# V: the integers 0 .. N*R-1 arranged row by row as an (N, R) tensor,
	# converted to floating point.
	V = torch.arange(N*R).view(N, R).float()