Skip to content

Instantly share code, notes, and snippets.

@zou3519
zou3519 / bench.py
Last active June 12, 2018 12:21
[pytorch] GridSampler CUDNN vs THCUNN performance comparison script
import time
import torch
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
from torch.autograd import Variable
def benchmark_shape(N, C, IH, IW, H, W, nrand, nrep):
"""
@zou3519
zou3519 / output.txt
Last active September 18, 2017 15:04
[pytorch] GridSampler CUDNN vs THCUNN perf comparison
Testing small sizes
# benchmark_shape(N, C, IH, IW, H, W, nrand, nrep)
# benchmark_shape(10, 5, 20, 20, 15, 15, 5, 5)
Running CUDNN benchmark
2.23145103455
Running THCUNN benchmark
0.0197968482971
Testing small sizes, big N
# benchmark_shape(N, C, IH, IW, H, W, nrand, nrep)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
class MNISTConvNet(nn.Module):
def __init__(self):
super(MNISTConvNet, self).__init__()
@zou3519
zou3519 / output.txt
Created October 19, 2017 00:03
NCCL_DEBUG=INFO nccl 2.0.5
pytorch-desktop:26456:26456 [0] misc/ibvwrap.cu:60 WARN Failed to open libibverbs.so[.1]
pytorch-desktop:26456:26456 [0] INFO Using internal Network Socket
NCCL version 2.0.5 compiled with CUDA 9.0
pytorch-desktop:26456:26456 [0] INFO NET : Using interface enp6s0:192.168.178.25<0>
pytorch-desktop:26456:26456 [0] INFO NET/Socket : 1 interfaces found
pytorch-desktop:26456:26456 [1] INFO nvmlDeviceGetNvLinkCapability() failed: Not Supported
pytorch-desktop:26456:26456 [1] INFO nvmlDeviceGetNvLinkCapability() failed: Not Supported
pytorch-desktop:26456:26456 [1] INFO nvmlDeviceGetNvLinkCapability() failed: Not Supported
pytorch-desktop:26456:26456 [1] INFO nvmlDeviceGetNvLinkCapability() failed: Not Supported
pytorch-desktop:26456:26456 [1] INFO nvmlDeviceGetNvLinkCapability() failed: Not Supported
@zou3519
zou3519 / output.txt
Created October 19, 2017 00:09
NCCL_DEBUG=INFO nccl 1.3.5
(root) pytorch@pytorch-desktop:~/multigpu-test$
(root) pytorch@pytorch-desktop:~/multigpu-test$ NCCL_DEBUG=INFO python test-simple.py
Checkpoint 1
Checkpoint 2
INFO NCCL debug level set to INFO
NCCL version 1.3.5 compiled with CUDA 9.0
INFO rank 0 using buffSize = 2097152
INFO rank 0 using device 0 (0000:0C:00.0)
INFO rank 1 using buffSize = 2097152
INFO rank 1 using device 1 (0000:0D:00.0)
@zou3519
zou3519 / hang.py
Created October 19, 2017 02:17
This script gets stuck, but only on some machines...
from torch import nn
from torch.autograd import Variable
import torch
# Minimal reproduction: push one input through a CUDA linear layer wrapped in
# nn.DataParallel. The "Checkpoint" prints show how far the script got before
# it stalled.
# 5-in / 5-out linear layer, moved to the GPU.
l = nn.Linear(5,5).cuda()
# Wrap the layer in DataParallel using its default arguments.
pl = nn.DataParallel(l)
print("Checkpoint 1")
# Random 5x5 CUDA input with gradient tracking enabled.
a = Variable(torch.rand(5,5).cuda(), requires_grad=True)
print("Checkpoint 2")
# Forward pass through the DataParallel wrapper; the result is printed.
print(pl(a)) # Here it gets stuck
@zou3519
zou3519 / test.py
Created October 25, 2017 17:51
test scatter_add_ and index_add_ safety checks
import torch
def test_index_add(cuda):
a = torch.ones(5)
b = torch.ones(5)
i = torch.ones(5).long()
if cuda:
a = a.cuda()
b = b.cuda()
i = i.cuda()
import argparse
import gym
import numpy as np
from itertools import count
import torch
import torch.distributions as D
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
@zou3519
zou3519 / testvar.py
Last active November 6, 2017 20:10
Benchmarking inner dimension variance speed, before and after numerical stability changes
import torch
# IPython session (uses the %timeit magic; not runnable as a plain .py file).
# torch.cuda.synchronize() is part of each timed statement, so every
# measurement includes waiting for the queued GPU work to finish.

# Case 1: 1-D tensor of 100 elements, variance over its only dimension.
tensor = torch.randn(100).cuda()
%timeit tensor.var(0); torch.cuda.synchronize()
# Case 2: 1-D tensor of 10000 elements, variance over its only dimension.
tensor = torch.randn(10000).cuda()
%timeit tensor.var(0); torch.cuda.synchronize()
# Case 3: 3-D tensor (1000, 2, 10), variance over the last dimension (size 10).
tensor = torch.randn(1000, 2, 10).cuda()
%timeit tensor.var(2); torch.cuda.synchronize()
@zou3519
zou3519 / test_outervar.py
Created November 7, 2017 15:56
Numbers for variance of an outer dimension on cuda, before and after numeric stability changes
import torch
# IPython session (uses the %timeit magic; not runnable as a plain .py file).
# torch.cuda.synchronize() is part of each timed statement, so every
# measurement includes waiting for the queued GPU work to finish.

# Case 1: (100, 1) tensor, variance over dimension 0 (size 100).
tensor = torch.randn(100, 1).cuda()
%timeit tensor.var(0); torch.cuda.synchronize()
# Case 2: (10000, 1) tensor, variance over dimension 0 (size 10000).
tensor = torch.randn(10000, 1).cuda()
%timeit tensor.var(0); torch.cuda.synchronize()
# Case 3: (1000, 2, 10) tensor, variance over the middle dimension (size 2).
tensor = torch.randn(1000, 2, 10).cuda()
%timeit tensor.var(1); torch.cuda.synchronize()