PyTorch Hacks
import torch
import torchsummary
torch.set_default_tensor_type(torch.FloatTensor)
from pynvml import *
try: nvmlInit()
except:pass
class VoxelNetFeatOrig(torch.nn.Module):

    def __init__(self, verbose=False):
        super(VoxelNetFeatOrig, self).__init__()
        self.verbose = verbose

        self.backbone_block1 = torch.nn.Sequential()
        self.backbone_block1.add_module("BBone1_Conv3D", torch.nn.Conv3d(in_channels=1, out_channels=32, kernel_size=(7,7,7), padding=(3,3,3), bias=False))
        self.backbone_block1.add_module("BBone1_MPool3D", torch.nn.MaxPool3d(kernel_size=(2, 2, 2)))
        self.backbone_block1.add_module("BBone1_BN3D", torch.nn.BatchNorm3d(num_features=32))
        self.backbone_block1.add_module("BBone1_LRelu", torch.nn.ReLU(inplace=True))

        self.backbone_block2 = torch.nn.Sequential()
        self.backbone_block2.add_module("BBone2_Conv3D", torch.nn.Conv3d(in_channels=32, out_channels=64, kernel_size=(5,5,5), padding=(2,2,2), bias=False))
        self.backbone_block2.add_module("BBone2_MPool3D", torch.nn.MaxPool3d(kernel_size=(2, 2, 2)))
        self.backbone_block2.add_module("BBone2_BN3D", torch.nn.BatchNorm3d(num_features=64))
        self.backbone_block2.add_module("BBone2_LRelu", torch.nn.ReLU(inplace=True))

        self.backbone_block3 = torch.nn.Sequential()
        self.backbone_block3.add_module("BBone3_Conv3D", torch.nn.Conv3d(in_channels=64, out_channels=128, kernel_size=(3,3,3), padding=(1,1,1), bias=False))
        self.backbone_block3.add_module("BBone3_BN3D", torch.nn.BatchNorm3d(num_features=128))
        self.backbone_block3.add_module("BBone3_LRelu", torch.nn.ReLU(inplace=True))

    def forward(self, x):
        if self.verbose: print (' ---- [VoxelNetFeatOrig] x: ', x.shape)
        x = self.backbone_block1(x)
        if self.verbose: print (' ---- [VoxelNetFeatOrig] backbone_block1(x): ', x.shape)
        x = self.backbone_block2(x)
        if self.verbose: print (' ---- [VoxelNetFeatOrig] backbone_block2(x): ', x.shape)
        x_backbone = self.backbone_block3(x)
        if self.verbose: print (' ---- [VoxelNetFeatOrig] backbone_block3(x): ', x_backbone.shape)
        return x_backbone
class VoxelNetJointOrig(torch.nn.Module):

    def __init__(self, count_joints, in_channels, inter_channels, tag):  # tag='Joints1'
        super(VoxelNetJointOrig, self).__init__()
        self.count_joints = count_joints

        self.task_joints = torch.nn.Sequential()
        self.task_joints.add_module(tag + "_Conv3D_1", torch.nn.Conv3d(in_channels=in_channels, out_channels=inter_channels, kernel_size=(3,3,3), padding=(1,1,1)))
        self.task_joints.add_module(tag + "_LRelu_1", torch.nn.ReLU(inplace=True))
        self.task_joints.add_module(tag + "_Conv3D_2", torch.nn.Conv3d(in_channels=inter_channels, out_channels=inter_channels, kernel_size=(3,3,3), padding=(1,1,1)))
        self.task_joints.add_module(tag + "_LRelu_2", torch.nn.ReLU(inplace=True))
        self.task_joints.add_module(tag + "_Conv3D_3", torch.nn.Conv3d(in_channels=inter_channels, out_channels=inter_channels, kernel_size=(3,3,3), padding=(1,1,1)))
        self.task_joints.add_module(tag + "_LRelu_3", torch.nn.ReLU(inplace=True))
        self.task_joints.add_module(tag + "_Conv3D_4", torch.nn.Conv3d(in_channels=inter_channels, out_channels=inter_channels, kernel_size=(1,1,1), padding=(0,0,0)))
        self.task_joints.add_module(tag + "_LRelu_4", torch.nn.ReLU(inplace=True))
        self.task_joints.add_module(tag + "_Conv3D_5", torch.nn.Conv3d(in_channels=inter_channels, out_channels=self.count_joints, kernel_size=(1,1,1), padding=(0,0,0)))

    def forward(self, x):
        return self.task_joints(x)
class VoxelNetBPartOrig(torch.nn.Module):

    def __init__(self, count_bparts, in_channels, inter_channels, tag):  # tag='BParts1'
        super(VoxelNetBPartOrig, self).__init__()
        self.count_bparts = count_bparts

        self.task_bparts = torch.nn.Sequential()
        self.task_bparts.add_module(tag + "_Conv3D_1", torch.nn.Conv3d(in_channels=in_channels, out_channels=inter_channels, kernel_size=(3,3,3), padding=(1,1,1)))
        self.task_bparts.add_module(tag + "_LRelu_1", torch.nn.ReLU(inplace=True))
        self.task_bparts.add_module(tag + "_Conv3D_2", torch.nn.Conv3d(in_channels=inter_channels, out_channels=inter_channels, kernel_size=(3,3,3), padding=(1,1,1)))
        self.task_bparts.add_module(tag + "_LRelu_2", torch.nn.ReLU(inplace=True))
        self.task_bparts.add_module(tag + "_Conv3D_3", torch.nn.Conv3d(in_channels=inter_channels, out_channels=inter_channels, kernel_size=(3,3,3), padding=(1,1,1)))
        self.task_bparts.add_module(tag + "_LRelu_3", torch.nn.ReLU(inplace=True))
        self.task_bparts.add_module(tag + "_Conv3D_4", torch.nn.Conv3d(in_channels=inter_channels, out_channels=inter_channels, kernel_size=(1,1,1), padding=(0,0,0)))
        self.task_bparts.add_module(tag + "_LRelu_4", torch.nn.ReLU(inplace=True))
        self.task_bparts.add_module(tag + "_Conv3D_5", torch.nn.Conv3d(in_channels=inter_channels, out_channels=self.count_bparts, kernel_size=(1,1,1), padding=(0,0,0)))

    def forward(self, x):
        return self.task_bparts(x)
def get_interpolated(x, align_corners_interpolate=False):
    return torch.nn.functional.interpolate(x, scale_factor=4, mode='trilinear', align_corners=align_corners_interpolate)

def print_gpustats():
    import os
    info = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(int(os.environ['CUDA_VISIBLE_DEVICES'])))
    str_gpu = '%.4f' % (info.used/1024/1024/1000) + '/' + '%.4f' % (info.total/1024/1024/1000) + ' GB'
    print (' - GPU: ' + str_gpu)
class VoxelNetOrig(torch.nn.Module):

    def __init__(self, count_joints, count_bparts, iters=1
                    , output='raw', nonlinearity_order='first', batch_norm=True
                    , net_interpolate=False, inference_only=False, align_corners_interpolate=False):
        super(VoxelNetOrig, self).__init__()
        self.count_joints = count_joints
        self.count_bparts = count_bparts
        self.net_interpolate = net_interpolate
        self.align_corners_interpolate = align_corners_interpolate
        self.iters = iters

        self.x_backbone_net = VoxelNetFeatOrig().cuda()
        self.x_joints1_net = VoxelNetJointOrig(self.count_joints, in_channels=128, inter_channels=128, tag="Joints1").cuda()
        self.x_bparts1_net = VoxelNetBPartOrig(self.count_bparts, in_channels=128, inter_channels=128, tag="BParts1").cuda()
        if self.iters == 3:
            self.x_joints2_net = VoxelNetJointOrig(self.count_joints, in_channels=128 + self.count_joints + self.count_bparts, inter_channels=128, tag="Joints2").cuda()
            self.x_bparts2_net = VoxelNetBPartOrig(self.count_bparts, in_channels=128 + self.count_joints + self.count_bparts, inter_channels=128, tag="BParts2").cuda()
            self.x_joints3_net = VoxelNetJointOrig(self.count_joints, in_channels=128 + self.count_joints + self.count_bparts, inter_channels=128, tag="Joints3").cuda()
            self.x_bparts3_net = VoxelNetBPartOrig(self.count_bparts, in_channels=128 + self.count_joints + self.count_bparts, inter_channels=128, tag="BParts3").cuda()

    def forward(self, x):
        x_backbone = self.x_backbone_net(x)
        x_joints1 = self.x_joints1_net(x_backbone)
        x_bparts1 = self.x_bparts1_net(x_backbone)

        if self.iters == 1:
            if self.net_interpolate is False:
                return [], [x_joints1], [x_bparts1]
            else:
                x_joints1_interpolate = get_interpolated(x_joints1)
                x_bparts1_interpolate = get_interpolated(x_bparts1)
                return x_backbone, [x_joints1, x_joints1_interpolate], [x_bparts1, x_bparts1_interpolate]

        if self.iters == 3:
            x_iter1_op = torch.cat([x_backbone, x_joints1, x_bparts1], dim=1)
            x_joints2 = self.x_joints2_net(x_iter1_op)
            x_bparts2 = self.x_bparts2_net(x_iter1_op)
            x_iter2_op = torch.cat([x_backbone, x_joints2, x_bparts2], dim=1)
            x_joints3 = self.x_joints3_net(x_iter2_op)
            x_bparts3 = self.x_bparts3_net(x_iter2_op)

            if self.net_interpolate is False:
                return [], [x_joints1, x_joints2, x_joints3], [x_bparts1, x_bparts2, x_bparts3]
            else:
                x_joints1_interpolate = get_interpolated(x_joints1)
                x_bparts1_interpolate = get_interpolated(x_bparts1)
                return [], [x_joints1, x_joints1_interpolate, x_joints2, [], x_joints3, []], [x_bparts1, x_bparts1_interpolate, x_bparts2, [], x_bparts3, []]
if __name__ == "__main__":
    x = torch.rand(1,1,256,256,160).float()
    if 1:
        net = VoxelNetOrig(count_joints=8, count_bparts=7).cuda()
        if 0:
            x_backbone, x_joints, x_bparts = net(x.cuda())
            print (' - [VoxelNetOrig] x_joints1: ', x_joints[0].shape, ' || x_bparts: ', x_bparts[0].shape)
            print_gpustats()
        else:
            from torchsummary import summary
            summary(net.cuda(), input_size=(1,256,256,160))
import sys
import torch
import torchvision
torch.set_default_tensor_type(torch.FloatTensor)

if not torch.cuda.is_available():
    print (' - [ERROR] CUDA is not available!')
    USE_CUDA = False
    sys.exit(1)
else:
    USE_CUDA = True
class DNN(torch.nn.Module):

    def __init__(self, dim_in, dim_out):
        super(DNN, self).__init__()
        self.fc1 = torch.nn.Linear(dim_in, 256)
        self.fc2 = torch.nn.Linear(256, 256)
        self.fc3 = torch.nn.Linear(256, dim_out)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x
class DQN(torch.nn.Module):

    def __init__(self, h, w, outputs):
        super(DQN, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=5, stride=2)
        self.bn1 = torch.nn.BatchNorm2d(16)
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = torch.nn.BatchNorm2d(32)
        self.conv3 = torch.nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = torch.nn.BatchNorm2d(32)

        # Number of Linear input connections depends on output of conv2d layers
        # and therefore the input image size, so compute it.
        def conv2d_size_out(size, kernel_size=5, stride=2):
            return (size - (kernel_size - 1) - 1) // stride + 1
        convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
        convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
        linear_input_size = convw * convh * 32
        self.head = torch.nn.Linear(linear_input_size, outputs)

    # Called with either one element to determine next action, or a batch
    # during optimization. Returns tensor([[left0exp,right0exp]...]).
    def forward(self, x):
        x = torch.nn.functional.relu(self.bn1(self.conv1(x)))
        x = torch.nn.functional.relu(self.bn2(self.conv2(x)))
        x = torch.nn.functional.relu(self.bn3(self.conv3(x)))
        return self.head(x.view(x.size(0), -1))
# Running a python script as background process
# CUDA_VISIBLE_DEVICES=1 nohup python3 -u <yourfile>.py > <outputfile>.log &

Package Environment

To create an environment config file, one may use conda.

Steps

1. Ensure you have conda installed
   - `conda --version`
   - `conda init bash` OR `conda init powershell`
     - Restart the terminal
2. Create a conda env, install packages and export the env-config file (a sample of the exported file is sketched after this list)
   - `conda create --name <env_name> python=3.7` (works on both Unix and Windows)
   - `conda activate <env_name>`
   - `conda install torch pynvml open3d==0.9.0.0`
   - `conda env export --no-builds > conda_env_config.yml`
     - Make sure to remove the platform-specific packages
       - Windows: [vc, vs2015_runtime, wincertstore]
   - `conda deactivate`
3. Share the env-config file and recreate the env on a new machine
   - `conda env create --file conda_env_config.yml --name <env_name>`
     - To export a config file without the machine-specific `prefix:` line
       - Windows: `conda env export --no-builds | findstr -v "prefix" > environment.yml`
         - `Get-Content .\environment.yml | Set-Content -Encoding utf8 .\environment1.yml` (re-encode as UTF-8)
       - Unix: `conda env export --no-builds | grep -v "prefix" > environment.yml`
   - `conda activate <env_name>`
4. Delete a conda env
   - `conda env list`
   - `conda env remove -n <env_name>`
5. Other conda commands
   - `conda update conda`
     - As of Aug 2020: conda version 4.8.3, conda-build version 3.18.11
   - `conda env export > environment.yml`
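A hypothetical excerpt of what the exported `conda_env_config.yml` might look like; the package names, versions and path below are illustrative placeholders, not an actual export:

```yaml
# Illustrative excerpt of a `conda env export --no-builds` output.
name: <env_name>
channels:
  - defaults
  - conda-forge
dependencies:
  - python=3.7
  - pynvml=8.0.4          # version shown only as an example
  - pip:
      - open3d==0.9.0.0
# The machine-specific line that the findstr/grep commands above strip out:
prefix: /home/<user>/miniconda3/envs/<env_name>
```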
import torch
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
USE_GPU = torch.cuda.is_available()
if __name__ == "__main__":
    print(' - cuda', torch.cuda.current_device(), torch.cuda.device_count(), torch.cuda.get_device_name(0))
##################### memory footprint support libraries/code #####################
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))

printm()
#####################
# ! pip install jupyter_contrib_nbextensions
# ! jupyter contrib nbextension install --user
# !jupyter nbextension enable codefolding/main
!jupyter nbextension enable hinterland/hinterland
# Pytorch intricacies
1. [Accumulating gradients to get a large batch size on a small GPU](https://discuss.pytorch.org/t/how-to-implement-accumulated-gradient-in-pytorch-i-e-iter-size-in-caffe-prototxt/2522/4) (a minimal sketch is given below)
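A minimal sketch of the accumulated-gradient idea, using a hypothetical toy model and random data only so the snippet runs: the loss is divided by `accum_steps`, gradients pile up in `param.grad` over several `backward()` calls, and `optimizer.step()` fires once per `accum_steps` mini-batches, emulating a batch size of `accum_steps * per-step batch size`.

```python
import torch

# Hypothetical toy setup, only to make the sketch self-contained.
model     = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = torch.nn.CrossEntropyLoss()
data      = [(torch.rand(8, 10), torch.randint(0, 2, (8,))) for _ in range(8)]

accum_steps = 4  # effective batch size = 4 * 8 = 32

optimizer.zero_grad()
for i, (x, y) in enumerate(data):
    loss = criterion(model(x), y) / accum_steps  # scale so accumulated grads match one large batch
    loss.backward()                              # grads accumulate in param.grad until zero_grad()
    if (i + 1) % accum_steps == 0:
        optimizer.step()                         # one update per accum_steps mini-batches
        optimizer.zero_grad()
```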
# Installation
1. Check CUDA version
- `nvcc --version`
2. Install the matching [pytorch](https://pytorch.org/resources) build for that CUDA version
3. Open python console
- `import torch`
- `torch.cuda.get_device_name(0)`
- `torch.cuda.is_available()`
- `watch -n 0.1 nvidia-smi` (run in a separate terminal to watch GPU usage live)
- `torch.rand(3,3).cuda()` # a python process should now appear in the nvidia-smi process list (a combined check is sketched below)
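The same console checks rolled into one small script; a minimal sketch, assuming pytorch was installed for the CUDA version reported by `nvcc --version`:

```python
import torch

if __name__ == "__main__":
    print('CUDA available :', torch.cuda.is_available())
    if torch.cuda.is_available():
        print('Device name    :', torch.cuda.get_device_name(0))
        x = torch.rand(3, 3).cuda()   # the allocation shows up under `watch -n 0.1 nvidia-smi`
        print('Tensor device  :', x.device)
```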
from torchsummary import summary
if __name__ == "__main__":
    # your_model: an instantiated torch.nn.Module
    summary(your_model.to("cpu"), input_size=(3, 448, 448))
    summary(your_model.cuda(), input_size=(3, 448, 448))
"""
CUDA_VISIBLE_DEVICES="" python model_investigate.py
"""
import os
import torch
import hiddenlayer as hl  # pip install hiddenlayer

if __name__ == "__main__":
    transforms = [
        # Fold Conv, BN, RELU layers into one
        hl.transforms.Fold("Conv > BatchNorm > LeakyRelu", "ConvBnRelu"),
        hl.transforms.Fold("ConvBnRelu > MaxPool", "ConvBnReluMax"),
        hl.transforms.Fold("Constant > Reshape > Transpose", "ConstantReshape")
    ]
    # model: an instantiated torch.nn.Module (here a YOLOv2-style net)
    g = hl.build_graph(model, torch.zeros([1, 3, 416, 416]).cuda(), transforms=transforms)
    g.save(os.path.join("pytorch_yolov2.pdf"))
    # hl.build_graph(model, torch.zeros([1, 3, 416, 416]).cuda(), transforms=transforms)
import torch
import numpy as np
"""
1 1
2 2
- the above is one slice of the 2x2x2 cube
"""
tmp = np.expand_dims(np.expand_dims(np.array( [[[1,1],[2,2]],[[1,1],[2,2]]]),axis=0),axis=0)
tmp_torch = torch.tensor(tmp, dtype=torch.float32)
tmp_torch_interp = torch.nn.functional.interpolate(tmp_torch, scale_factor=2, mode='trilinear', align_corners=True)
print (' - original shape : ', tmp_torch.shape)
print (' - interp shape : ', tmp_torch_interp.shape)
print (tmp_torch)
print (tmp_torch_interp)
tmp = np.expand_dims(np.expand_dims(np.random.random((2,2,2)),axis=0),axis=0)
tmp_torch = torch.tensor(tmp, dtype=torch.float32)
tmp_torch_interp = torch.nn.functional.interpolate(tmp_torch, scale_factor=2, mode='trilinear')
print (tmp_torch)
print (tmp_torch_interp)
import torch
import numpy as np
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if __name__ == "__main__":
    data = np.random.random((3))
    torch.from_numpy(data).unsqueeze(0).to(DEVICE)