tmux new [-s name] [cmd] (:new) - new session
tmux ls (:ls) - list sessions
tmux switch [-t name] (:switch) - switches to an existing session
Latency Comparison Numbers | |
-------------------------- | |
L1 cache reference/hit 1.5 ns 4 cycles | |
Floating-point add/mult/FMA operation 1.5 ns 4 cycles | |
L2 cache reference/hit 5 ns 12 ~ 17 cycles | |
Branch mispredict 6 ns 15 ~ 20 cycles | |
L3 cache hit (unshared cache line) 16 ns 42 cycles | |
L3 cache hit (shared line in another core) 25 ns 65 cycles | |
Mutex lock/unlock 25 ns | |
L3 cache hit (modified in another core) 29 ns 75 cycles |
from graphviz import Digraph | |
from torch.autograd import Variable | |
import torch | |
def make_dot(var, params=None): | |
if params is not None: | |
assert isinstance(params.values()[0], Variable) | |
param_map = {id(v): k for k, v in params.items()} |
import gc | |
import torch | |
## MEM utils ## | |
def mem_report(): | |
'''Report the memory usage of the tensor.storage in pytorch | |
Both on CPUs and GPUs are reported''' | |
def _mem_report(tensors, mem_type): |
def logsigsoftmax(logits, dim=1):
    """Compute log(sigsoftmax(logits)) from https://arxiv.org/pdf/1805.10829.pdf

    sigsoftmax is defined as

        sigsoftmax(z)_i = exp(z_i) * sigmoid(z_i) / sum_j exp(z_j) * sigmoid(z_j)

    which is the same as softmax(z + log(sigmoid(z))). Evaluating it in that
    form keeps every step in log space: taking log(sigmoid(z)) directly
    underflows to -inf for very negative logits, and the normaliser can
    underflow to 0, producing -inf / nan results.

    Args:
        logits: floating-point tensor of unnormalised scores.
        dim: dimension to normalise over. Defaults to 1, the dimension this
            function has always reduced over.

    Returns:
        Tensor with the same shape as ``logits`` holding log-probabilities;
        ``exp`` of the result sums to 1 along ``dim``.
    """
    # logsigmoid is the numerically stable form of log(sigmoid(x)).
    gated_logits = logits + torch.nn.functional.logsigmoid(logits)
    # log_softmax performs the max-shifted log-sum-exp normalisation internally.
    return torch.nn.functional.log_softmax(gated_logits, dim=dim)
import pycuda.driver as cuda | |
import pycuda.autoinit | |
from pycuda.compiler import SourceModule | |
import numpy as np | |
def compute_xcorr_cpu(d):
    """Compute an integer cross-correlation of ``d`` on the CPU with NumPy.

    Steps, in order:
      1. Cast ``d`` to float32 and reinterpret each adjacent pair of values
         on the last axis as one complex64 sample (real, imaginary).
      2. Move axis 1 to the end (the transpose requires a 4-D array).
      3. Form the outer product of the last axis with its complex conjugate.
      4. Reinterpret the complex result as interleaved float32 (re, im)
         values, truncate to int32, and sum over axis -4 of the 5-D result,
         which is axis 2 of the original input.

    Args:
        d: 4-D array-like with an even-length last axis.
            NOTE(review): the physical meaning of each axis is not visible
            here -- confirm against the caller.

    Returns:
        Integer array of shape ``(d.shape[0], d.shape[3] // 2, d.shape[1],
        2 * d.shape[1])``; the last axis interleaves real and imaginary parts.
    """
    samples = d.astype('float32').view('complex64')
    samples = samples.transpose((0, 2, 3, 1)).copy()
    # Explicit output subscripts; identical to the implicit '...i,...j' form.
    outer = np.einsum('...i,...j->...ij', samples, np.conj(samples))
    xcorr_cpu = outer.view('float32').astype('int32').sum(axis=-4)
    return xcorr_cpu
# This isn't supposed to run as a bash script; I named it with ".sh" for syntax highlighting. | |
# https://developer.nvidia.com/nsight-systems | |
# https://docs.nvidia.com/nsight-systems/profiling/index.html | |
# My preferred nsys (command line executable used to create profiles) commands | |
# | |
# In your script, write | |
# torch.cuda.nvtx.range_push("region name") | |
# ... |