Skip to content

Instantly share code, notes, and snippets.

% curl -L https://github.com/pytorch/pytorch/pull/69840.patch|diffstat
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 143 100 143 0 0 459 0 --:--:-- --:--:-- --:--:-- 468
100 7148 0 7148 0 0 11172 0 --:--:-- --:--:-- --:--:-- 11172
aten/src/ATen/templates/RegisterBackendSelect.cpp | 27 ++++++++++++++-------------
aten/src/ATen/templates/RegisterSchema.cpp | 16 +---------------
tools/codegen/gen.py | 40 +++++++++++++++++++++++-----------------
3 files changed, 38 insertions(+), 45 deletions(-)
% curl -L https://github.com/pytorch/pytorch/pull/69840.diff|diffstat
@malfet
malfet / torch_query_memory.py
Created November 24, 2021 01:46
Query PyTorch memory utilisation
#!/usr/bin/env python3
import os
from subprocess import check_output
def get_pmem():
    """Return one field of the summary line that ``pmap -d`` prints for this process.

    Runs ``pmap -d <pid> | tail -n 1`` through ``sh -c`` for the current
    process id, splits the resulting line on whitespace and returns the
    fourth token as a string.

    NOTE(review): on procps-style ``pmap`` the last line presumably looks like
    ``mapped: ...K writeable/private: ...K shared: ...K``, which would make the
    returned token the writeable/private size -- confirm on the target system.
    """
    summary_fields = check_output(
        ["sh", "-c", f"pmap -d {os.getpid()}|tail -n 1"],
        encoding="latin1",
    ).split()
    return summary_fields[3]
def get_gpumem():
@malfet
malfet / hello.cu
Last active November 24, 2021 21:05
CUDA Hello world
#include <stdio.h>
// Device entry point: every launched thread prints a greeting tagged with its
// own x-index within the block.
__global__ void kernel() {
  const auto thread_x = threadIdx.x;
  printf("Hello World of CUDA %d\n", thread_x);
}
// Launches `kernel` with a <<<1,1>>> configuration, then waits on the device.
// The status returned by cudaDeviceSynchronize() is used as the process exit code.
int main() {
  kernel<<<1, 1>>>();
  const cudaError_t sync_status = cudaDeviceSynchronize();
  return sync_status;
}
@malfet
malfet / gist:175d76815eebf8b2f19005cb80105b45
Created November 1, 2021 17:44
pytorch.github.io broken links
URL `resources/fonts/dejavu.css'
Parent URL file:///home/nshulga/git/pytorch.github.io/javadoc/stylesheet.css, line 3, col 12
Real URL file:///home/nshulga/git/pytorch.github.io/javadoc/resources/fonts/dejavu.css
Result Error: URLError: <urlopen error [Errno 2] No such file or directory: '/home/nshulga/git/pytorch.github.io/javadoc/resources/fonts/dejavu.css'>
URL `../fonts/FreightSans/freight-sans-bold.woff'
Parent URL file:///home/nshulga/git/pytorch.github.io/assets/main.css, line 1, col 147264
Real URL file:///home/nshulga/git/pytorch.github.io/fonts/FreightSans/freight-sans-bold.woff
Result Error: URLError: <urlopen error [Errno 2] No such file or directory: '/home/nshulga/git/pytorch.github.io/fonts/FreightSans/freight-sans-bold.woff'>
@malfet
malfet / whatsmyname.cpp
Created October 7, 2021 16:41
Prints name of the template argument
#include <iostream>
#include <string>
/// Diagnostic helper: constructing a `WhatsMyName<T>` writes the compiler's
/// decorated signature of this constructor to std::cout, followed by
/// std::endl. The signature text is how the template argument `T` is revealed.
template <class T>
struct WhatsMyName {
  WhatsMyName() {
    // __PRETTY_FUNCTION__ has to be expanded right here, inside the
    // constructor: moving it into a helper would report the helper's
    // signature instead of this instantiation's.
    const char* const signature = __PRETTY_FUNCTION__;
    std::cout << signature;
    std::cout << std::endl;
  }
};
@malfet
malfet / mup-smaller.cpp
Created September 28, 2021 13:49
clang-13 crash using omp critical pragma
// c++ -std=c++14 -O2 -fPIC -Xpreprocessor -fopenmp mup-smaller.cpp
//
#include <omp.h>
#include <atomic>
#include <exception>
#include <algorithm>
// Branch hint: converts `expr` to bool and passes it through __builtin_expect
// with an expected value of 0, i.e. tells the compiler the condition is
// usually false. The macro evaluates to the (bool-converted) value of `expr`.
#define C10_UNLIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 0))
namespace at {
@malfet
malfet / conda_remove_abi.py
Created August 5, 2021 04:46
Ugly script that removes python_abi dep from conda packages
#!/usr/bin/env python3
from conda_build.convert import extract_temporary_directory, create_target_archive
import shutil
import os
import sys
def remove_line_with_pattern(fname, pattern):
with open(fname) as f:
lines = f.read().split('\n')
del_idx = []
@malfet
malfet / neon-trunc.cpp
Created June 11, 2021 14:23
Implements trunc using vcvtq vs vrndq
#include <arm_neon.h>
#include <iostream>
int main(void) {
const float inp[4] = { 3.1, 2.7, -2.9, -1.0000000200408773e+20 };
std::cout << "inp= " << inp[0] << " " << inp[1] << " " << inp[2] << " " << inp[3] << std::endl;
float32x4_t f = vld1q_f32(inp);
float32x4_t old_trunc = vcvtq_f32_s32(vcvtq_s32_f32(f));
float32x4_t new_trunc = vrndq_f32(f);
float out[4];
// If linked against cudnn dynamically, i.e. using the following command:
// g++ cudnn-xmma.cpp -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudnn -lcudart
// then nvprof shows the following kernels executed on an RTX2080:
// Type Time(%) Time Calls Avg Min Max Name
// GPU activities: 64.57% 20.416us 1 20.416us 20.416us 20.416us void xmma_cudnn::gemm::kernel<xmma_cudnn::implicit_gemm::fprop::Kernel_traits<xmma_cudnn::Turing_hmma_fp32_traits, xmma_cudnn::Cta_tile<xmma_cudnn::Turing, int=64, int=32, int=64, int=2, int=1, int=2, int=1, int=1>, xmma_cudnn::implicit_gemm::fprop::Gmem_tile_a_t<xmma_cudnn::Turing_hmma_fp32_traits, xmma_cudnn::Cta_tile<xmma_cudnn::Turing, int=64, int=32, int=64, int=2, int=1, int=2, int=1, int=1>, xmma_cudnn::implicit_gemm::Input_related<int=0, int=0, int=0, bool=0>, int=16, bool=0, xmma_cudnn::implicit_gemm::fprop::Gmem_tile_base_a<xmma_cudnn::Turing_hmma_fp32_traits, xmma_cudnn::Cta_tile<xmma_cudnn::Turing, int=64, int=32, int=64
@malfet
malfet / gist:23e71fbb909707166bbb8cf24de8b17e
Created May 26, 2021 22:40
Triggering new pipeline on Circle and checking its status
% curl --request POST --url https://circleci.com/api/v2/project/gh/pytorch/pytorch/pipeline --data '{"branch":"pull/59020/head", "parameters": {"run_slow_gradcheck_build": true}}' --header 'content-type: application/json' --header 'Circle-Token: XXXXXX'
{
"number" : 328134,
"state" : "pending",
"id" : "d23f0239-1cd0-4d22-8965-d40f8c7bbd73",
"created_at" : "2021-05-26T22:37:55.955Z"
}
% curl https://circleci.com/api/v2/project/gh/pytorch/pytorch/pipeline/328134
{
"id" : "d23f0239-1cd0-4d22-8965-d40f8c7bbd73",