This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% curl -L https://github.com/pytorch/pytorch/pull/69840.patch|diffstat | |
% Total % Received % Xferd Average Speed Time Time Time Current | |
Dload Upload Total Spent Left Speed | |
100 143 100 143 0 0 459 0 --:--:-- --:--:-- --:--:-- 468 | |
100 7148 0 7148 0 0 11172 0 --:--:-- --:--:-- --:--:-- 11172 | |
aten/src/ATen/templates/RegisterBackendSelect.cpp | 27 ++++++++++++++------------- | |
aten/src/ATen/templates/RegisterSchema.cpp | 16 +--------------- | |
tools/codegen/gen.py | 40 +++++++++++++++++++++++----------------- | |
3 files changed, 38 insertions(+), 45 deletions(-) | |
% curl -L https://github.com/pytorch/pytorch/pull/69840.diff|diffstat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
from subprocess import check_output | |
def get_pmem():
    """Return one memory figure for the current process, as printed by ``pmap -d``.

    Runs ``pmap -d <pid> | tail -n 1`` through ``sh`` and returns the fourth
    whitespace-separated field of that final line, as a string.

    NOTE(review): on procps ``pmap`` the final line of ``-d`` output is a
    summary of the form ``mapped: <N>K  writeable/private: <N>K  shared: <N>K``,
    which would make field 3 the writeable/private total -- confirm against the
    ``pmap`` installed on the target machine.

    Raises:
        IndexError: if the captured line has fewer than four fields (for
            example when ``pmap`` printed nothing).
    """
    summary_line = check_output(
        ["sh", "-c", f"pmap -d {os.getpid()}|tail -n 1"],
        encoding="latin1",
    )
    # Index 3 is the value that follows the second label on the summary line.
    pmem = summary_line.split()[3]
    return pmem
def get_gpumem(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// Device entry point: every thread that runs this kernel prints a greeting
// followed by its own x-index within its block (threadIdx.x).
__global__ void kernel() {
  printf("Hello World of CUDA %d\n", threadIdx.x);
}
// Launches `kernel` with one block of one thread, waits for the device to
// finish, and uses the resulting CUDA status as the process exit code
// (0 == cudaSuccess). Failures are also described on stderr.
int main() {
  kernel<<<1, 1>>>();

  // Errors detected at launch time are reported through cudaGetLastError();
  // check it explicitly so a failed launch is not silently skipped.
  const cudaError_t launch_status = cudaGetLastError();
  if (launch_status != cudaSuccess) {
    fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(launch_status));
    return launch_status;
  }

  // Block until the kernel has completed; errors raised while it ran are
  // returned here.
  const cudaError_t sync_status = cudaDeviceSynchronize();
  if (sync_status != cudaSuccess) {
    fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(sync_status));
  }
  return sync_status;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
URL `resources/fonts/dejavu.css' | |
Parent URL file:///home/nshulga/git/pytorch.github.io/javadoc/stylesheet.css, line 3, col 12 | |
Real URL file:///home/nshulga/git/pytorch.github.io/javadoc/resources/fonts/dejavu.css | |
Result Error: URLError: <urlopen error [Errno 2] No such file or directory: '/home/nshulga/git/pytorch.github.io/javadoc/resources/fonts/dejavu.css'> | |
URL `../fonts/FreightSans/freight-sans-bold.woff' | |
Parent URL file:///home/nshulga/git/pytorch.github.io/assets/main.css, line 1, col 147264 | |
Real URL file:///home/nshulga/git/pytorch.github.io/fonts/FreightSans/freight-sans-bold.woff | |
Result Error: URLError: <urlopen error [Errno 2] No such file or directory: '/home/nshulga/git/pytorch.github.io/fonts/FreightSans/freight-sans-bold.woff'> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
// Diagnostic helper: constructing a WhatsMyName<T> writes the compiler's
// decorated signature of this constructor (__PRETTY_FUNCTION__, a GCC/Clang
// extension) to std::cout, followed by a newline and a flush. The printed
// signature includes the template argument, so it shows how the compiler
// spells T.
template <typename T>
struct WhatsMyName {
  WhatsMyName() {
    // std::endl is kept on purpose: the line is flushed immediately.
    std::cout << __PRETTY_FUNCTION__ << std::endl;
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// c++ -std=c++14 -O2 -fPIC -Xpreprocessor -fopenmp mup-smaller.cpp | |
// | |
#include <omp.h> | |
#include <atomic> | |
#include <exception> | |
#include <algorithm> | |
// Branch-prediction hint: evaluates `expr` as a bool and tells the compiler
// (via the GCC/Clang builtin __builtin_expect) that the result is expected to
// be false. The value of the whole expression is still the truth value of
// `expr`, so it can be used directly inside an `if`.
#define C10_UNLIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 0))
namespace at { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from conda_build.convert import extract_temporary_directory, create_target_archive | |
import shutil | |
import os | |
import sys | |
def remove_line_with_pattern(fname, pattern): | |
with open(fname) as f: | |
lines = f.read().split('\n') | |
del_idx = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <arm_neon.h> | |
#include <iostream> | |
int main(void) { | |
const float inp[4] = { 3.1, 2.7, -2.9, -1.0000000200408773e+20 }; | |
std::cout << "inp= " << inp[0] << " " << inp[1] << " " << inp[2] << " " << inp[3] << std::endl; | |
float32x4_t f = vld1q_f32(inp); | |
float32x4_t old_trunc = vcvtq_f32_s32(vcvtq_s32_f32(f)); | |
float32x4_t new_trunc = vrndq_f32(f); | |
float out[4]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// If linked against cudnn dynamically, i.e. using the following command:
// g++ cudnn-xmma.cpp -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudnn -lcudart
// then nvprof shows the following kernels executed on an RTX2080:
// Type Time(%) Time Calls Avg Min Max Name | |
// GPU activities: 64.57% 20.416us 1 20.416us 20.416us 20.416us void xmma_cudnn::gemm::kernel<xmma_cudnn::implicit_gemm::fprop::Kernel_traits<xmma_cudnn::Turing_hmma_fp32_traits, xmma_cudnn::Cta_tile<xmma_cudnn::Turing, int=64, int=32, int=64, int=2, int=1, int=2, int=1, int=1>, xmma_cudnn::implicit_gemm::fprop::Gmem_tile_a_t<xmma_cudnn::Turing_hmma_fp32_traits, xmma_cudnn::Cta_tile<xmma_cudnn::Turing, int=64, int=32, int=64, int=2, int=1, int=2, int=1, int=1>, xmma_cudnn::implicit_gemm::Input_related<int=0, int=0, int=0, bool=0>, int=16, bool=0, xmma_cudnn::implicit_gemm::fprop::Gmem_tile_base_a<xmma_cudnn::Turing_hmma_fp32_traits, xmma_cudnn::Cta_tile<xmma_cudnn::Turing, int=64, int=32, int=64 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% curl --request POST --url https://circleci.com/api/v2/project/gh/pytorch/pytorch/pipeline --data '{"branch":"pull/59020/head", "parameters": {"run_slow_gradcheck_build": true}}' --header 'content-type: application/json' --header 'Circle-Token: XXXXXX' | |
{ | |
"number" : 328134, | |
"state" : "pending", | |
"id" : "d23f0239-1cd0-4d22-8965-d40f8c7bbd73", | |
"created_at" : "2021-05-26T22:37:55.955Z" | |
} | |
% curl https://circleci.com/api/v2/project/gh/pytorch/pytorch/pipeline/328134 | |
{ | |
"id" : "d23f0239-1cd0-4d22-8965-d40f8c7bbd73", |