This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% curl --request POST --url https://circleci.com/api/v2/project/gh/pytorch/pytorch/pipeline --data '{"branch":"pull/59020/head", "parameters": {"run_slow_gradcheck_build": true}}' --header 'content-type: application/json' --header 'Circle-Token: XXXXXX' | |
{ | |
"number" : 328134, | |
"state" : "pending", | |
"id" : "d23f0239-1cd0-4d22-8965-d40f8c7bbd73", | |
"created_at" : "2021-05-26T22:37:55.955Z" | |
} | |
% curl https://circleci.com/api/v2/project/gh/pytorch/pytorch/pipeline/328134 | |
{ | |
"id" : "d23f0239-1cd0-4d22-8965-d40f8c7bbd73", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Results of recent runs: | |
# Mac Apple M1 in 50.3 sec | |
# Mac Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz in 61.1 sec | |
# Linux Intel(R) Xeon(R) W-2135 CPU @ 3.70GHz in 53.5 sec | |
import time | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import torch.optim as optim |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import torch | |
from torch.autograd.profiler import profile as _profile | |
def workload(): | |
s1 = torch.cuda.Stream(device="cuda") | |
s2 = torch.cuda.Stream(device="cuda") | |
with torch.cuda.stream(s1): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from datetime import datetime | |
from typing import Any, Dict, List, Optional, Union | |
from urllib.request import urlopen, Request | |
import json | |
import enum | |
import os | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import boto3 | |
import os | |
import bz2 | |
import json | |
import subprocess | |
from datetime import datetime | |
def get_git_commit_history(path, branch="master"): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <arm_neon.h> | |
#include <math.h> | |
#include <stdio.h> | |
void run_neon_reciproc(float data_in[4], float data_out[4]) { | |
float32x4_t input = vld1q_f32(data_in); | |
float32x4_t out = vrecpeq_f32(input); | |
//out = vmulq_f32(vrecpsq_f32(input, out), out); | |
//out = vmulq_f32(vrecpsq_f32(input, out), out); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[Inline Frame] torch_cuda.dll!std::_Default_allocator_traits<std::allocator<std::_Tree_node<unsigned int,void *>>>::deallocate(std::allocator<std::_Tree_node<unsigned int,void *>> &) Line 689 C++ | |
[Inline Frame] torch_cuda.dll!std::_Tree_node<unsigned int,void *>::_Freenode0(std::allocator<std::_Tree_node<unsigned int,void *>> &) Line 373 C++ | |
[Inline Frame] torch_cuda.dll!std::_Tree_val<std::_Tree_simple_types<unsigned int>>::_Erase_head(std::allocator<std::_Tree_node<unsigned int,void *>> &) Line 753 C++ | |
[Inline Frame] torch_cuda.dll!std::_Tree<std::_Tset_traits<unsigned int,std::less<unsigned int>,std::allocator<unsigned int>,0>>::{dtor}() Line 1191 C++ | |
> torch_cuda.dll!torch::jit::fuser::newForReduction(torch::jit::fuser::TensorView * tv, const std::vector<unsigned int,std::allocator<unsigned int>> & axes) Line 438 C++ | |
torch_cuda.dll!torch::jit::fuser::reductionOp(torch::jit::fuser::BinaryOpType reduction_op_type, const std::vector<int,std::allocator<int>> & axes, torch::jit::fuser::Val * init, to |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// nvcc -o hello hello.cu; ./hello | |
#include <stdio.h> | |
// GPU entry point: emits a single greeting line through device-side printf.
// Takes no arguments and produces no result other than the printed text.
__global__ void kernel(void)
{
    printf("Hello World of CUDA\n");
}
int main() { | |
kernel<<<1,1>>>(); | |
return cudaDeviceSynchronize(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// gcc -c -Os -mavx512f -masm=intel | |
#include <immintrin.h> | |
// Multiplies the low eight floats at `con` by con[1] using AVX-512 and
// returns lane 0 of the product, i.e. con[0] * con[1].
//
// con: pointer to at least eight readable floats. The 0x00ff mask enables
//      lanes 0-7 of the zero-masking unaligned load; lanes 8-15 are zeroed
//      instead of being loaded.
//
// The target attribute lets this function build even when -mavx512f is not
// given on the command line; the caller must still only invoke it on a CPU
// that supports AVX-512F.
__attribute__((target("avx512f")))
float foo(float* con) {
  const __mmask16 msk = 0x00ff;
  const __m512 a = _mm512_maskz_loadu_ps(msk, con);
  const __m512 b = _mm512_set1_ps(con[1]);
  const __m512 c = _mm512_mul_ps(a, b);
  // Read lane 0 with the dedicated intrinsic rather than reinterpreting the
  // address of the vector as a float pointer.
  return _mm512_cvtss_f32(c);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# as -o hello.o hello.S ; cc -o hello hello.o -nostdlib | |
.text | |
.globl _start | |
.type _start, @function | |
_start: | |
movl $1, %eax # sys_write( | |
movl $1, %edi # fd = stdout, | |
movl $.LC0, %esi # buf = LC0, | |
movl $12, %edx # 12); | |
syscall |