malfet’s gists

malfet / gist:23e71fbb909707166bbb8cf24de8b17e

Created May 26, 2021 22:40

Triggering a new pipeline on CircleCI and checking its status

	% curl --request POST --url https://circleci.com/api/v2/project/gh/pytorch/pytorch/pipeline --data '{"branch":"pull/59020/head", "parameters": {"run_slow_gradcheck_build": true}}' --header 'content-type: application/json' --header 'Circle-Token: XXXXXX'
	{
	"number" : 328134,
	"state" : "pending",
	"id" : "d23f0239-1cd0-4d22-8965-d40f8c7bbd73",
	"created_at" : "2021-05-26T22:37:55.955Z"
	}
	% curl https://circleci.com/api/v2/project/gh/pytorch/pytorch/pipeline/328134
	{
	"id" : "d23f0239-1cd0-4d22-8965-d40f8c7bbd73",

malfet / cifar10_benchmark.py

Created April 5, 2021 14:36

Run CIFAR

	#!/usr/bin/env python3
	# Results of recent runs:
	# Mac Apple M1 in 50.3 sec
	# Mac Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz in 61.1 sec
	# Linux Intel(R) Xeon(R) W-2135 CPU @ 3.70GHz in 53.5 sec
	import time
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import torch.optim as optim

malfet / simple-kineto.py

Created March 3, 2021 20:37

	#!/usr/bin/env python

	import torch
	from torch.autograd.profiler import profile as _profile

	def workload():
	s1 = torch.cuda.Stream(device="cuda")
	s2 = torch.cuda.Stream(device="cuda")

	with torch.cuda.stream(s1):

malfet / gh-get-milestone-issues.py

Last active March 19, 2021 17:03

	#!/usr/bin/env python3

	from datetime import datetime
	from typing import Any, Dict, List, Optional, Union
	from urllib.request import urlopen, Request
	import json
	import enum
	import os

malfet / test_time_trends.py

Created February 4, 2021 17:03

	#!/usr/bin/env python3
	import boto3
	import os
	import bz2
	import json
	import subprocess
	from datetime import datetime


	def get_git_commit_history(path, branch="master"):

malfet / NEON reciprocal example

Last active November 6, 2020 00:17

	#include <arm_neon.h>
	#include <math.h>
	#include <stdio.h>


	void run_neon_reciproc(float data_in[4], float data_out[4]) {
	float32x4_t input = vld1q_f32(data_in);
	float32x4_t out = vrecpeq_f32(input);
	//out = vmulq_f32(vrecpsq_f32(input, out), out);
	//out = vmulq_f32(vrecpsq_f32(input, out), out);

malfet / gist:8e33477f1971fcfd3ca90472edbe9b67

Created October 7, 2020 04:50

	[Inline Frame] torch_cuda.dll!std::_Default_allocator_traits<std::allocator<std::_Tree_node<unsigned int,void >>>::deallocate(std::allocator<std::_Tree_node<unsigned int,void >> &) Line 689 C++
	[Inline Frame] torch_cuda.dll!std::_Tree_node<unsigned int,void >::_Freenode0(std::allocator<std::_Tree_node<unsigned int,void >> &) Line 373 C++
	[Inline Frame] torch_cuda.dll!std::_Tree_val<std::_Tree_simple_types<unsigned int>>::_Erase_head(std::allocator<std::_Tree_node<unsigned int,void *>> &) Line 753 C++
	[Inline Frame] torch_cuda.dll!std::_Tree<std::_Tset_traits<unsigned int,std::less<unsigned int>,std::allocator<unsigned int>,0>>::{dtor}() Line 1191 C++
	> torch_cuda.dll!torch::jit::fuser::newForReduction(torch::jit::fuser::TensorView * tv, const std::vector<unsigned int,std::allocator<unsigned int>> & axes) Line 438 C++
	torch_cuda.dll!torch::jit::fuser::reductionOp(torch::jit::fuser::BinaryOpType reduction_op_type, const std::vector<int,std::allocator<int>> & axes, torch::jit::fuser::Val * init, to

malfet / hello.cu

Created September 15, 2020 15:43

	// nvcc -o hello hello.cu; ./hello
	#include <stdio.h>

	// Device entry point: every thread that executes this kernel emits one
	// greeting line through device-side printf. Takes no arguments and
	// produces no result other than the printed text.
	__global__ void kernel(void)
	{
		printf("Hello World of CUDA\n");
	}

	int main() {
	kernel<<<1,1>>>();
	return cudaDeviceSynchronize();

malfet / wrong-vmul-ps.c

Last active September 12, 2020 00:25

GCC masm=intel bug

malfet / hello.S

Created August 30, 2020 14:15

HelloWorld in x86_64 assembly

	# as -o hello.o hello.S ; cc -o hello hello.o -nostdlib
	.text
	.globl _start
	.type _start, @function
	_start:
	movl $1, %eax # sys_write(
	movl $1, %edi # fd = stdout,
	movl $.LC0, %esi # buf = LC0,
	movl $12, %edx # 12);
	syscall

Nikita Shulga malfet