This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
comma@tiny24:/data/openpilot/tinygrad_repo$ python3 openpilot/compile2.py https://github.com/commaai/openpilot/raw/v0.9.7/selfdrive/modeld/models/supercombo.onnx | |
https://github.com/commaai/openpilot/raw/v0.9.7/selfdrive/modeld/models/supercombo.onnx: 100%|███████████████████████████████████████████| 51.5M/51.5M [00:00<00:00, 88.2MB/s] | |
cache is out of date, clearing it | |
/usr/local/pyenv/versions/3.11.4/lib/python3.11/site-packages/pyopencl/__init__.py:528: CompilerWarning: Non-empty compiler output encountered. Set the environment variable PYOPENCL_COMPILER_OUTPUT=1 to see more. | |
lambda: self._prg.build(options_bytes, devices), | |
190 schedule items depend on the input, 462 don't | |
7 inputs | |
13: rewrite input, image dtype dtypes.imageh((16, 2048, 4)), (View(shape=(1, 16, 32, 64, 2), strides=(0, 8192, 256, 4, 1), offset=0, mask=None, contiguous=False), View(shape=(1, 16, 32, 128), strides=(0, 4096, 128, 1), offset=0, mask=None, contiguous=True)) | |
24: rewrite input, image dtype dtypes.imageh((8, 2048, 4)), (View(shap |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from huggingface_hub import snapshot_download | |
from tinygrad import nn, Tensor, TinyJit, Device | |
import time | |
class Block: | |
def __init__(self, in_dims, dims, stride=1): | |
super().__init__() | |
self.conv1 = nn.Conv2d( | |
in_dims, dims, kernel_size=3, stride=stride, padding=1, bias=False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <tgmath.h> | |
#define max(x,y) ((x>y)?x:y) | |
#define half __fp16 | |
void E_(int* data0) { | |
int val0 = data0[0]; | |
data0[0] = (val0+1); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[55883.721977] amdgpu: map VA 0x702eae9d2000 - 0x702eae9d3000 in entry 0000000072d2b750 | |
[55883.721996] amdgpu: INC mapping count 1 | |
[55883.722133] kfd kfd: amdgpu: ioctl cmd 0xc0184b0c (#0xc), arg 0x7ffe16172bef | |
[55883.722238] gmc_v11_0_process_interrupt: 6 callbacks suppressed | |
[55883.722250] amdgpu 0000:c3:00.0: amdgpu: [gfxhub] page fault (src_id:0 ring:24 vmid:8 pasid:32774, for process python3 pid 356134 thread python3 pid 356134) | |
[55883.722343] amdgpu 0000:c3:00.0: amdgpu: in page starting at address 0x00000000aabbc000 from client 10 | |
[55883.722391] amdgpu 0000:c3:00.0: amdgpu: GCVM_L2_PROTECTION_FAULT_STATUS:0x00800A30 | |
[55883.722429] amdgpu 0000:c3:00.0: amdgpu: Faulty UTCL2 client ID: CPC (0x5) | |
[55883.722466] amdgpu 0000:c3:00.0: amdgpu: MORE_FAULTS: 0x0 | |
[55883.722497] amdgpu 0000:c3:00.0: amdgpu: WALKER_ERROR: 0x0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# one hit, no loop needed | |
# this is caused by creating a KFD_IOC_QUEUE_TYPE_COMPUTE_AQL without an EOP buffer | |
# this causes the MES to page fault | |
import os, ctypes, pathlib, re, fcntl, functools, mmap, time | |
import tinygrad.runtime.autogen.kfd as kfd | |
from tinygrad.helpers import to_mv | |
from extra.hip_gpu_driver import hip_ioctl | |
import tinygrad.runtime.autogen.hsa as hsa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# | |
# TARGET arch is: ['-D__HIP_PLATFORM_AMD__', '-I/opt/rocm/include'] | |
# WORD_SIZE is: 8 | |
# POINTER_SIZE is: 8 | |
# LONGDOUBLE_SIZE is: 16 | |
# | |
import ctypes | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# tiny@tiny9:~/tinygrad$ python3 examples/benchmark_copies.py | |
# CPU copy 6.18 ms, 16.28 GB/s | |
# GPU copy 4.38 ms, 23.00 GB/s | |
# GPU 6x 1.85 ms, 54.54 GB/s | |
import time | |
def timeit(fxn): | |
tms = [] | |
for _ in range(10): | |
st = time.perf_counter() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__kernel void matmul(__global float* data0, const __global float* data1, const __global float* data2) { | |
int gidx0 = get_group_id(1); /* 512 */ | |
int gidx1 = get_group_id(0); /* 512 */ | |
float2 acc0 = (float2)(0.0f,0.0f); | |
float2 acc1 = (float2)(0.0f,0.0f); | |
for (int ridx0 = 0; ridx0 < 512; ++ridx0) { | |
float2 val0 = (float2)(*((__global float2*)(data1+(gidx0*2048)+(ridx0*2)))); | |
float2 val1 = (float2)(*((__global float2*)(data1+(gidx0*2048)+(ridx0*2)+1024))); | |
float2 val2 = (float2)(*((__global float2*)(data2+(gidx1*2)+(ridx0*2048)))); | |
float2 val3 = (float2)(*((__global float2*)(data2+(gidx1*2)+(ridx0*2048)+1024))); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*** 0 E_64_32_6_6n5 arg 2 sz [64, 1, 1] [32, 1, 1] OPs 33M/ 0.00G mem 3.07 GB tm 3.20us/ 0.00ms (10483.20 GFLOPS, 297.02 GB/s) | |
*** 1 r_128_31_31_3_2_3_2_2_2_8n26 arg 3 sz [31, 31, 128] [2, 3, 1] OPs 283M/ 0.03G mem 3.07 GB tm 218.44us/ 0.22ms ( 1297.42 GFLOPS, 216.24 GB/s) | |
*** 2 r_1024_32_16_2_3_4_4_8n6 arg 3 sz [32, 1024, 1] [2, 16, 1] OPs 805M/ 0.32G mem 3.07 GB tm 64.68us/ 0.29ms (12450.43 GFLOPS, 1426.62 GB/s) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import time | |
import torch | |
import torch.distributed as dist | |
import torch.multiprocessing as mp | |
def all_reduce_latency(nbytes, rank): | |
buf = torch.randn(nbytes // 4).cuda(rank) |
NewerOlder