vadimkantorov’s gists

vadimkantorov / nvidia-pids.sh

Last active July 28, 2025 14:31

Print all CUDA-using PIDs

# Dump the full nvidia-smi query as XML, keep only the lines holding a closing </pid> tag,
# then strip the markup so that only the numeric PID remains on each line.
# NOTE: tr -d deletes every listed *character* (<, /, p, i, d, > and tab), not the literal string "</pid>";
# this is safe here only because the remaining payload (the PID) consists of digits.
nvidia-smi -q -x | grep "</pid>" | tr -d "</pid>\t"

vadimkantorov / to_.py

Last active July 26, 2025 09:31

Inplace downcasting in PyTorch

	# https://github.com/pytorch/pytorch/issues/158710
	# https://github.com/pytorch/pytorch/issues/158698
	# https://github.com/pytorch/pytorch/issues/69431

	import torch

	def to_(tensor1d, dtype, *, chunks = 0, split_size = 0):
	# TODO: instead of clone() maybe could copy_ into a buffer, clone() does not allow using a buffer
	# TODO: unclear if these codes can support autograd, and if so, will it remember too much in saved_for_backward

vadimkantorov / base64_torch.py

Last active July 19, 2025 18:54

Base64 decoding in PyTorch

	# https://en.wikipedia.org/wiki/Base64
	# 00123456 00ABCDEF 00abcdef 00uvwxyz
	# 123456AB CDEFabcd efuvwxyz
	# this code does not support batches. adapting for e.g. concatenated varlen format is possible, but need to handle/preserve varlen information and paddings in some way

	import torch

	def base64_encode_padded(input_as_uint8_tensor):
	base64_alphabet, base64_pad = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/', '='
	encode = torch.tensor(list(map(ord, base64_alphabet)), dtype = torch.uint8, device = input_as_uint8_tensor.device)

vadimkantorov / .nebius.s3cfg

Created July 16, 2025 13:24

s3cmd config .s3cfg for Nebius s3

	# Nebius s3
	# sudo apt-get install s3cmd # export the vars below or configure access_key / secret_key
	# export AWS_ACCESS_KEY_ID=...
	# export AWS_SECRET_ACCESS_KEY=...
	# https://s3tools.org/usage
	# s3cmd -c ~/.nebius.s3cfg ls

	[default]
	# access_key=...
	# secret_key=...

vadimkantorov / sitecustomize.py

Created July 10, 2025 18:03

Install an OOM hook in PyTorch

	# PYTHONPATH=. python ...

	import os
	import torch

	def cuda_oom_hook(device, alloc, device_alloc, device_free, info = dict(counter = 0), snapshot_dump_file_pattern = './memory_snapshot_{pid}_{oom_counter}.pt'):
	memory_summary = torch.cuda.memory_summary(device = device)
	memory_snapshot = torch.cuda.memory._snapshot(device = device)
	pid = os.getpid()
	print('device:', device, 'oom#:', info['oom_counter'], 'pid:', pid, 'alloc:', alloc, 'device_alloc:', device_alloc, 'device_free:', device_free)

vadimkantorov / multiprocessing_pool_batched.py

Last active May 27, 2025 09:45

Example of using multiprocessing with explicitly batched inputs

	import multiprocessing
	import itertools

	# toy workload: the integers 0..110, i.e. 111 items, which is not a multiple of batchsize
	inputs = list(range(111))
	batchsize = 10
	# presumably the size of the worker pool created further down - confirm against the rest of the script
	num_workers = 4

	# itertools.batched (Python 3.12+) returns a lazy iterator of tuples holding up to batchsize items each;
	# with 111 inputs the final tuple contains the single leftover item
	batches = itertools.batched(inputs, batchsize)

	def reducer(xs):

vadimkantorov / cache_hf_model.sh

Created May 23, 2025 17:26

Downloads and tests import of a HuggingFace model into a meta device (and thus does not use any GPU/CPU memory)

	# Usage: bash cache_hf_model.sh Qwen/Qwen3-8B
	# export HF_HOME=/my/cache/HF_HOME
	# "$@" is quoted so that an argument containing spaces or glob characters (e.g. a local model path)
	# reaches Python as a single argv entry; the Python snippet loads only the last argument (sys.argv[-1]).
	python -c 'import sys, transformers; transformers.AutoModel.from_pretrained(sys.argv[-1], trust_remote_code=True, device_map="meta")' "$@"

vadimkantorov / minidotenv.py

Created May 22, 2025 18:40

TOML can be abused to read some basic key-value pairs as well

	def load_dotenv(dotenv_path = '.env'):
	# https://snarky.ca/use-toml-for-env-files/
	# https://github.com/theskumar/python-dotenv
	'''
	# such simple key-value files are toml subset and can be read via tomllib without external packages or hacks
	a="b"
	c="d"
	'''
	import os, tomllib
	os.environ.update(tomllib.load(open(dotenv_path, 'rb')))

vadimkantorov / catfsspec.py

Last active June 12, 2025 11:50

Basic example of using fsspec explaining some quirks on porting from regular Python I/O

	import sys, fsspec

	# path or URL to print, taken from the first command-line argument
	target = sys.argv[1]

	# 'rt' must be passed explicitly: in fsspec the default mode is 'rb'
	opened = fsspec.open(target, 'rt')

	# the context manager must be used: the result of fsspec.open(...) does not have a read() method
	with opened as f:
		contents = f.read()
		print(contents)

	# echo world > hello.txt
	# python catfsspec.py hello.txt
	# python catfsspec.py file://hello.txt
	# python catfsspec.py s3://mybucket/hello.txt

vadimkantorov / git_private_fork.sh

Last active July 28, 2025 12:50

Create a private fork of verl

	# reference: https://gist.github.com/0xjac/85097472043b697ab57ba1b1c7530274

	# make a temporary bare clone of the upstream repo (SSH remote)
	git clone --bare git@github.com:volcengine/verl.git
	cd verl.git

	# create a bare repo vadimkantorov/verl (it must exist on GitHub before the push below)
	git push --mirror git@github.com:vadimkantorov/verl.git
	# the temporary bare clone is no longer needed once it has been mirrored
	cd .. && rm -rf verl.git

	# set up upstream remote

Vadim Kantorov vadimkantorov