Skip to content

Instantly share code, notes, and snippets.

View danieldk's full-sized avatar

Daniël de Kok danieldk

View GitHub Profile
import torch
from . import _activation_psnp6q5y4k4wg
# Handle to the `torch.ops` namespace that shares its name with the
# `_activation_psnp6q5y4k4wg` module imported above.
# NOTE(review): presumably importing that module is what registers the ops
# under this namespace — confirm against the extension's build.
ops = torch.ops._activation_psnp6q5y4k4wg
def add_op_namespace_prefix(op_name: str) -> str:
    """Return *op_name* qualified with this extension's op namespace.

    Args:
        op_name: Unqualified operator name, e.g. ``"silu_and_mul"``.

    Returns:
        The name in ``namespace::op_name`` form, using the
        ``_activation_psnp6q5y4k4wg`` namespace.
    """
    # The namespace literal is the same identifier used for the
    # `torch.ops` lookup in this file.
    return f"_activation_psnp6q5y4k4wg::{op_name}"
cmake_minimum_required(VERSION 3.26)
project(activation LANGUAGES CXX)
set(TARGET_DEVICE "cuda" CACHE STRING "Target device backend for kernel")
install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
include(FetchContent)
file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
@danieldk
danieldk / gist:2e75f947c2eaf0953d6f2a2377d96893
Created April 18, 2025 08:38
rpath-sha265-changes.diff
diff --git a/pkgs/rocm-packages/rocm-6.3.4-metadata.json b/pkgs/rocm-packages/rocm-6.3.4-metadata.json
index 190da56..1d0a325 100644
--- a/pkgs/rocm-packages/rocm-6.3.4-metadata.json
+++ b/pkgs/rocm-packages/rocm-6.3.4-metadata.json
@@ -12,7 +12,7 @@
},
{
"name": "amd-smi-lib-rpath",
- "sha256": "19277765db20c680667a91968790f5b86e2c8c5cd344d640f784f639b9fff7a9",
+ "sha256": "55c5651c6ebb418795a9e51edd24dd98e3d27ac0c6252629cffdae0cfb875816",
@danieldk
danieldk / default.nix
Created April 5, 2025 12:39
Lenovo WWAN unlock derivation
{
fetchFromGitHub,
buildFHSEnv,
stdenv,
makeWrapper,
}:
let
pname = "lenovo-wwan-unlock";
version = "2.1.3";
{
"_name_or_path": "/scratch/daniel/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/0e9e39f249a16976918f6564b8830bc894c89659",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 128000,
"compression_config": {
"config_groups": {
# tc -s qdisc
qdisc noqueue 0: dev lo root refcnt 2
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
qdisc mq 0: dev eth0 root
Sent 761134400 bytes 2005241 pkt (dropped 24, overlimits 0 requeues 686)
backlog 0b 0p requeues 686
qdisc fq_codel 0: dev eth0 parent :10 limit 10240p flows 1024 quantum 1518 target 5ms interval 100ms memory_limit 4Mb ecn drop_batch 64
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
{
inputs = {
tgi-nix.url = "github:danieldk/tgi-nix";
#tgi-nix.url = "path:/home/daniel/git/tgi-nix";
nixpkgs.follows = "tgi-nix/nixpkgs";
flake-utils.url = "github:numtide/flake-utils";
};
outputs =
{
self,
diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix
index 4e3d54f4caa4..00d6b2727981 100644
--- a/pkgs/development/python-modules/torch/default.nix
+++ b/pkgs/development/python-modules/torch/default.nix
@@ -232,7 +232,12 @@ buildPythonPackage rec {
};
patches =
- lib.optionals cudaSupport [ ./fix-cmake-cuda-toolkit.patch ]
+ [
# Author: Daniel de Kok
# Usage: python shard.py --safetensors-path /fsx/danieldk/4bit-gptq-instruct/gptq_model-4bit-128g.safetensors --framework torch --output-path /fsx/danieldk/4bit-gptq-instruct/gptq-sharded
import argparse
import safetensors
import huggingface_hub
def get_args():
@danieldk
danieldk / punct-unigrams-bigrams.txt
Created September 2, 2023 09:04
Punctuation unigram frequencies across various programming languages, with their most common bigrams
_ 44919, bigrams: __: 1445, ._: 531, _(: 529, _): 138, (_: 99
, 42109, bigrams: ),: 3297, ",: 3266, ],: 1725, },: 461, >,: 389
. 37513, bigrams: ..: 1881, ).: 1539, ._: 531, ].: 436, .": 431
) 32189, bigrams: (): 6046, );: 3852, ),: 3297, )): 2780, "): 2528
( 32188, bigrams: (): 6046, (": 2536, (&: 1116, ((: 699, _(: 529
: 24991, bigrams: ::: 4528, ):: 2302, ":: 849, ]:: 427, :{: 351
" 22854, bigrams: ",: 3266, "": 2633, (": 2536, "): 2528, ":: 849
= 20709, bigrams: ==: 1217, =": 670, +=: 351, !=: 347, =>: 316
/ 12470, bigrams: //: 5087, /*: 494, */: 493, :/: 239, </: 163
; 10225, bigrams: );: 3852, };: 895, ?;: 473, ";: 468, ];: 456