@cmdr2
cmdr2 / ops.cpp.patch
Last active March 29, 2025 13:07
ops.cpp.patch
diff --git a/ops.cpp b/ops.cpp
index 6190d0d..c44157b 100644
--- a/ops.cpp
+++ b/ops.cpp
@@ -2347,7 +2347,7 @@ static void ggml_compute_forward_repeat_back_f32(
GGML_ASSERT(nb00 == sizeof(float));
if (ggml_is_contiguous(dst)) {
- ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
+ ggml_vec_set_f32(ne0*ne1*ne2*ne3, (float *)dst->data, 0);
11a12,13
> #include "ggml-cpu/unary-ops.h"
> #include "ggml-cpu/binary-ops.h"
4292,4625d4293
< static void ggml_compute_forward_add_f32(
< const struct ggml_compute_params * params,
< struct ggml_tensor * dst) {
<
< const struct ggml_tensor * src0 = dst->src[0];
< const struct ggml_tensor * src1 = dst->src[1];
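For context on the first hunk: ops.cpp is a C++ file and dst->data is a void *. C allows an implicit conversion from void * to any object pointer, but C++ does not, so the explicit (float *) cast is required there. A minimal standalone illustration (not taken from the patch):

#include <cstdlib>

int main() {
    void * data = std::malloc(16 * sizeof(float));
    // float * f = data;        // error in C++: no implicit void* -> float* conversion
    float * f = (float *) data; // explicit cast, as in the hunk above
    f[0] = 0.0f;
    std::free(data);
    return 0;
}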
@cmdr2
cmdr2 / simple_addition_fp16.cpp
Created February 24, 2025 07:44
Add two float16 tensors using ggml. Each tensor takes 1 GB of memory.
#include "ggml.h"
#include "ggml-cpu.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#include <vector>
#include <iostream>
#include <chrono>
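The preview stops at the includes. For reference, a minimal sketch of adding two F16 tensors through ggml's CPU path; the tensor size, fill values, and thread count here are illustrative and not taken from the gist:

#include "ggml.h"
#include "ggml-cpu.h"
#include <cstdio>

int main() {
    const int64_t n = 1024; // illustrative; the gist uses ~1 GB per tensor
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 8
                          + n * ggml_type_size(GGML_TYPE_F16) * 4
                          + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // two F16 input tensors, filled with constants for the demo
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F16, n);
    ggml_set_f32(a, 1.5f);
    ggml_set_f32(b, 2.0f);

    // build and run the addition graph on the CPU
    struct ggml_tensor * sum = ggml_add(ctx, a, b);
    struct ggml_cgraph * graph = ggml_new_graph(ctx);
    ggml_build_forward_expand(graph, sum);
    ggml_graph_compute_with_ctx(ctx, graph, /*n_threads=*/ 4);

    printf("sum[0] = %f\n", ggml_get_f32_1d(sum, 0)); // expect 3.5
    ggml_free(ctx);
    return 0;
}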
@cmdr2
cmdr2 / simple_addition_fp32.cpp
Created February 24, 2025 07:44
Add two float32 tensors using ggml. Each tensor takes 1 GB of memory.
#include "ggml.h"
#include "ggml-cpu.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#include <vector>
#include <iostream>
#include <chrono>
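Judging by the two descriptions, both programs allocate 1 GB per tensor, so the FP32 version presumably differs from the FP16 one only in the element type (GGML_TYPE_F32 at 4 bytes per element instead of 2) and therefore holds half as many elements per tensor.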
@cmdr2
cmdr2 / simple_ggml_addition.cpp
Last active February 17, 2025 11:43
A simple program to add two vectors using ggml; it can be compiled for either CPU or CUDA.
#include "ggml.h"
#include "ggml-cpu.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#include <vector>
#include <iostream>
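Since the description says the same source compiles for CPU or CUDA, the usual ggml pattern is to pick a backend behind the GGML_USE_CUDA guard and run the graph through the backend API. A sketch under that assumption (the gist's actual structure may differ):

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"
#endif
#include <cstdio>

// Pick a backend at compile time: CUDA when GGML_USE_CUDA is defined, CPU otherwise.
static ggml_backend_t init_backend(void) {
#ifdef GGML_USE_CUDA
    return ggml_backend_cuda_init(0); // device 0
#else
    return ggml_backend_cpu_init();
#endif
}

int main() {
    ggml_backend_t backend = init_backend();

    // Build graph metadata only (no_alloc = true); tensor data lives in the backend buffer.
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 8 + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_tensor * sum = ggml_add(ctx, a, b);

    // Allocate all context tensors on the chosen backend and upload the inputs.
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
    float ones[16];
    for (int i = 0; i < 16; i++) ones[i] = 1.0f;
    ggml_backend_tensor_set(a, ones, 0, sizeof(ones));
    ggml_backend_tensor_set(b, ones, 0, sizeof(ones));

    struct ggml_cgraph * graph = ggml_new_graph(ctx);
    ggml_build_forward_expand(graph, sum);
    ggml_backend_graph_compute(backend, graph);

    float out[16];
    ggml_backend_tensor_get(sum, out, 0, sizeof(out));
    printf("sum[0] = %f\n", out[0]); // expect 2.0

    ggml_backend_buffer_free(buf);
    ggml_free(ctx);
    ggml_backend_free(backend);
    return 0;
}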
@cmdr2
cmdr2 / hexdump.py
Created November 17, 2024 16:38
Simple Python-based hexdumper for analysing DLL and executable files. Uses pefile (https://github.com/erocarrera/pefile)
import pefile
import sys
if len(sys.argv) < 2:
    print("Error: No file path specified. Usage: python hexdump.py <file_path>")
    sys.exit(1)
file_path = sys.argv[1]
try:
    # preview truncated here; a minimal load step for illustration
    pe = pefile.PE(file_path)
except pefile.PEFormatError as err:
    print(f"Error: not a valid PE file: {err}")
    sys.exit(1)
@cmdr2
cmdr2 / check_modules.py
import sys
import os
import platform
from importlib.metadata import version as pkg_version
from sdkit.utils import log
from easydiffusion import app
# future home of scripts/check_modules.py
@cmdr2
cmdr2 / diffusers_tensor_rt_directml.py
Last active June 1, 2023 06:28
TensorRT and DirectML with regular diffusers pipelines
import torch
import tensorrt as trt
from polygraphy import cuda
import sys
from packaging import version
from diffusers import StableDiffusionPipeline
from diffusers.pipelines.onnx_utils import OnnxRuntimeModel, ORT_TO_NP_TYPE
from dataclasses import dataclass
import numpy as np
import onnxruntime as ort
from sdkit import Context
from sdkit.generate import generate_images
from sdkit.utils import load_tensor_file, save_tensor_file
from ldm.util import instantiate_from_config
from omegaconf import OmegaConf
import time
model_path = "/path/to/models/stable-diffusion/sd-v1-4.ckpt"
@cmdr2

import time
MODEL_PATH = "F:/models/stable-diffusion/sd-v1-4.ckpt"
CONFIG_PATH = "F:/models/stable-diffusion/v1-inference.yaml"
DEVICE = "mps"  # or "cuda" or "cpu"
def diff():
    print('diffusers')
    import torch
    from transformers import logging as tr_logging