CoffeeVampir3's GitHub Gists
import json

def load_cfg_from_json(json_file):
    with open(json_file, "r", encoding="utf-8") as reader:
        text = reader.read()
    return json.loads(text)

def load_cfg(model_id, cfg_path):
    hf_config = load_cfg_from_json(cfg_path)
    if 'pretrained_cfg' not in hf_config:
        # old form: the base dict itself is the pretrained_cfg
        pretrained_cfg = hf_config
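A brief usage sketch of the completed helper (the config path is a placeholder, not from the gist):

cfg = load_cfg_from_json("config.json")  # placeholder path
# Newer configs nest the model settings; older ones are the base dict itself.
pretrained = cfg.get("pretrained_cfg", cfg)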
@CoffeeVampir3
CoffeeVampir3 / bitnetting.ipynb
Last active April 27, 2024 21:11
Bitnet 1.58 MLP Example
(Notebook preview unavailable.)
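Since the notebook itself cannot be shown, here is a minimal sketch of the idea the title describes: a BitNet 1.58 MLP, assuming PyTorch, absmean ternary weight quantization, and a straight-through estimator. This is an illustration, not the notebook's code.

import torch
import torch.nn as nn
import torch.nn.functional as F

def weight_quant(w: torch.Tensor) -> torch.Tensor:
    # Absmean quantization: scale by mean |w|, round to {-1, 0, 1}, rescale.
    scale = 1.0 / w.abs().mean().clamp(min=1e-5)
    return (w * scale).round().clamp(-1, 1) / scale

class BitLinear(nn.Linear):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        w = self.weight
        # Straight-through estimator: ternary weights in the forward pass,
        # full-precision gradients in the backward pass.
        w_q = w + (weight_quant(w) - w).detach()
        return F.linear(x, w_q, self.bias)

class BitMLP(nn.Module):
    def __init__(self, dim: int, hidden: int):
        super().__init__()
        self.up = BitLinear(dim, hidden, bias=False)
        self.down = BitLinear(hidden, dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.down(F.gelu(self.up(x)))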
from transformers import AutoTokenizer
import json
import sys

model = "/home/blackroot/Desktop/llama3-8b/llama-3-8b"
max_tokens = 8192

def count_tokens_hf(text: str, model_name: str) -> int:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    encoded_input = tokenizer.encode(text)
    return len(encoded_input)
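A short usage sketch (assuming the script is fed text on stdin; this part is not in the original preview):

if __name__ == "__main__":
    sample = sys.stdin.read()
    print(count_tokens_hf(sample, model))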
@CoffeeVampir3
CoffeeVampir3 / example_app.py
Last active April 26, 2024 14:47
exllama minimum example
from flask import Flask, render_template
import torch
from flask_socketio import SocketIO, emit
from generation.make_instruct import get_generator_func
from generation.exllama_generator_wrapper import encode_message, encode_system, encode_header
import os, sys
app = Flask(__name__)
socketio = SocketIO(app)
system_prompt = "Respond to all inputs with EEE"
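The preview cuts off after the system prompt; a hedged sketch of how such an app typically continues (generate_stream below is a hypothetical stand-in, since the real generator function's signature isn't shown in the preview):

@socketio.on("user_message")
def handle_user_message(data):
    prompt = data.get("text", "")
    # Stream tokens back to the client as they are generated.
    for token in generate_stream(system_prompt, prompt):  # hypothetical helper
        emit("token", {"text": token})
    emit("done", {})

if __name__ == "__main__":
    socketio.run(app)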
@CoffeeVampir3
CoffeeVampir3 / example_pretokenization.py
Created April 30, 2024 18:56
Exllama tokenization for training
import copy
import random

# `tokenizer`, `enc_sys_prompt`, and the encode_* helpers are defined earlier
# in the gist; the preview begins at the packing loop.
for sample_text, inst, summary in zip(text, instructions, previous_summary):
    if summary != "":
        # Append a randomly phrased continuation of the prior summary.
        summarized_augmentation = random.choice(augmented_continuation)
        inst = f"{inst}\n\n{summarized_augmentation} {summary}"
    next_prompt = copy.deepcopy(enc_sys_prompt)
    next_message = encode_message(tokenizer, "user", inst)
    next_prompt.extend(next_message)
    # Everything up to here is prompt, not completion.
    mask_length = len(next_prompt)
    next_prompt.extend(encode_message_english_sentence_truncate(tokenizer, "assistant", sample_text, mask_length, 8150))
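The mask_length captured before the assistant turn is presumably used to mask prompt tokens out of the training loss; a sketch of that pattern (assuming Hugging Face-style labels, where -100 is the ignore index):

labels = list(next_prompt)
labels[:mask_length] = [-100] * mask_length  # loss is computed only on the reply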
@CoffeeVampir3
CoffeeVampir3 / Difftervention.ipynb
Last active May 11, 2024 09:22
Instruct vs. Base Analysis
(Notebook preview unavailable.)
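With the notebook unavailable, here is a minimal sketch of one way to run an instruct-vs-base analysis: per-tensor weight deltas between the two checkpoints. Model names are placeholders; this is not the notebook's code.

import torch
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B", torch_dtype=torch.bfloat16)
inst = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16)

base_params = dict(base.named_parameters())
for name, p in inst.named_parameters():
    # Relative norm of the delta: how far instruct tuning moved each tensor.
    delta = (p.float() - base_params[name].float()).norm()
    rel = (delta / base_params[name].float().norm().clamp(min=1e-12)).item()
    print(f"{name}: {rel:.4f}")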
from flask import Flask, render_template
import torch
from flask_socketio import SocketIO, emit
from generation.make_instruct import get_generator_func
from generation.exllama_generator_wrapper import encode_message, encode_system, encode_header, encode_header_prefilled, encode_message_with_eot, encode_completion
from collections import deque
import time
import os, sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
use std::io::Write;
use tokenizers::Tokenizer;
use candle_core::quantized::{gguf_file};
use candle_core::Tensor;
use candle_core::Device;
use candle_transformers::generation::{LogitsProcessor, Sampling};
use candle_transformers::models::quantized_llama as model;
use candle_examples::token_output_stream::TokenOutputStream;
use model::ModelWeights;
use std::collections::HashMap;
@CoffeeVampir3
CoffeeVampir3 / example.rs
Created May 30, 2024 01:41
Rust Candle Inference Examples
use std::io::Write;
use tokenizers::Tokenizer;
use candle_core::quantized::{gguf_file};
use candle_core::Device;
use candle_transformers::generation::{LogitsProcessor, Sampling};
use candle_transformers::models::quantized_llama as model;
use model::ModelWeights;
use burnt_wick::streamable_model::StreamableModel;
fn load_model_and_tokenizer(
    model_path: &str,
    tokenizer_path: &str,
) -> anyhow::Result<(ModelWeights, Tokenizer)> {
    // Plausible completion; the gist's preview truncates at the signature.
    // Read the GGUF container, build the quantized weights, load the tokenizer.
    let mut file = std::fs::File::open(model_path)?;
    let content = gguf_file::Content::read(&mut file)?;
    let model = ModelWeights::from_gguf(content, &mut file, &Device::Cpu)?;
    let tokenizer = Tokenizer::from_file(tokenizer_path).map_err(anyhow::Error::msg)?;
    Ok((model, tokenizer))
}
let cube_mesh_handle: Handle<Mesh> = meshes.add(Rectangle::new(6., 9.));
let cube_mesh_handle2: Handle<Mesh> = meshes.add(Rectangle::new(6., 9.));

commands.spawn((
    SpatialBundle {
        transform: Transform::default(),
        ..default()
    },
    ParentMarker,
    // On::<Pointer<Drag>>::listener_component_mut::<Transform>(|drag, transform| {