CoffeeVampir3 / burnt_wick.rs
Last active July 22, 2024 20:53
candle model stream
use candle_transformers::models::quantized_llama as model;
use candle_transformers::generation::{LogitsProcessor, Sampling};
use candle_core::quantized::{gguf_file};
use candle_core::Tensor;
pub use candle_core::Device;
pub use tokenizers::Tokenizer;
pub struct StreamableModel {
    model: model::ModelWeights,
    tokenizer: tokenizers::Tokenizer,
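The preview stops inside the struct, so any remaining fields are not shown. As a minimal sketch of how such a wrapper is commonly assembled with candle's quantized_llama module, reusing the imports listed above (the constructor name, the use of anyhow, and the assumption that only these two fields exist are assumptions, not the gist's actual code):

// Hypothetical loader for the struct shown above; it relies only on
// gguf_file::Content::read, ModelWeights::from_gguf, and Tokenizer::from_file.
impl StreamableModel {
    pub fn from_paths(
        model_path: &str,
        tokenizer_path: &str,
        device: &Device,
    ) -> anyhow::Result<Self> {
        // Read the GGUF container, then build the quantized llama weights from it.
        let mut file = std::fs::File::open(model_path)?;
        let content = gguf_file::Content::read(&mut file)?;
        let model = model::ModelWeights::from_gguf(content, &mut file, device)?;
        // tokenizers returns a boxed error, so convert it for anyhow.
        let tokenizer = Tokenizer::from_file(tokenizer_path).map_err(anyhow::Error::msg)?;
        Ok(Self { model, tokenizer })
    }
}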
CoffeeVampir3 / train.ipynb
Last active July 9, 2024 08:14
Weird Initial Latent Issue.
(Notebook preview not available.)
#pragma once
#include <coroutine>
#include <utility>
#include <functional>
struct NecroAiState;
struct NecroAiState
{
    struct promise_type;
let cube_mesh_handle: Handle<Mesh> = meshes.add(Rectangle::new(6., 9.));
let cube_mesh_handle2: Handle<Mesh> = meshes.add(Rectangle::new(6., 9.));
commands.spawn((
    SpatialBundle {
        transform: Transform::default(),
        ..default()
    },
    ParentMarker,
    // On::<Pointer<Drag>>::listener_component_mut::<Transform>(|drag, transform| {
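The last line hints at bevy_mod_picking's event-listener API. A minimal sketch of how that commented-out drag handler is typically completed (the exact delta mapping and the rest of the spawn bundle are assumptions):

// Hypothetical version of the spawn with the drag listener filled in.
// On::<Pointer<Drag>>::listener_component_mut mutates a component on the
// entity that owns the listener whenever a drag event reaches it.
commands.spawn((
    SpatialBundle::default(),
    ParentMarker,
    On::<Pointer<Drag>>::listener_component_mut::<Transform>(|drag, transform| {
        // Follow the pointer: screen-space y grows downward, world y grows upward.
        transform.translation.x += drag.delta.x;
        transform.translation.y -= drag.delta.y;
    }),
));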
CoffeeVampir3 / example.rs
Created May 30, 2024 01:41
Rust Candle Inference Examples
use std::io::Write;
use tokenizers::Tokenizer;
use candle_core::quantized::{gguf_file};
use candle_core::Device;
use candle_transformers::generation::{LogitsProcessor, Sampling};
use candle_transformers::models::quantized_llama as model;
use model::ModelWeights;
use burnt_wick::streamable_model::StreamableModel;
fn load_model_and_tokenizer(
use std::io::Write;
use tokenizers::Tokenizer;
use candle_core::quantized::{gguf_file};
use candle_core::Tensor;
use candle_core::Device;
use candle_transformers::generation::{LogitsProcessor, Sampling};
use candle_transformers::models::quantized_llama as model;
use candle_examples::token_output_stream::TokenOutputStream;
use model::ModelWeights;
use std::collections::HashMap;
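These imports line up with candle's quantized inference example. A rough sketch of the greedy sampling loop they support, reusing the names imported above (the function shape, seed, and stop conditions are simplified assumptions, and anyhow is an added dependency):

// Hypothetical single-stream generation loop in the spirit of candle's
// quantized_llama example: one prompt pass, then sample token by token.
fn generate(
    model: &mut ModelWeights,
    tokenizer: Tokenizer,
    prompt_tokens: Vec<u32>,
    max_new_tokens: usize,
    device: &Device,
) -> anyhow::Result<()> {
    let mut token_stream = TokenOutputStream::new(tokenizer);
    let mut logits_processor = LogitsProcessor::from_sampling(42, Sampling::ArgMax);

    // Process the whole prompt in one forward pass.
    let input = Tensor::new(prompt_tokens.as_slice(), device)?.unsqueeze(0)?;
    let logits = model.forward(&input, 0)?.squeeze(0)?;
    let mut next_token = logits_processor.sample(&logits)?;

    for index in 0..max_new_tokens {
        // Stream decoded text as soon as the tokenizer can emit it.
        if let Some(text) = token_stream.next_token(next_token)? {
            print!("{text}");
            std::io::stdout().flush()?;
        }
        // Feed the sampled token back in at the next position of the KV cache.
        let input = Tensor::new(&[next_token], device)?.unsqueeze(0)?;
        let logits = model.forward(&input, prompt_tokens.len() + index)?.squeeze(0)?;
        next_token = logits_processor.sample(&logits)?;
    }
    Ok(())
}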
from flask import Flask, render_template
import torch
from flask_socketio import SocketIO, emit
from generation.make_instruct import get_generator_func
from generation.exllama_generator_wrapper import encode_message, encode_system, encode_header, encode_header_prefilled, encode_message_with_eot, encode_completion
from collections import deque
import time
import os,sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
CoffeeVampir3 / Difftervention.ipynb
Last active May 11, 2024 09:22
Instruct VS Base Analysis
(Notebook preview not available.)
CoffeeVampir3 / example_pretokenization.py
Created April 30, 2024 18:56
Exllama tokenization for train
for text, inst, summary in zip(text, instructions, previous_summary):
    if summary != "":
        summarized_augmentation = random.choice(augmented_continuation)
        inst = f"{inst}\n\n{summarized_augmentation} {summary}"
    next_prompt = copy.deepcopy(enc_sys_prompt)
    next_message = encode_message(tokenizer, "user", inst)
    next_prompt.extend(next_message)
    mask_length = len(next_prompt)
    next_prompt.extend(encode_message_english_sentence_truncate(tokenizer, "assistant", text, mask_length, 8150))
CoffeeVampir3 / example_app.py
Last active April 26, 2024 14:47
exllama minimum example
from flask import Flask, render_template
import torch
from flask_socketio import SocketIO, emit
from generation.make_instruct import get_generator_func
from generation.exllama_generator_wrapper import encode_message, encode_system, encode_header
import os,sys
app = Flask(__name__)
socketio = SocketIO(app)
system_prompt = "Respond to all inputs with EEE"