Skip to content

Instantly share code, notes, and snippets.

View Blaizzy's full-sized avatar
🏠
Working from home

Prince Canuma Blaizzy

🏠
Working from home
View GitHub Profile
diff --git a/src/transformers/models/csm/convert_csm.py b/src/transformers/models/csm/convert_csm.py
index 28fbc9fe49..fab38f5ec5 100644
--- a/src/transformers/models/csm/convert_csm.py
+++ b/src/transformers/models/csm/convert_csm.py
@@ -31,6 +31,7 @@ from transformers import (
MimiModel,
)
from transformers.utils.hub import cached_file
+from safetensors.torch import load_file, save_file
import argparse
import asyncio
import json
import logging
import os
import wave
import aiohttp
import numpy as np
import sounddevice as sd
import json
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional, Union
import mlx.core as mx
import mlx.nn as nn
from mlx_lm.generate import stream_generate
from mlx_lm.models.base import BaseModelArgs, create_attention_mask
import argparse
import gradio as gr
import requests
import json
import asyncio
async def process_sse_stream(url, headers, data):
response = requests.post(url, headers=headers, json=data, stream=True)
import aiohttp
import json
import gradio as gr
from gradio import ChatMessage
class CustomChatAPI:
def __init__(self, base_url, model):
self.base_url = base_url
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
import os
import pyarrow as pa
import pyarrow.parquet as pq
from tqdm import tqdm
def process_and_save_parquet(all_logits, all_input_ids, all_attention_masks, top_50_token_ids, top_50_values, output_dir, shard_idx, total_shards):
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset
import numpy as np
# Hyperparameters
temperature = 0.7
alpha = 0.5
@Blaizzy
Blaizzy / llama-3-70B-qlora.yaml
Created May 15, 2024 17:08 — forked from mtisz/llama-3-70B-qlora.yaml
Axolotl Config for Llama-3-70B QLoRA
base_model: meta-llama/Meta-Llama-3-70B
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
load_in_4bit: true
strict: false
datasets:
- path: /home/migel/ai_datasets/tess-v1.5b-chatml.jsonl
@Blaizzy
Blaizzy / mixtral-8x22B.yaml
Created May 9, 2024 16:14 — forked from mtisz/mixtral-8x22B.yaml
Axolotl Config for Mixtral-8x22B
base_model: mistral-community/Mixtral-8x22B-v0.1
model_type: MixtralForCausalLM
tokenizer_type: AutoTokenizer
is_mistral_derived_model: false
trust_remote_code: true
load_in_8bit: false
load_in_4bit: true
strict: false
@Blaizzy
Blaizzy / DataCollatorForCompletionOnlyLM
Last active February 19, 2026 09:24
How TRL DataCollatorForCompletionOnlyLM works
```python
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import AutoTokenizer
from datasets import load_dataset
# Load Dataset and tokenizer
dataset = load_dataset('prince-canuma/tinyOrca', split='train')
tokenizer = AutoTokenizer.from_pretrained("prince-canuma/Damysus-2.7B-Chat")