import torch
import torch.nn.functional as F
from transformers import GPTNeoForCausalLM, AutoTokenizer
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import random

# Parameters
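# The snippet above stops at its "# Parameters" header. The block below is a minimal,
# hypothetical sketch of how such a setup might continue, inferred only from the imports
# (GPT-Neo causal LM, datasets, train_test_split). Every name, checkpoint, dataset, and
# value here is an assumption for illustration, not part of the original gist.
MODEL_NAME = "EleutherAI/gpt-neo-125M"   # assumed checkpoint
MAX_LENGTH = 128                         # assumed maximum sequence length
SEED = 42

# make runs reproducible across python, numpy, and torch
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# load tokenizer and model; GPT-Neo has no pad token by default
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
model = GPTNeoForCausalLM.from_pretrained(MODEL_NAME)
model.eval()

# load a dataset (assumed here to be IMDB) and split it with sklearn
dataset = load_dataset("imdb", split="train")
df = dataset.to_pandas()
train_df, test_df = train_test_split(df, test_size=0.1, random_state=SEED)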
# pip install -U bitsandbytes
# pip install -U git+https://github.com/huggingface/transformers.git
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
path_to_hub = XXX  # replace with your Hub repo id, e.g. "username/model-name"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
model.push_to_hub(path_to_hub)
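# A hedged alternative to the bare load_in_4bit=True flag above: newer transformers
# releases prefer an explicit BitsAndBytesConfig. The compute dtype, quant type, and
# device_map values below are assumptions for illustration, not from the original gist.
import torch
from transformers import BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumed compute dtype
    bnb_4bit_quant_type="nf4",             # assumed 4-bit quantization type
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    device_map="auto",                     # let accelerate place the shards
)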
# To use this, first install Python and exllamav2 (https://github.com/turboderp/exllamav2).
# Load a model, rearrange the layers as you like, set generation parameters, and run it.
# Duplicate layers share tensors, but still need extra memory for the cache.
# Thanks to @dnhkng for showing that the cache needs to be re-created.
# Licensed under WTFPL (http://www.wtfpl.net/about/) - Silphendio
from exllamav2 import *
from exllamav2.generator import *
import sys, torch
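# A hedged sketch of the workflow the comments above describe: load a model with
# exllamav2, duplicate/rearrange decoder layers in place, then re-create the cache
# before generating. The model path and layer arrangement are made up, and the
# module-list internals (model.modules, head_layer_idx, last_kv_layer_idx) are
# assumptions about exllamav2's internal layout that may differ across versions.
config = ExLlamaV2Config()
config.model_dir = "./models/my-exl2-model"   # assumed local path to an EXL2 model
config.prepare()

model = ExLlamaV2(config)
model.load()
tokenizer = ExLlamaV2Tokenizer(config)

# Example arrangement: repeat layers 8-21 after layers 0-13 (a simple "self-merge").
layer_arrangement = list(range(0, 14)) + list(range(8, 22))

# Assumed layout: model.modules is [embedding, (attn, mlp) per layer ..., norm, head].
# Rebuild the list so duplicated layers reuse the same tensors.
old_modules = model.modules
model.modules = old_modules[:1]
for idx in layer_arrangement:
    model.modules += old_modules[idx * 2 + 1 : idx * 2 + 3]
model.modules += old_modules[-2:]
model.head_layer_idx = len(model.modules) - 1
model.config.num_hidden_layers = len(layer_arrangement)
model.last_kv_layer_idx = len(model.modules) - 4

# Re-create the cache after rearranging layers (credit to @dnhkng, per the comment
# above), since its size depends on the new layer count.
cache = ExLlamaV2Cache(model)
generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)

settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.8
settings.top_p = 0.9

print(generator.generate_simple("The meaning of life is", settings, 200))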