import argparse
import glob
import tqdm
import json
from transformers import LlamaTokenizer
from nltk import tokenize

pretrained_model_path = '/home/models/Llama-2-7b-hf'
tokenizer = LlamaTokenizer.from_pretrained(pretrained_model_path)
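# Hedged sketch (not part of the original gist): a plausible continuation of the
# script above, assuming it walks a glob of JSON files, sentence-splits each record
# with nltk, and reports the Llama token count per file. The --input glob, the
# "text" JSON field, and the CLI shape are illustrative assumptions, not values
# taken from the source. Requires nltk punkt data (nltk.download('punkt')).
def count_tokens(path: str) -> int:
    with open(path) as f:
        record = json.load(f)
    sentences = tokenize.sent_tokenize(record["text"])  # assumed field name
    return sum(len(tokenizer.encode(s)) for s in sentences)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Count Llama tokens per JSON file")
    parser.add_argument("--input", default="data/*.json",
                        help="glob of JSON files (assumed layout)")
    args = parser.parse_args()
    for path in tqdm.tqdm(glob.glob(args.input)):
        print(path, count_tokens(path))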
import json
import multiprocessing
import pathlib

import ebooklib
import typer
from ebooklib import epub
from markdownify import markdownify as md
from transformers import LlamaTokenizer
from nltk import tokenize
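# Hedged sketch (not part of the original gist): one way the imports above are
# typically combined - pull each XHTML document out of an EPUB with ebooklib,
# convert it to Markdown with markdownify, and sentence-split the result across a
# process pool. Function names, the typer CLI surface, and the per-file reporting
# are illustrative assumptions; the LlamaTokenizer and json imports are left for
# the later chunking/serialisation steps, which are not shown here.
def epub_to_markdown(path: pathlib.Path) -> str:
    book = epub.read_epub(str(path))
    chapters = []
    for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        chapters.append(md(item.get_content().decode("utf-8")))
    return "\n\n".join(chapters)

def main(epub_dir: pathlib.Path):
    paths = sorted(epub_dir.glob("*.epub"))
    with multiprocessing.Pool() as pool:
        texts = pool.map(epub_to_markdown, paths)
    for path, text in zip(paths, texts):
        print(path.name, len(tokenize.sent_tokenize(text)), "sentences")

if __name__ == "__main__":
    typer.run(main)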
#
# A wrapper script to quantise models with GPTQ, from one of various datasets
#
import time
import os
import logging
import random

from datasets import load_dataset
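# Hedged sketch (not part of the original gist): a typical calibration-data loader
# for a GPTQ quantisation wrapper - fetch a text dataset with datasets.load_dataset
# and draw a random sample of rows to feed the quantiser. The dataset name, config,
# split, sample size, and "text" field are illustrative assumptions, not values
# taken from the source.
logging.basicConfig(level=logging.INFO)

def get_calibration_texts(dataset_id: str = "wikitext",
                          config: str = "wikitext-2-raw-v1",
                          n_samples: int = 128,
                          seed: int = 0) -> list:
    random.seed(seed)
    data = load_dataset(dataset_id, config, split="train")
    rows = random.sample(range(len(data)), n_samples)
    return [data[i]["text"] for i in rows]

if __name__ == "__main__":
    start = time.time()
    texts = get_calibration_texts()
    logging.info("loaded %d calibration samples in %.1fs",
                 len(texts), time.time() - start)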