This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import numpy as np | |
| from datasets import ClassLabel, Dataset, DatasetDict | |
| def split_dataset( | |
| dataset: Dataset, | |
| test_size=0.025, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import logging | |
| import torch | |
| def configure_tf32(): | |
| """ | |
| Enable TF32 precision for GPUs with compute capability >= 8.0 (Ampere+). | |
| """ | |
| if not torch.cuda.is_available(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import logging | |
| import time | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Optional | |
| from huggingface_hub import upload_folder | |
| from watchdog.events import PatternMatchingEventHandler | |
| from watchdog.observers import Observer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from itertools import chain | |
| def calculate_readability(code_string:str) -> float: | |
| code = code_string.splitlines() | |
| # Heuristic 1: Line length | |
| max_line_length = 80 | |
| long_lines = sum(1 for line in code if len(line) > max_line_length) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| generic & basic sbert-like embedder class for the jina-bert model | |
| Usage: | |
| model = EmbeddingModel("jinaai/jina-embeddings-v2-base-en") | |
| embeddings = model.encode( | |
| ["How is the weather today?", "What is the current weather like today?"] | |
| ) | |
| print(model.cos_sim(embeddings[0], embeddings[1])) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import argparse | |
| import requests | |
| from urllib.parse import urlparse | |
| from tqdm import tqdm | |
| from joblib import Parallel, delayed | |
| from tenacity import retry, stop_after_attempt, wait_fixed | |
| @retry(stop=stop_after_attempt(5), wait=wait_fixed(2)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import logging | |
| import gzip | |
| from pathlib import Path | |
| import fire | |
| from tqdm import tqdm | |
| from tokenizers import ( | |
| Tokenizer, | |
| decoders, | |
| models, | |
| normalizers, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # pip install nougat-ocr | |
| # see https://github.com/facebookresearch/nougat for details and license | |
| DEFAULT_BATCHSIZE=4 | |
| usage() { | |
| echo "Usage: $0 <path_to_directory> [--batchsize BATCHSIZE]" | |
| exit 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Example usage: | |
| # python merge_peft.py --base_model=meta-llama/Llama-2-7b-hf --peft_model=./qlora-out --hub_id=alpaca-qlora | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from peft import PeftModel | |
| import torch | |
| import argparse | |
| def get_args(): |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.