This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import logging | |
def check_ampere_gpu(): | |
""" | |
Check if the GPU supports NVIDIA Ampere or later and enable FP32 in PyTorch if it does. | |
""" | |
# Check if CUDA is available | |
if not torch.cuda.is_available(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
cli.py - Command line interface for textsum. | |
this edition: fast CPU inference with intel IPEX https://archive.ph/oY5b1 | |
Usage: | |
textsum-dir --help | |
""" | |
import os |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import logging | |
import re | |
from datetime import datetime | |
from pathlib import Path | |
import datasets | |
import evaluate | |
import fire | |
import intel_extension_for_pytorch as ipex |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding=utf-8 | |
# Copyright 2020 The HuggingFace Inc. team. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Summary | |
""" | |
import logging | |
from pathlib import Path | |
import fire | |
from datasets import Dataset, load_dataset | |
from tqdm.auto import tqdm | |
from transformers import AutoTokenizer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import json | |
def update_tokenizer_post_processor(input_path): | |
""" | |
Load a tokenizer configuration from the input path, update its post_processor | |
with a custom TemplateProcessing configuration, and overwrite the original file. | |
Args: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install sentence-transformers | |
from sentence_transformers import SentenceTransformer, util, models | |
model_name = "nomic-ai/nomic-embed-text-v1" | |
pooling_mode = "mean" | |
word_embedding_model = models.Transformer( | |
model_name, | |
max_seq_length=8192, | |
model_args={"trust_remote_code": True, "rotary_scaling_factor": 2}, | |
tokenizer_args={"trust_remote_code": True}, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install sentence-transformers -q | |
# source: https://www.sbert.net/docs/usage/semantic_textual_similarity.html | |
from sentence_transformers import SentenceTransformer, util | |
model = SentenceTransformer("all-MiniLM-L6-v2") | |
# Two lists of sentences | |
sentences1 = [ | |
"The cat sits outside", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import AutoTokenizer | |
def load_and_ensure_tokens(model_name): | |
# Load the tokenizer | |
tokenizer = AutoTokenizer.from_pretrained(model_name) | |
# Essential special tokens with their default values | |
essential_tokens = { | |
"pad_token": "<pad>", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import logging | |
import time | |
from datetime import datetime | |
from pathlib import Path | |
from typing import Optional | |
from huggingface_hub import upload_folder | |
from watchdog.events import PatternMatchingEventHandler | |
from watchdog.observers import Observer |