This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
this script will upload a folder to Hugging Face Hub | |
python upload_folder.py --help | |
pip install fire huggingface-hub | |
""" | |
import logging |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import socket | |
import uuid | |
import yake | |
from flask import Flask, redirect, render_template_string, request, url_for | |
from markupsafe import escape | |
app = Flask(__name__) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import torch | |
def configure_tf32(): | |
""" | |
Enable TF32 precision for GPUs with compute capability >= 8.0 (Ampere+). | |
""" | |
if not torch.cuda.is_available(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
cli.py - Command line interface for textsum. | |
this edition: fast CPU inference with intel IPEX https://archive.ph/oY5b1 | |
Usage: | |
textsum-dir --help | |
""" | |
import os |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import logging | |
import re | |
from datetime import datetime | |
from pathlib import Path | |
import datasets | |
import evaluate | |
import fire | |
import intel_extension_for_pytorch as ipex |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding=utf-8 | |
# Copyright 2020 The HuggingFace Inc. team. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Summary | |
""" | |
import logging | |
from pathlib import Path | |
import fire | |
from datasets import Dataset, load_dataset | |
from tqdm.auto import tqdm | |
from transformers import AutoTokenizer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import json | |
def update_tokenizer_post_processor(input_path): | |
""" | |
Load a tokenizer configuration from the input path, update its post_processor | |
with a custom TemplateProcessing configuration, and overwrite the original file. | |
Args: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install sentence-transformers | |
from sentence_transformers import SentenceTransformer, util, models | |
model_name = "nomic-ai/nomic-embed-text-v1" | |
pooling_mode = "mean" | |
word_embedding_model = models.Transformer( | |
model_name, | |
max_seq_length=8192, | |
model_args={"trust_remote_code": True, "rotary_scaling_factor": 2}, | |
tokenizer_args={"trust_remote_code": True}, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install sentence-transformers -q | |
# source: https://www.sbert.net/docs/usage/semantic_textual_similarity.html | |
from sentence_transformers import SentenceTransformer, util | |
model = SentenceTransformer("all-MiniLM-L6-v2") | |
# Two lists of sentences | |
sentences1 = [ | |
"The cat sits outside", |