This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from rapidfuzz import process, fuzz | |
| def fuzzy_align(masterlist, list2, cutoff=70): | |
| # Dictionary to hold matches | |
| matches = {} | |
| # Track used indices to avoid duplicate matches in the masterlist | |
| used_indices = set() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import logging | |
| from email.parser import BytesParser | |
| from pathlib import Path | |
| import fire | |
| import html2text | |
| import pandas as pd | |
| from tqdm import tqdm | |
| # Setup logging |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import numpy as np | |
| from datasets import ClassLabel, Dataset, DatasetDict | |
| def split_dataset( | |
| dataset: Dataset, | |
| test_size=0.025, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| this script will upload a folder to Hugging Face Hub | |
| python upload_folder.py --help | |
| pip install fire huggingface-hub | |
| """ | |
| import logging |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import socket | |
| import uuid | |
| import yake | |
| from flask import Flask, redirect, render_template_string, request, url_for | |
| from markupsafe import escape | |
| app = Flask(__name__) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import logging | |
| import torch | |
| def configure_tf32(): | |
| """ | |
| Enable TF32 precision for GPUs with compute capability >= 8.0 (Ampere+). | |
| """ | |
| if not torch.cuda.is_available(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| cli.py - Command line interface for textsum. | |
| this edition: fast CPU inference with intel IPEX https://archive.ph/oY5b1 | |
| Usage: | |
| textsum-dir --help | |
| """ | |
| import os |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import logging | |
| import re | |
| from datetime import datetime | |
| from pathlib import Path | |
| import datasets | |
| import evaluate | |
| import fire | |
| import intel_extension_for_pytorch as ipex |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # coding=utf-8 | |
| # Copyright 2020 The HuggingFace Inc. team. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Summary | |
| """ | |
| import logging | |
| from pathlib import Path | |
| import fire | |
| from datasets import Dataset, load_dataset | |
| from tqdm.auto import tqdm | |
| from transformers import AutoTokenizer |