
Peter pszemraj

@pszemraj
pszemraj / textgen_inference_code.py
Created January 6, 2024 23:38
example inference script for beecoder-220M-python
import logging
import random
import time
from pathlib import Path
import fire
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
logging.basicConfig(format="%(levelname)s - %(message)s", level=logging.INFO)
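The preview cuts off after the imports; below is a minimal sketch of how the rest of the script plausibly looks, reusing the imports shown above. The repo id BEE-spoke-data/beecoder-220M-python and the generation settings are assumptions, not taken from the gist.

def main(
    model_id: str = "BEE-spoke-data/beecoder-220M-python",  # assumed Hub repo id
    prompt: str = "def fibonacci(n):",
    max_new_tokens: int = 128,
):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    start = time.perf_counter()
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.2,
        pad_token_id=tokenizer.eos_token_id,
    )
    logging.info(f"generated in {time.perf_counter() - start:.2f}s")
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

if __name__ == "__main__":
    fire.Fire(main)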
@pszemraj
pszemraj / hf_repofolder_watchdog.py
Created December 12, 2023 01:43
The script is designed to monitor a specified directory for any file system changes (like additions, deletions, or modifications of files and subdirectories) and automatically upload the changes to a specified repository on the Hugging Face Hub.
"""
The script is designed to monitor a specified directory for any file system changes (like additions, deletions, or modifications of files and subdirectories) and automatically upload the changes to a specified repository on the Hugging Face Hub.
pip install huggingface-hub watchdog
"""
import argparse
import logging
import time
from pathlib import Path
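The preview stops at the imports; here is a condensed sketch of the watch-and-upload loop described above, using watchdog's Observer and huggingface_hub's upload_folder. Argument names and the one-second poll interval are illustrative, not taken from the gist.

from huggingface_hub import HfApi
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

class UploadOnChangeHandler(FileSystemEventHandler):
    """Re-uploads the watched folder whenever anything inside it changes."""

    def __init__(self, folder: Path, repo_id: str):
        self.folder = folder
        self.repo_id = repo_id
        self.api = HfApi()

    def on_any_event(self, event):
        logging.info(f"change detected: {event.src_path}")
        self.api.upload_folder(folder_path=str(self.folder), repo_id=self.repo_id)

def watch(folder: str, repo_id: str):
    observer = Observer()
    observer.schedule(UploadOnChangeHandler(Path(folder), repo_id), folder, recursive=True)
    observer.start()
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()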
@pszemraj
pszemraj / format2alpaca.py
Created December 8, 2023 23:18
quick formatting function given instruction/input/response cols -> make 'text' col
import os
import random
from datasets import load_dataset
def format_dataset(example):
"""Formats the dataset example into a single 'text' field."""
# Add input only if it is longer than 2 characters
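The function is cut off after that comment; a sketch of how it likely continues, assuming the standard Alpaca prompt layout and columns named instruction/input/response as in the description.

def format_dataset(example):
    """Formats the dataset example into a single 'text' field."""
    # Add input only if it is longer than 2 characters
    has_input = example.get("input") is not None and len(example["input"]) > 2
    instruction_block = f"### Instruction:\n{example['instruction']}\n\n"
    input_block = f"### Input:\n{example['input']}\n\n" if has_input else ""
    response_block = f"### Response:\n{example['response']}"
    example["text"] = instruction_block + input_block + response_block
    return example

# usage: dataset = load_dataset("some/dataset")["train"].map(format_dataset)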
@pszemraj
pszemraj / tf32_activate.py
Created December 6, 2023 04:47
sort of manual - Check if the GPU supports NVIDIA Ampere or later and enable TF32 in PyTorch if it does.
import logging
import subprocess
import torch
def check_ampere_gpu():
"""Check if the GPU supports NVIDIA Ampere or later and enable FP32 in PyTorch if it does."""
# Check if CUDA is available
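The preview ends at the CUDA check; a sketch of how the rest of the function plausibly reads, flipping the real torch.backends TF32 switches. The gist also imports subprocess (presumably to shell out to nvidia-smi), which this torch-only version skips.

def check_ampere_gpu():
    """Check if the GPU supports NVIDIA Ampere or later and enable TF32 in PyTorch if it does."""
    # Check if CUDA is available
    if not torch.cuda.is_available():
        logging.info("No CUDA device found; TF32 settings left unchanged.")
        return
    major, _minor = torch.cuda.get_device_capability()
    if major >= 8:  # Ampere is compute capability 8.x
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
        logging.info(f"{torch.cuda.get_device_name()}: Ampere or newer, TF32 enabled")
    else:
        logging.info(f"{torch.cuda.get_device_name()}: pre-Ampere, TF32 not enabled")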
@pszemraj
pszemraj / test_synthsumm.py
Created December 6, 2023 03:16
test out synthsumm summarization models via the free inference api
import os
import time
import requests
class Timer:
"""Basic timer utility."""
def __enter__(self):
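The preview only shows the opening of the Timer class; below is a sketch of the request helper the rest of the script presumably wraps, using the standard free Inference API endpoint. The model id is a placeholder for whichever synthsumm checkpoint is being tested, and HF_TOKEN is assumed to be set in the environment.

API_URL = "https://api-inference.huggingface.co/models/{model_id}"
HEADERS = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

def summarize(text: str, model_id: str = "your-org/synthsumm-checkpoint") -> str:
    """Call the hosted summarization model and return the summary text."""
    response = requests.post(
        API_URL.format(model_id=model_id),
        headers=HEADERS,
        json={"inputs": text, "parameters": {"max_new_tokens": 256}},
    )
    response.raise_for_status()
    return response.json()[0]["summary_text"]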
@pszemraj
pszemraj / ubuntu_util_pkgs.md
Created November 29, 2023 22:53
some ubuntu packages helpful for CPU things related to ML

Useful misc installs


Kernel and Low-Level Tools

  1. Microcode Update: Keeping your CPU microcode updated can improve performance and security. You can install the AMD microcode package by running:

sudo apt install amd64-microcode

@pszemraj
pszemraj / query_wellformedness_score.py
Created November 29, 2023 18:50
inference with a model trained on query well-formedness
"""
inference with a model trained on query well-formedness
https://huggingface.co/Ashishkr/query_wellformedness_score
pip install transformers accelerate optimum -q
"""
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
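The preview stops at the imports; a minimal sketch of the scoring loop for the linked Ashishkr/query_wellformedness_score model follows. Reading the single logit through a sigmoid as a 0-1 well-formedness score is an assumption based on the model card, not code from the gist.

model_name = "Ashishkr/query_wellformedness_score"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model.eval()

queries = ["what time is it in berlin", "me want food now"]
inputs = tokenizer(queries, padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
scores = torch.sigmoid(logits).squeeze(-1)  # higher = more well-formed (assumed)
for query, score in zip(queries, scores):
    print(f"{score:.3f}\t{query}")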
@pszemraj
pszemraj / run_summarization_langchain.py
Created November 24, 2023 17:17
summarization with langchain + openai
"""
run_langchain_summarization.py - Generate summaries using langchain + LLMs
For usage details, run `python run_langchain_summarization.py --help`; fire will print them.
Notes:
- you need to have OPENAI_API_KEY set as an environment variable (easiest way is export OPENAI_API_KEY=memes123)
- install the dependencies using the requirements.txt file or below
pip install fire langchain clean-text tqdm tiktoken
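The docstring preview ends before the code; below is a sketch of the core map-reduce summarization call using the langchain APIs as they existed around the time of this gist (pre-0.1 import paths). The chunk sizes, model name, and CLI wiring are illustrative assumptions.

from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.text_splitter import TokenTextSplitter

def summarize_text(text: str, model_name: str = "gpt-3.5-turbo-16k") -> str:
    """Split the text into token-sized chunks and run a map-reduce summary chain."""
    llm = ChatOpenAI(model_name=model_name, temperature=0)
    splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=200)
    docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    return chain.run(docs)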
@pszemraj
pszemraj / eval_101m_gqa.log
Created November 15, 2023 18:45
eval logs for smol_llama variants courtesy of @Tanmay
BEE-spoke-data/smol_llama-101M-GQA with Model Revision: .
Output dir: 101mgqa
Batch Size: 64
Device ID: cuda:2
Setting number of workers to 4
[dynet] random seed: 1234
[dynet] allocating memory: 32MB
[dynet] memory allocation done.
Token indices sequence length is longer than the specified maximum sequence length for this model (1377 > 1024). Running this sequence through the model will result in indexing errors
Selected Tasks: ['arc_easy', 'boolq', 'lambada_openai', 'openbookqa', 'piqa', 'winogrande']
@pszemraj
pszemraj / process_audio_distil_whisper.py
Created November 9, 2023 03:14
# Function to process audio using distil-whisper
from pathlib import Path
import logging
from typing import Optional, Union
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Function to process audio using distil-whisper
def process_audio_distil_whisper(
audio_path: Union[str, Path],
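The signature is cut off after the first argument; a sketch of how the function plausibly continues, following the distil-whisper model card recipe. The default model id and the chunk/batch settings are assumptions.

def process_audio_distil_whisper(
    audio_path: Union[str, Path],
    model_id: str = "distil-whisper/distil-large-v2",
    chunk_length_s: int = 15,
    batch_size: int = 16,
) -> str:
    """Transcribe an audio file with a distil-whisper checkpoint and return the text."""
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=dtype, low_cpu_mem_usage=True, use_safetensors=True
    ).to(device)
    processor = AutoProcessor.from_pretrained(model_id)

    asr = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=chunk_length_s,
        batch_size=batch_size,
        torch_dtype=dtype,
        device=device,
    )
    return asr(str(audio_path))["text"]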