Sani khursani8

YouTube Transcript SKILL

Use this skill to fetch the transcript of a YouTube video, with or without timestamps.

Use this skill with Claude (by extracting it to .claude/skills/) or with any other agent using Skillz.

Note: This skill is unlikely to run successfully on the Claude web app, since access to YouTube is blocked. Use it with Claude Code or other local agents.

	# train_grpo.py
	#
	# See https://github.com/willccbb/verifiers for ongoing developments
	#
	"""
	citation:

	@misc{brown2025grpodemo,
	title={Granular Format Rewards for Eliciting Mathematical Reasoning Capabilities in Small Language Models},
	author={Brown, William},

	ministrations
	audible pop
	rivulets of
	admit it
	the ball is in your court
	the game is on
	the choice is yours
	I don't bite... unless you want me to
	half-lidded eyes
	she worries her bottom lip


	"""
	A simple script that reads tweets from a JSON file, uses OpenAI to compute embeddings, and creates two files, metadata.tsv and output.tsv, which can be used to visualise the tweets and their embeddings in TensorFlow Projector (https://projector.tensorflow.org/)
	"""

	# obtain tweets.json from https://gist.github.com/gd3kr/948296cf675469f5028911f8eb276dbc

	import pandas as pd
	import json
	from openai import OpenAI

	import os

	import gradio as gr
	import torch

	from infer import get_net_g, infer
	import utils

	# Two key lists; the code that consumes them is not part of this excerpt.
	# NOTE(review): "dec"/"flow" and "enc_p" look like sub-module name prefixes
	# of the network loaded via get_net_g — confirm where these lists are used.
	voice_keys = ["dec", "flow"]
	speech_style_keys = ["enc_p"]

	"""

	The code below combines approaches published by both @eugene-yh and @jinyongyoo on GitHub.

	Thanks for the contributions, guys!

	"""

	import torch
	import peft

	from datasets import load_dataset
	import torch
	from peft import LoraConfig, prepare_model_for_int8_training
	from trl import SFTTrainer
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer, TrainingArguments

	# Dataset identifier passed to load_dataset; only the "train" split is loaded.
	dataset_name = "timdettmers/openassistant-guanaco"
	dataset = load_dataset(dataset_name, split="train")

	# Model identifier; the code that consumes it is outside this excerpt.
	model_name = "facebook/opt-350m"

	#!/usr/bin/env python

	import math
	import matplotlib.pyplot as plt
	import torch
	import torch.nn as nn

	from sklearn.datasets import make_moons
	from torch import Tensor
	from tqdm import tqdm

	from transformers import AutoTokenizer

	# Tokenize an already word-split sentence and print each sub-token next to
	# the source word it was produced from.
	tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
	text = "It 's a pre-tokenized , silly sentence !"
	# The sentence is whitespace-delimited already, so a plain split yields the words.
	words = text.split()
	# is_split_into_words=True tells the tokenizer the input is a list of words,
	# not a batch of separate sentences.
	encoded = tokenizer(words, is_split_into_words=True)

	# tokens() and word_ids() are walked in lockstep: one word index per token.
	for token, wordid in zip(encoded.tokens(), encoded.word_ids()):
	    # Tokens with no source word carry a word id of None (per the Hugging Face
	    # docs this is the case for special tokens); skip them so that
	    # words[wordid] is never indexed with None.
	    if wordid is not None:
	        print(token, words[wordid])