One Paragraph of project description goes here
These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.
# So you want to run GPT-J-6B using HuggingFace+FastAPI on a local rig (3090 or TITAN) ... tricky. | |
# special help from the Kolob Colab server https://colab.research.google.com/drive/1VFh5DOkCJjWIrQ6eB82lxGKKPgXmsO5D?usp=sharing#scrollTo=iCHgJvfL4alW | |
# Conversion to HF format (12.6GB tar image) found at https://drive.google.com/u/0/uc?id=1NXP75l1Xa5s9K18yf3qLoZcR6p4Wced1&export=download | |
# Uses GDOWN to get the image | |
# You will need 26 GB of space, 12+GB for the tar and 12+GB expanded (you can nuke the tar after expansion) | |
# Near Simplest Language model API, with room to expand! | |
# runs GPT-J-6B on 3090 and TITAN and serves it using FastAPI | |
# change "seq" (which is the context size) to adjust footprint |
import tensorflow as tf | |
from tqdm import tqdm | |
# Read the index file: one TFRecord file path per line. The context manager
# closes the file handle deterministically instead of leaving it to the
# garbage collector.
with open("data/openwebtext2_new_inputs.train.index") as index_file:
    index = index_file.read().splitlines()

# One dataset element per line of the index file.
dataset = tf.data.Dataset.from_tensor_slices(index)
# Open each listed path as a TFRecordDataset and interleave their records,
# cycling over 128 input files at a time with autotuned parallelism.
dataset = dataset.interleave(
    tf.data.TFRecordDataset,
    cycle_length=128,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)
# Shuffle through a 10,000-element buffer, then prefetch 100 elements.
d = dataset.shuffle(10000).prefetch(100)
# refactor of https://lukeplant.me.uk/blog/posts/double-checked-locking-with-django-orm/ | |
# untested | |
def double_checked_lock_iterator(queryset):
    """Yield the rows of ``queryset`` one at a time, each locked for update.

    The primary keys are collected first without locking. Each row is then
    re-fetched through ``queryset`` (so its filters are applied again) with
    ``select_for_update(skip_locked=True)`` inside its own
    ``transaction.atomic()`` block. A row whose re-fetch raises
    ``DoesNotExist`` is silently skipped.

    Because the ``yield`` happens inside the ``atomic()`` block, the
    transaction for a row stays open while the consumer works on the yielded
    item and is only closed when the generator is resumed or closed.

    Args:
        queryset: A Django queryset selecting the candidate rows.

    Yields:
        Model instances fetched with ``select_for_update(skip_locked=True)``.
    """
    for item_pk in queryset.values_list("pk", flat=True):
        with transaction.atomic():
            try:
                # Look up by ``pk`` (not ``id``) to match the ``values_list("pk")``
                # above; this also works for models whose primary key field
                # is not named ``id``.
                item = queryset.select_for_update(skip_locked=True).get(pk=item_pk)
            except queryset.model.DoesNotExist:
                # Deliberate best-effort: the row could not be fetched under
                # the lock, so move on to the next candidate.
                continue
            # Yield outside the ``try`` so only the lookup is guarded.
            yield item
$/
artifacts/
build/
docs/
lib/
packages/
samples/
src/
tests/
// Route path patterns keyed by a short page name.
// NOTE(review): `:uuid` in `transactionDetails` looks like a placeholder
// segment to be substituted at runtime — confirm against the router in use.
const routes = { | |
home: '/', | |
transactions: '/transactions', | |
transactionDetails: '/transactions/:uuid', | |
} | |
const urls: Record< | |
keyof typeof routes, | |
{ get: (params?: any) => string; route: string } | |
> = new Proxy(routes, { |
# Sebastian Raschka 09/24/2022 | |
# Create a new conda environment and packages | |
# conda create -n whisper python=3.9 | |
# conda activate whisper | |
# conda install mlxtend -c conda-forge | |
# Install ffmpeg | |
# macOS & homebrew | |
# brew install ffmpeg | |
# Ubuntu |
This document summarises the work that I have done as part of Google Summer of Code 2022 (GSoC).
#!/usr/bin/env bash | |
# Small shell script that makes it easier to automatically download and transcribe live stream VODs. | |
# This uses YT-DLP, ffmpeg and the CPP version of Whisper: https://github.com/ggerganov/whisper.cpp | |
# Use `./transcribe-vod help` to print help info. | |
# MIT License | |
# Copyright (c) 2022 Daniils Petrovs |