import yaml
import heapq
import argparse
import numpy as np
import language_tool_python
from functools import lru_cache
from collections import Counter
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize
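These imports point to a text-evaluation utility (BLEU scoring plus grammar checking). A minimal, hedged sketch of how they are typically combined; the LanguageTool locale, the reference/candidate strings, and the printout are illustrative and not part of the original file:

# Hypothetical usage sketch: score a candidate sentence against a reference
# (needs NLTK's 'punkt' tokenizer data and a Java runtime for LanguageTool)
tool = language_tool_python.LanguageTool('en-US')

reference = "the cat sat on the mat"
candidate = "the cat sat on a mat"

# sentence_bleu expects a list of tokenized references and a tokenized candidate
bleu = sentence_bleu([word_tokenize(reference)], word_tokenize(candidate))

# LanguageTool returns one match per detected grammar/style issue
errors = len(tool.check(candidate))

print(f"BLEU: {bleu:.3f}, grammar issues: {errors}")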
# Excerpt -- assuming LanceDB's embedding registry; adjust the import to your setup
from lancedb.embeddings import register, TextEmbeddingFunction

@register("azure_openai")
class AzureOpenAIEmbeddings(TextEmbeddingFunction):
    """
    An embedding function that uses the Azure OpenAI API.
    """
    name: str = "text-embedding-ada-002"
    azure_api_key: str
    azure_endpoint: str
    azure_deployment: str
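A hypothetical usage sketch, assuming LanceDB's embedding registry API and that the full class also implements generate_embeddings() and ndims(), which this excerpt omits; the environment-variable names are placeholders:

# Hypothetical usage sketch -- env var names are placeholders
import os
from lancedb.embeddings import get_registry

embed_fn = get_registry().get("azure_openai").create(
    azure_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment="text-embedding-ada-002",
)

# Embed a couple of strings directly
vectors = embed_fn.compute_source_embeddings(["hello world", "goodbye world"])
print(len(vectors), len(vectors[0]))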
import os
import openai
from jinja2 import Template, meta, Environment
from dotenv import load_dotenv

load_dotenv()  # expects a .env file defining the variables read below

# setup is for Azure; change accordingly for regular OpenAI
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_type = os.getenv("OPENAI_API_TYPE")
openai.api_version = os.getenv("OPENAI_API_VERSION")
openai.api_base = os.getenv("OPENAI_API_BASE")
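The .env file referenced above would define the four variables read by os.getenv; the values below are placeholders for an Azure OpenAI setup, not actual settings:

# .env (placeholder values -- substitute your own)
OPENAI_API_KEY=<your-azure-openai-key>
OPENAI_API_TYPE=azure
OPENAI_API_VERSION=2023-05-15
OPENAI_API_BASE=https://<your-resource>.openai.azure.com/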
""" | |
the prompts.json file should contain (adjust for your prompt and use case): | |
{ | |
"prompts": { | |
{ | |
"example": { | |
"input_variables = ["variables", "here"], | |
"template = ["Prompt details and {variables} {here} according to your use case.\nRationale:"] | |
} |
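A hypothetical sketch (not part of the original file) of loading such a prompts.json entry and filling in its variables; the file path, keys, and values are illustrative:

# Hypothetical loader sketch -- path and variable values are illustrative
import json

with open("prompts.json") as f:
    prompts = json.load(f)["prompts"]

entry = prompts["example"]
values = {"variables": "foo", "here": "bar"}  # one value per entry in input_variables

# The template uses {name}-style placeholders, so str.format fills them in
rendered = entry["template"].format(**values)
print(rendered)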
import tensorflow as tf
from tqdm import tqdm

index = open("data/openwebtext2_new_inputs.train.index").read().splitlines()
dataset = tf.data.Dataset.from_tensor_slices(index)
dataset = dataset.interleave(tf.data.TFRecordDataset, cycle_length=128, num_parallel_calls=tf.data.experimental.AUTOTUNE)
d = dataset.shuffle(10000).prefetch(100)
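A small, hedged follow-up showing one way the pipeline above might be consumed; the record cap and the parsing note are illustrative, and the original snippet is truncated before tqdm is actually used:

# Illustrative consumption of the pipeline above (counting only; no feature spec assumed)
num_records = 0
for raw_record in tqdm(d.take(1000)):  # cap at 1000 records for a quick sanity check
    # raw_record is a scalar tf.string tensor holding one serialized tf.Example;
    # decode it with tf.io.parse_single_example(...) against your feature spec.
    num_records += 1
print(num_records)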
import numpy as np
import torch

def AttentionMask(encoder_len, state_len, decoder_len, offset=0, near_decay=0, far_decay=0, device='cpu'):
    # Builds a per-position distance ramp over the [encoder | decoder | state] columns
    # (excerpt is truncated below; near_decay/far_decay are unused in this excerpt)
    m = -offset * np.tri(decoder_len, encoder_len + decoder_len + state_len, encoder_len)
    for i in range(encoder_len + decoder_len - 1):
        m += np.tri(decoder_len, encoder_len + decoder_len + state_len, encoder_len - i - 1)
    if state_len:
        ms = np.zeros((state_len, encoder_len + decoder_len + state_len))
        m = np.concatenate([m, ms], axis=0)
    m = torch.tensor(m, dtype=torch.float32, device=device)
    mx = 1 - np.tri(decoder_len, encoder_len + decoder_len, encoder_len)
    mx = np.concatenate([mx, np.zeros((decoder_len, state_len))], axis=1)
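For intuition, a small standalone illustration (not from the original file) of the np.tri building block the function stacks up; the sizes are arbitrary toy values:

# Standalone illustration of np.tri as a causal-mask building block (toy sizes)
import numpy as np

decoder_len, encoder_len = 4, 3
# Each decoder row sees every encoder column plus itself and earlier decoder columns
causal = np.tri(decoder_len, encoder_len + decoder_len, encoder_len)
print(causal)
# [[1. 1. 1. 1. 0. 0. 0.]
#  [1. 1. 1. 1. 1. 0. 0.]
#  [1. 1. 1. 1. 1. 1. 0.]
#  [1. 1. 1. 1. 1. 1. 1.]]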
import torch
import torch.nn as nn

class FIR(nn.Module):
    def __init__(self, in_dim, out_dim=None, hidden_dim=None, segment_sizes=[1, 2, 4, 8], activation=nn.functional.gelu, device='cpu'):
        super().__init__()
        if not out_dim: out_dim = in_dim
        if not hidden_dim: hidden_dim = in_dim
        cursor = 1
        nodes = [cursor]
# Modified StyleGAN2 Projector with CLIP, addl. losses, kmeans, etc.
# by Peter Baylies, 2021 -- @pbaylies on Twitter
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.