This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the causal-LM checkpoint and the tokenizer published under the same
# repository id. device_map='auto' is forwarded to from_pretrained unchanged.
model = AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Hermes-Llama2-13b', device_map = 'auto')
tokenizer = AutoTokenizer.from_pretrained('NousResearch/Nous-Hermes-Llama2-13b')
model.eval()  # put the module in evaluation mode before inspecting it

# Show how the string 'yes' is encoded, then decode a single token id.
# NOTE(review): the recorded outputs below mention two different ids
# (4871 vs 4874) — confirm whether that difference is intentional.
print(tokenizer('yes')) # [1, 4871]
print(tokenizer.decode(4874)) # yes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import llama | |
import torch | |
import pandas as pd | |
from torch.utils.data import Dataset, random_split | |
from transformers import TrainingArguments, Trainer | |
# Checkpoint identifier (presumably a Hugging Face Hub repo id — confirm
# against the code that loads it).
MODEL = 'decapoda-research/llama-7b-hf'
# CSV data file, given as a path relative to the working directory.
DATA_FILE_PATH = 'elon_musk_tweets.csv'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import pickle | |
import struct | |
import zipfile | |
import numpy as np | |
from sentencepiece import SentencePieceProcessor | |
def rms_norm(x, eps=1e-6):
    """Divide ``x`` by the root-mean-square of its last axis.

    Args:
        x: Numeric array; the mean square is taken over axis -1 with
            ``keepdims=True`` so the result broadcasts back against ``x``.
        eps: Constant added to the mean square before the square root, so an
            all-zero vector does not divide by zero. Defaults to ``1e-6``.

    Returns:
        Array of the same shape as ``x``. No gain/weight is applied here.
    """
    mean_square = np.square(x).mean(-1, keepdims=True)
    return x / np.sqrt(mean_square + eps)
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The per-slice maximum is subtracted before exponentiating so large inputs
    do not overflow; this does not change the result.

    Args:
        x: Numeric array.
        axis: Axis along which the outputs sum to one. Defaults to the last
            axis.

    Returns:
        Array of the same shape as ``x``.
    """
    # Compute the shifted exponentials once and reuse them for the normaliser.
    shifted_exp = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return shifted_exp / np.sum(shifted_exp, axis=axis, keepdims=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cleaned-up, minimal Perceiver transformer, adapted from https://github.com/Rishit-dagli/Perceiver
# Original paper: "Perceiver: General Perception with Iterative Attention", Jaegle et al., https://arxiv.org/pdf/2103.03206.pdf
import math | |
import tensorflow as tf | |
from typing import Callable | |
from einops import rearrange, repeat | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
from copy import deepcopy | |
import tensorflow.keras.backend as K | |
from tensorflow.keras.optimizers import Adam | |
from tensorflow.keras.models import Model, clone_model | |
from tensorflow.keras.layers import Input, Dropout, Dense, ReLU, BatchNormalization, Activation, Concatenate | |
class DynamicNet(object): | |
def __init__(self, c0 = None, lr = None, concat_input=False, additive_boosting=False, encoder_layers=None): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import math | |
import pickle | |
import shutil | |
import joblib | |
import warnings | |
import datetime | |
import numpy as np |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os | |
import sys | |
import keras | |
import shutil | |
import numpy as np | |
import pandas as pd | |
import keras.backend as K | |
from keras.models import Model |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def tf_graph_wrapper(func):
    """Wraps a class method with a tf.Graph context manager.

    Args:
        func: Method whose first positional argument is the instance. The
            instance must expose ``_graph`` with an ``as_default()`` method
            that returns a context manager.

    Returns:
        A method with the same name and docstring as ``func`` that enters
        ``self._graph.as_default()`` for the duration of each call and
        returns whatever ``func`` returns.
    """
    # Imported locally so the decorator works even when the module has no
    # top-level ``from functools import wraps``.
    from functools import wraps

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        with self._graph.as_default():
            return func(self, *args, **kwargs)

    return wrapper
def tf_init(func): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
from tensorflow.keras.layers import Activation | |
from tensorflow.keras.utils import get_custom_objects | |
class Mish(Activation): | |
''' | |
Mish Activation Function. | |
.. math:: | |
mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x})) | |
Shape: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import numpy as np | |
import pandas as pd | |
from collections import Counter, defaultdict | |
def stratified_group_k_fold(X, y, groups, k, seed=None): | |
labels_num = np.max(y) + 1 | |
y_counts_per_group = defaultdict(lambda: np.zeros(labels_num)) | |
y_distr = Counter() |