Goals: Add links that are reasonable and good explanations of how stuff works. No hype and no vendor content if possible. Practical first-hand accounts of models in prod eagerly sought.

""" | |
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) | |
BSD License | |
""" | |
import numpy as np | |
# data I/O | |
data = open('input.txt', 'r').read() # should be simple plain text file | |
chars = list(set(data)) | |
data_size, vocab_size = len(data), len(chars) |
package server | |
import ( | |
"bufio" | |
"fmt" | |
"log" | |
"net" | |
) | |
// Server ... |
import torch | |
from torch import LongTensor | |
from torch.nn import Embedding, LSTM | |
from torch.autograd import Variable | |
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence | |
## We want to run LSTM on a batch of 3 character sequences ['long_str', 'tiny', 'medium'] | |
# | |
# Step 1: Construct Vocabulary | |
# Step 2: Load indexed data (list of instances, where each instance is list of character indices) |
# [Mamba: Linear-Time Sequence Modeling with Selective State Spaces](https://arxiv.org/abs/2312.00752) | |
import torch | |
import torch.nn as nn | |
import torch.optim as optim | |
from torch.utils.data import DataLoader, Dataset | |
from torch.nn import functional as F | |
from einops import rearrange, repeat | |
from tqdm import tqdm |