Asif Rahman asifr

Purpose

Bootstrap knowledge of LLMs as quickly as possible, with a focus on GPT.

Avoid being a link dump. Try to provide only valuable, well-tuned information.

Neural network links to read before starting with transformers.

	❯ rm out.csv
	❯ cat 1.py
	from glob import glob
	import mmap

	# Collect every path matching data/* (one entry per file in the data directory).
	files = glob("data/*")

	# Sort numerically by file name: take the last "/"-separated path component,
	# keep the part before the first ".", and compare it as an int so that
	# e.g. "10.csv" sorts after "2.csv". A non-numeric stem raises ValueError.
	files.sort(key=lambda x: int(x.split("/")[-1].split(".")[0]))
	# "w+b" creates (or truncates) out.csv and opens it for binary read/write.
	write_f = open("out.csv", "w+b")

	import os; import psutil; import timeit
	from datasets import load_dataset

	# Resident set size (RSS) of this process before loading; ">> 20" converts
	# the byte count to whole mebibytes.
	mem_before = psutil.Process(os.getpid()).memory_info().rss >> 20
	# Load the train split of the "20200501.en" configuration of the wikipedia dataset.
	wiki = load_dataset("wikipedia", "20200501.en", split='train')
	# RSS after loading, in the same unit as mem_before.
	mem_after = psutil.Process(os.getpid()).memory_info().rss >> 20
	# Report how much the process RSS grew across the load_dataset call.
	print(f"RAM memory used: {(mem_after - mem_before)} MB")

	s = """batch_size = 1000
	for i in range(0, len(wiki), batch_size):

	# Copyright 2019 Google LLC.
	# SPDX-License-Identifier: Apache-2.0

	# Author: Anton Mikhailov

	turbo_colormap_data = [[0.18995,0.07176,0.23217],[0.19483,0.08339,0.26149],[0.19956,0.09498,0.29024],[0.20415,0.10652,0.31844],[0.20860,0.11802,0.34607],[0.21291,0.12947,0.37314],[0.21708,0.14087,0.39964],[0.22111,0.15223,0.42558],[0.22500,0.16354,0.45096],[0.22875,0.17481,0.47578],[0.23236,0.18603,0.50004],[0.23582,0.19720,0.52373],[0.23915,0.20833,0.54686],[0.24234,0.21941,0.56942],[0.24539,0.23044,0.59142],[0.24830,0.24143,0.61286],[0.25107,0.25237,0.63374],[0.25369,0.26327,0.65406],[0.25618,0.27412,0.67381],[0.25853,0.28492,0.69300],[0.26074,0.29568,0.71162],[0.26280,0.30639,0.72968],[0.26473,0.31706,0.74718],[0.26652,0.32768,0.76412],[0.26816,0.33825,0.78050],[0.26967,0.34878,0.79631],[0.27103,0.35926,0.81156],[0.27226,0.36970,0.82624],[0.27334,0.38008,0.84037],[0.27429,0.39043,0.85393],[0.27509,0.40072,0.86692],[0.27576,0.41097,0.87936],[0.27628,0.42118,0.89123],[0.27667,0.43134,0.90254],[0.27691,0.44145,0.913

	# taken from http://www.piware.de/2011/01/creating-an-https-server-in-python/
	# generate server.pem with the following command:
	# openssl req -new -x509 -keyout key.pem -out server.pem -days 365 -nodes
	# run as follows:
	# python simple-https-server.py
	# then in your browser, visit:
	# https://localhost:4443


	import http.server

	import torch, torch.nn as nn, torch.nn.functional as F
	import numpy as np
	import torch.optim as optim

	# tied autoencoder using off the shelf nn modules
	class TiedAutoEncoderOffTheShelf(nn.Module):
	def __init__(self, inp, out, weight):
	super().__init__()
	self.encoder = nn.Linear(inp, out, bias=False)
	self.decoder = nn.Linear(out, inp, bias=False)

	import math
	import numpy as np
	from sklearn.linear_model import Ridge


	class LinearModelTree:
	def __init__(self, min_node_size, node_model_fit_func, min_split_improvement=0):
	self.min_node_size = min_node_size
	self.node_model_fit_func = node_model_fit_func
	self.min_split_improvement = min_split_improvement