Philip Turner (philipturner)

@philipturner
philipturner / GEMV_bandwidth.swift
Last active June 7, 2023 19:05
Simulate the bandwidth achieved while executing feedforward layers in LLaMA
import Metal
// MARK: - Usage
// Usage:
//
// 1) Install Xcode from the Mac App Store
//
// 2) From the command line:
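The gist itself contains the full benchmark; a minimal sketch of the bandwidth arithmetic it is measuring, where the layer shapes, q4_0 byte count, and timing placeholders are assumptions rather than the gist's exact code, looks like this:

```swift
import QuartzCore

// Assumed LLaMA 7B feedforward shapes and q4_0 storage cost (not the gist's exact code).
let nEmbd = 4096                 // hidden size
let nFF = 11008                  // feedforward size
let bytesPerWeight = 18.0 / 32.0 // q4_0 stores 32 weights in an 18-byte block

// Bytes the GPU must stream for one feedforward GEMV (the weight matrix dominates).
let weightBytes = Double(nEmbd * nFF) * bytesPerWeight

let start = CACurrentMediaTime()
// ... encode the GEMV command buffer here and wait until it completes ...
let end = CACurrentMediaTime()

// Achieved bandwidth = bytes moved / wall-clock time.
print("achieved bandwidth:", weightBytes / (end - start) / 1e9, "GB/s")
```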
@philipturner
philipturner / llama-7B.ggml
Created June 4, 2023 17:55 — forked from ggerganov/llama-7B.ggml
LLaMA 7B ggml computation graph
20:47:34 ▶ metal ▶ 12⎘ ▶ $ ▶ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export
main: build = 652 (b252acb)
main: seed = 1685900854
llama.cpp: loading model from ../models/7B/ggml-model-q4_0.bin
llama_model_load_internal: format = ggjt v3 (latest)
llama_model_load_internal: n_vocab = 32000
llama_model_load_internal: n_ctx = 512
llama_model_load_internal: n_embd = 4096
llama_model_load_internal: n_mult = 256
llama_model_load_internal: n_head = 32
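The feedforward width is not printed in this excerpt, but llama.cpp derives it from n_embd and n_mult. A sketch of that derivation, assuming the rounding formula llama.cpp used around this build, yields 11008 for the 7B model:

```swift
// Assumed llama.cpp formula for the feedforward width at this era:
// round 2/3 * (4 * n_embd) up to a multiple of n_mult, using integer arithmetic.
let nEmbd = 4096
let nMult = 256
let nFF = ((2 * (4 * nEmbd) / 3 + nMult - 1) / nMult) * nMult
print("n_ff =", nFF)  // 11008 for LLaMA 7B
```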
@philipturner
philipturner / CalculateLLaMA.swift
Last active July 29, 2023 20:55
Calculate how much each layer contributes to latency in LLaMA, and simulate how certain changes would affect performance
//
// main.swift
// CalculateLLaMA
//
// Created by Philip Turner on 6/8/23.
//
import Foundation
import QuartzCore
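A common way to attribute latency to individual layers is a roofline estimate: each layer takes at least as long as its memory traffic and its arithmetic each demand. A minimal sketch of that idea follows; the hardware numbers and the example layer are placeholders, not the gist's actual model:

```swift
import Foundation

// Hypothetical hardware limits for the simulation (placeholders, not measured values).
let bandwidth: Double = 400e9   // bytes/second
let compute: Double = 10e12     // FLOP/second

struct Layer {
    var name: String
    var bytes: Double   // memory traffic per token
    var flops: Double   // arithmetic per token

    // Roofline estimate: the layer is bound by whichever resource takes longer.
    var latency: Double { max(bytes / bandwidth, flops / compute) }
}

// Example: one q4_0 feedforward matrix of a 7B model (4096 x 11008, 4.5 bits/weight).
let ffnUp = Layer(name: "ffn_up",
                  bytes: 4096 * 11008 * 0.5625,
                  flops: 2 * 4096 * 11008)
print(ffnUp.name, ffnUp.latency * 1e6, "µs")
```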
//
// ContentView.swift
// SIMDFuturesA15
//
// Created by Philip Turner on 6/9/23.
//
import SwiftUI
import Foundation
import QuartzCore
//
// main.swift
// TestMFAGEMM
//
// Created by Philip Turner on 6/21/23.
//
import Accelerate
import Metal

Simulation results, ranked from highest to lowest latency. Generated using https://gist.github.com/philipturner/d408351d68b5b1701bb651d4542e26e6

Raw data is private, but there's an older, publicly available substitute at https://gist.github.com/philipturner/94e7c5094915f23438440d49da823c9d

The statistics for MFA Winograd are speculative, and eventual performance may be lower. For example, Winograd may not be finished this summer, or even by the time the code is published.


System:

  • 32-core Apple 7 GPU, 1.296 GHz
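For context, the peak FP32 throughput implied by that spec, assuming 128 FP32 ALUs per GPU core and one FMA (2 FLOPs) per ALU per clock, is roughly $32 \times 128 \times 2 \times 1.296\ \text{GHz} \approx 10.6$ TFLOPS.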
REMARK 5
REMARK 5 Created 2023/06/04 01:23:03 AM UTC
REMARK 5 with NanoEngineer-1 version 1.1.1.14 nanoengineer-1.com
REMARK 5
REMARK 6
REMARK 6 This file generally complies with the PDB format version 2.3
REMARK 6 Notes:
REMARK 6
REMARK 6 - Sets of atoms are treated as PDB chains terminated with TER
REMARK 6 records. Since the number of atom sets can exceed the 95 possible

TODO: Put this into the GitHub repository instead, along with data from the Google Sheet.

Attention Heads Scaling with Model Size

| Model | size | $n_{heads}$ | $d_{head}$ | size $/ n_{heads}^2$ | size $/ n_{heads}^4$ |
|---|---|---|---|---|---|
| GPT-3 Small | 125M | 12 | 64 | 0.9M | |
| GPT-3 Medium | 350M | 16 | 64 | 1.4M | |
| GPT-3 Large | 760M | 16 | 96 | 3.0M | |
| GPT-3 XL | 1.3B | 24 | 128 | 2.3M | |
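As a sanity check on the size $/ n_{heads}^2$ column, GPT-3 Small gives $125\text{M} / 12^2 \approx 0.87\text{M}$, which rounds to the 0.9M listed above.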
// Cost of grouping points i..j into one cluster: the sum of squared deviations
// from their mean, computed in O(1) from prefix sums of x (cumsum) and x^2 (cumsum2).
__attribute__((__always_inline__))
inline static double _kmeans1d_cost(double* cumsum, double* cumsum2, int i, int j)
{
  if (j < i)
    return 0;
  double mu = (cumsum[j + 1] - cumsum[i]) / (j - i + 1);
  double result = cumsum2[j + 1] - cumsum2[i];
  result += (j - i + 1) * (mu * mu);
  result -= (2 * mu) * (cumsum[j + 1] - cumsum[i]);
  return result;
}
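In other words, the function evaluates the within-cluster sum of squared deviations for points $x_i, \dots, x_j$, using prefix sums of $x$ (cumsum) and of $x^2$ (cumsum2): $\sum_{k=i}^{j} (x_k - \mu)^2 = \sum_{k=i}^{j} x_k^2 + (j - i + 1)\mu^2 - 2\mu \sum_{k=i}^{j} x_k$, where $\mu$ is the mean of $x_i, \dots, x_j$.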
//
// main.swift
// KMeans
//
// Created by Philip Turner on 6/29/23.
//
import Metal
example_smawk()