git clone [email protected]:filipstrand/mflux.git
cd mflux && pip install -r requirements.txt
Name this anything, maybe flux.py. Make sure to update the two paths marked below.
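A minimal sketch of what such a script might contain. The import path and generation call below are hypothetical placeholders rather than mflux's actual API; check the repository's README for the real entry points.

import sys

# UPDATE: path to the src directory of your mflux clone
sys.path.append("/path/to/mflux/src")

# Hypothetical import and API -- consult the mflux README for the real names.
from flux_1.flux import Flux1

flux = Flux1()
image = flux.generate_image(seed=3, prompt="A photo of an astronaut riding a horse")
# UPDATE: where to save the generated image
image.save("/path/to/image.png")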
cmake_minimum_required(VERSION 3.27)
project(_ext LANGUAGES CXX)

# ----------------------------- Setup -----------------------------
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
option(BUILD_SHARED_LIBS "Build as a shared library" ON)
import argparse
from functools import partial
import multiprocessing as mp
from typing import Callable, Optional

import mlx.core as mx
import mlx.nn as nn
from mlx.utils import tree_map_with_path
from mlx_lm.utils import *
from huggingface_hub import snapshot_download
import mlx.core as mx
import mlx.nn as nn
import time


class Block(nn.Module):
    def __init__(self, in_dims, dims, stride=1):
        super().__init__()
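        # The excerpt is truncated here. A plausible (hypothetical)
        # continuation for a basic residual block: two 3x3 convolutions.
        self.conv1 = nn.Conv2d(in_dims, dims, kernel_size=3, stride=stride, padding=1)
        self.conv2 = nn.Conv2d(dims, dims, kernel_size=3, stride=1, padding=1)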
import numpy as np
import mlx.core as mx
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import tqdm


def conway(a: mx.array):
    source = """
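The Metal source itself is elided in this excerpt. For context, here is a sketch of how a custom kernel built with mx.fast.metal_kernel is typically constructed and launched once the source string is complete; the kernel name and launch geometry are illustrative, not the article's exact code.

# Sketch: build the kernel from the Metal source, then launch one thread
# per cell of the grid. Name and sizes are illustrative.
kernel = mx.fast.metal_kernel(
    name="conway_step",
    input_names=["a"],
    output_names=["out"],
    source=source,
)
(out,) = kernel(
    inputs=[a],
    grid=(a.size, 1, 1),
    threadgroup=(256, 1, 1),
    output_shapes=[a.shape],
    output_dtypes=[a.dtype],
)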
import os

import mlx.core as mx
from mlx_lm import load, generate

filename = os.path.join(os.path.dirname(mx.__file__), "core/__init__.pyi")
with open(filename, 'r') as fid:
    prompt = fid.read()
prompt += "\nHow do you write a self-attention layer using the above API in MLX?"

model, tokenizer = load("mlx-community/meta-Llama-3.1-8B-Instruct-4bit")
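# A likely continuation (not shown in the excerpt): run generation over the
# assembled prompt. The sampling parameters here are illustrative.
response = generate(model, tokenizer, prompt=prompt, max_tokens=512, verbose=True)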
""" | |
A minimal, fast example generating text with Llama 3.1 in MLX. | |
To run, install the requirements: | |
pip install -U mlx transformers fire | |
Then generate text with: | |
python l3min.py "How tall is K2?" |
# Requires:
#   pip install pyobjc-framework-Metal
import numpy as np
import Metal

# Get the default GPU device
device = Metal.MTLCreateSystemDefaultDevice()

# Make a command queue to encode command buffers to
command_queue = device.newCommandQueue()
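# Possible next steps (not in the original excerpt): create a command buffer
# on the queue, commit it, and block until the GPU has finished executing it.
command_buffer = command_queue.commandBuffer()
command_buffer.commit()
command_buffer.waitUntilCompleted()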
Recall, MLX is lazy. No actual computation happens until you explicitly or implicitly evaluate the graph. Even loading arrays from a file is lazy:
weights = mx.load("model.safetensors")
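Nothing is read from disk until an array is actually used. A quick sketch to make this concrete (the weight key is a hypothetical example):

import mlx.core as mx

weights = mx.load("model.safetensors")  # returns lazy arrays; no data read yet
w = weights["layers.0.weight"]          # hypothetical key; still no disk I/O
mx.eval(w)                              # evaluation forces the actual load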