Install MLX LM:
pip install mlx-lm
Then run:
On every machine in the cluster, install OpenMPI and mlx-lm:
conda install conda-forge::openmpi
pip install -U mlx-lm
Next, download the pipeline-parallel run script. Download it to the same path on every machine:
cmake_minimum_required(VERSION 3.27)
project(example LANGUAGES CXX)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(
  Python 3.9
  COMPONENTS Interpreter Development.Module
import argparse
from functools import partial
import multiprocessing as mp
from typing import Callable, Optional
import mlx.core as mx
import mlx.nn as nn
from mlx.utils import tree_map_with_path
from mlx_lm.utils import *
This guide is adapted from an original post by Christopher Charles.
from huggingface_hub import snapshot_download
import mlx.core as mx
import mlx.nn as nn
import time

class Block(nn.Module):
    def __init__(self, in_dims, dims, stride=1):
        super().__init__()
import numpy as np
import mlx.core as mx
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import tqdm

def conway(a: mx.array):
    source = """
import os
import mlx.core as mx
from mlx_lm import load, generate

filename = os.path.join(os.path.dirname(mx.__file__), "core/__init__.pyi")
with open(filename, 'r') as fid:
    prompt = fid.read()
prompt += "\nHow do you write a self-attention layer using the above API in MLX?"
model, tokenizer = load("mlx-community/meta-Llama-3.1-8B-Instruct-4bit")
git clone [email protected]:filipstrand/mflux.git
cd mflux && pip install -r requirements.txt
Name this anything, maybe flux.py
. Make sure to update the two paths marked below.