Install packages:
pip install open-webui mlx-lm
Start Open WebUI server:
| class GLU: Module, UnaryLayer { | |
| let dim: Int | |
| init(dim: Int) { | |
| self.dim = dim | |
| } | |
| func callAsFunction(_ x: MLXArray) -> MLXArray { | |
| let (a, b) = x.split(axis: dim) | |
| return a * MLXNN.sigmoid(b) |
This guide assumes you have some familiarity with MLX and want to make your MLX model or algorithm as efficient as possible.
The guide covers the following:
On every machine in the cluster, install openmpi and mlx-lm:
conda install conda-forge::openmpi
pip install -U mlx-lm
Next, download the pipeline parallel run script. Download it to the same path on every machine:
| cmake_minimum_required(VERSION 3.27) | |
| project(example LANGUAGES CXX) | |
| set(CMAKE_CXX_STANDARD 17) | |
| set(CMAKE_CXX_STANDARD_REQUIRED ON) | |
| find_package( | |
| Python 3.9 | |
| COMPONENTS Interpreter Development.Module |
| import argparse | |
| from functools import partial | |
| import multiprocessing as mp | |
| from typing import Callable, Optional | |
| import mlx.core as mx | |
| import mlx.nn as nn | |
| from mlx.utils import tree_map_with_path | |
| from mlx_lm.utils import * |
This guide is adapted from this original post by Christopher Charles.
| from huggingface_hub import snapshot_download | |
| import mlx.core as mx | |
| import mlx.nn as nn | |
| import time | |
| class Block(nn.Module): | |
| def __init__(self, in_dims, dims, stride=1): | |
| super().__init__() |