Install packages:
pip install open-webui mlx-lm
Start Open WebUI server:
| class GLU: Module, UnaryLayer { | |
| let dim: Int | |
| init(dim: Int) { | |
| self.dim = dim | |
| } | |
| func callAsFunction(_ x: MLXArray) -> MLXArray { | |
| let (a, b) = x.split(axis: dim) | |
| return a * MLXNN.sigmoid(b) |
This guide assumes you have some familiarity with MLX and want to make your MLX model or algorithm as efficient as possible.
The guide covers the following:
On every machine in the cluster, install openmpi and mlx-lm:
conda install conda-forge::openmpi
pip install -U mlx-lm
Next, download the pipeline parallel run script. Download it to the same path on every machine:
| cmake_minimum_required(VERSION 3.27) | |
| project(example LANGUAGES CXX) | |
| set(CMAKE_CXX_STANDARD 17) | |
| set(CMAKE_CXX_STANDARD_REQUIRED ON) | |
| find_package( | |
| Python 3.9 | |
| COMPONENTS Interpreter Development.Module |
| import argparse | |
| from functools import partial | |
| import multiprocessing as mp | |
| from typing import Callable, Optional | |
| import mlx.core as mx | |
| import mlx.nn as nn | |
| from mlx.utils import tree_map_with_path | |
| from mlx_lm.utils import * |
This guide is adapted from this original post by Christopher Charles.
| from huggingface_hub import snapshot_download | |
| import mlx.core as mx | |
| import mlx.nn as nn | |
| import time | |
| class Block(nn.Module): | |
| def __init__(self, in_dims, dims, stride=1): | |
| super().__init__() |