@vitorcalvi
Forked from kfsone/Dockerfile
Created May 13, 2024 08:12
WIP Dockerfile for litellm + ollama + memgpt + autogen + jupyter
# Work in progress. V0.1
# ALL THE THINGS.
ARG APT_PROXY #=http://apt-cacher-ng.lan:3142/
ARG PIP_INDEX_URL #=http://devpi.lan:3141/root/pypi/+simple
ARG PIP_TRUSTED_HOST #=devpi.lan
ARG JUPYTER_PORT=37799
ARG LITELLM_PORT=11111
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build-llama
ARG APT_PROXY
ENV APT_PROXY=$APT_PROXY
ARG TARGETARCH=amd64
ENV GOARCH=$TARGETARCH
ARG GOFLAGS="'-ldflags=-w -s'"
ENV GOFLAGS=$GOFLAGS
ADD https://dl.google.com/go/go1.21.3.linux-$TARGETARCH.tar.gz /tmp/go1.21.3.tar.gz
WORKDIR /go/src/github.com/jmorganca/ollama
RUN if [ -n "${APT_PROXY}" ]; then echo "Acquire::http::Proxy \"${APT_PROXY}\";" >/etc/apt/apt.conf.d/02-proxy ; fi; \
    apt update && \
    apt upgrade -qy && \
    apt install -qy \
        git cmake build-essential \
    && \
    apt autoclean && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir -p /usr/local && \
    tar xzf /tmp/go1.21.3.tar.gz -C /usr/local && rm /tmp/go1.*.tar.gz && \
    mkdir -p /go/src/github.com/jmorganca && \
    cd /go/src/github.com/jmorganca && \
    git clone --recurse-submodules https://github.com/jmorganca/ollama.git
RUN /usr/local/go/bin/go generate ./... && \
/usr/local/go/bin/go build .
################################################################################
# Runtime stages.
#
# The "packages" stage installs the Python toolchain and the shared Python
# dependencies; the "final" stage below copies in the ollama binary produced by
# the "build-llama" stage above. Ideally the runtime stage carries only the
# minimal dependencies needed to run the application, to keep the image as
# small as possible; for now a plain ubuntu:22.04 base is used.
FROM ubuntu:22.04 as packages
ARG APT_PROXY
RUN if [ -n "${APT_PROXY}" ]; then echo "Acquire::http::Proxy \"${APT_PROXY}\";" >/etc/apt/apt.conf.d/02-proxy ; fi; \
    apt update && \
    apt upgrade -qy && \
    apt install -qy \
        vim git curl wget python3 python-is-python3 python3-venv python3-pip \
    && \
    apt autoclean && \
    rm -rf /var/lib/apt/lists/* && \
    rm -f /etc/apt/apt.conf.d/02-proxy
WORKDIR /workspace
ARG PIP_INDEX_URL
ENV PIP_INDEX_URL=$PIP_INDEX_URL
ARG PIP_TRUSTED_HOST
ENV PIP_TRUSTED_HOST=$PIP_TRUSTED_HOST
RUN python -m venv venv && \
    . venv/bin/activate && \
    pip install --no-cache pyautogen pymemgpt jupyter numpy pandas pyyaml && \
    deactivate
FROM packages AS litellm_prep
ARG PIP_INDEX_URL
ENV PIP_INDEX_URL=$PIP_INDEX_URL
ARG PIP_TRUSTED_HOST
ENV PIP_TRUSTED_HOST=$PIP_TRUSTED_HOST
RUN git clone https://github.com/BerriAI/litellm.git /app && rm -rf /app/dist
WORKDIR /app
RUN if [ -n "${PIP_INDEX_URL}" ]; then pip config set global.index-url "${PIP_INDEX_URL}"; fi; \
    if [ -n "${PIP_TRUSTED_HOST}" ]; then pip config set global.trusted-host "${PIP_TRUSTED_HOST}"; fi; \
    python -m venv venv && \
    . venv/bin/activate && \
    pip install --no-cache -r requirements.txt pyyaml && \
    deactivate
FROM litellm_prep as final
ARG JUPYTER_PORT
ENV JUPYTER_PORT=$JUPYTER_PORT
ARG LITELLM_PORT
ENV LITELLM_PORT=$LITELLM_PORT
WORKDIR /workspace
COPY --from=build-llama /go/src/github.com/jmorganca/ollama/ollama /usr/bin/ollama
COPY start.sh .
COPY ollama.yaml .
RUN chmod +x start.sh
EXPOSE $JUPYTER_PORT
EXPOSE $LITELLM_PORT
Suggested command line (for this early incomplete version):
docker build --tag ollama-litellm-memgpt .
docker run -d --gpus=all -v ollama:/root/.ollama --name litellm -p 37799:37799 -p 11111:11111 ollama-litellm-memgpt ./start.sh
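If you run a local apt-cacher-ng or devpi mirror (the commented-out defaults at the top of the Dockerfile), the caching args can be passed at build time; a sketch using those example hosts:

docker build \
  --build-arg APT_PROXY=http://apt-cacher-ng.lan:3142/ \
  --build-arg PIP_INDEX_URL=http://devpi.lan:3141/root/pypi/+simple \
  --build-arg PIP_TRUSTED_HOST=devpi.lan \
  --tag ollama-litellm-memgpt .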
Port 11111 is the litellm API; 37799 is the Jupyter notebook server (password MyJupyter).
NOTE: I have the model in start.sh set to vicuna:7b-16k, but litellm doesn't know about this model yet (I'm experimenting with my own branch of litellm)
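Once the container is up, the litellm proxy should answer the usual OpenAI-compatible chat route; a minimal smoke test (assuming the llama2 alias from ollama.yaml below, and that the model has actually been pulled into ollama):

curl http://localhost:11111/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "llama2", "messages": [{"role": "user", "content": "Say hello"}]}'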
model_list:
  - model_name: llama2
    litellm_params:
      model: ollama/llama2:7b
      api_base: http://localhost:11434
  - model_name: vicuna
    litellm_params:
      model: ollama/vicuna:7b-16k
      api_base: http://localhost:11434
  - model_name: mistral
    litellm_params:
      model: ollama/mistral:7b
      api_base: http://localhost:11434
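Each model_name above is the alias clients send to litellm; litellm_params.model and api_base route that alias to the ollama server inside the container. start.sh only pulls vicuna:7b-16k, so the other aliases will fail until their models are pulled too; a quick way to check or fix that from the host (assuming the container is named litellm, as in the run command above):

docker exec -it litellm ollama list
docker exec -it litellm ollama pull mistral:7b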
#! /bin/bash
# Start ollama
cd /workspace
ollama serve &
sleep 2
ollama pull vicuna:7b-16k
# Start litellm, which wants its own venv so the openai module
# doesn't conflict with MemGPT's.
cd /app
. ./venv/bin/activate
hash -r
pip install -r requirements.txt fastapi tomli tomli_w backoff pyyaml
litellm --config /workspace/ollama.yaml --port ${LITELLM_PORT} &
deactivate
# Start jupyter notebook
cd /workspace
. /workspace/venv/bin/activate
#jupyter notebook --ip 0.0.0.0 --port ${JUPYTER_PORT} --allow-root --NotebookApp.token='' --NotebookApp.password='MyLlms' &
#wait
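The Jupyter launch is still commented out in this version; until it is wired up, one option is to exec into the running container and reuse the flags from the commented line above (a sketch, not part of the original setup):

docker exec -it litellm bash -c \
  '. /workspace/venv/bin/activate && jupyter notebook --ip 0.0.0.0 --port 37799 --allow-root'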