@martin-g · Last active October 28, 2024 09:39
KleidiAI on openEuler
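# Dockerfile.kleidiai: build torchchat with KleidiAI-accelerated int4 quantization (via a patched torchao) on openEuler, targeting aarch64 CPUs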
FROM openeuler/openeuler:22.03-lts-sp4
RUN dnf update -y && dnf install -y gperftools-libs git wget
RUN adduser kleidiai
USER kleidiai
WORKDIR /home/kleidiai
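# Install Miniforge (conda) to provide GCC/G++ and Python 3.10 in the user's home directory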
RUN wget --quiet https://github.com/conda-forge/miniforge/releases/download/24.7.1-2/Miniforge3-Linux-aarch64.sh
RUN bash Miniforge3-Linux-aarch64.sh -b
# Single quotes keep $PATH unexpanded, so the build-time PATH is not hard-coded into .bashrc
RUN echo 'export PATH=/home/kleidiai/miniforge3/bin:$PATH' >> /home/kleidiai/.bashrc
ENV PATH="/home/kleidiai/miniforge3/bin:$PATH"
RUN conda init bash && \
source /home/kleidiai/.bashrc && \
conda install -y gcc gxx python=3.10
RUN python3 --version && \
which python3
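# Fetch torchao at a pinned commit and apply the Arm KleidiAI quantization-schemes patch (built and installed further below)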
RUN git clone --recursive https://github.com/pytorch/ao.git
WORKDIR ao
RUN git checkout 174e630af2be8cd18bc47c5e530765a82e97f45b
RUN wget --quiet https://raw.githubusercontent.com/ArmDeveloperEcosystem/PyTorch-arm-patches/main/0001-Feat-Add-support-for-kleidiai-quantization-schemes.patch
RUN git apply --whitespace=nowarn 0001-Feat-Add-support-for-kleidiai-quantization-schemes.patch
WORKDIR ../
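# Fetch torchchat at a pinned commit and apply the Arm patches for int4 quantized models and the generate.py CLI/browser changes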
RUN git clone --recursive https://github.com/pytorch/torchchat.git
WORKDIR torchchat
RUN git checkout 925b7bd73f110dd1fb378ef80d17f0c6a47031a6
RUN wget --quiet https://raw.githubusercontent.com/ArmDeveloperEcosystem/PyTorch-arm-patches/main/0001-modified-generate.py-for-cli-and-browser.patch
RUN wget --quiet https://raw.githubusercontent.com/ArmDeveloperEcosystem/PyTorch-arm-patches/main/0001-Feat-Enable-int4-quantized-models-to-work-with-pytor.patch
RUN git apply 0001-Feat-Enable-int4-quantized-models-to-work-with-pytor.patch
RUN git apply --whitespace=nowarn 0001-modified-generate.py-for-cli-and-browser.patch
RUN pip install -r requirements.txt
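# Swap the torch pulled in by requirements.txt for the Arm-provided PyTorch 2.5.0 nightly CPU wheel for aarch64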
RUN wget --quiet https://github.com/ArmDeveloperEcosystem/PyTorch-arm-patches/raw/main/torch-2.5.0.dev20240828+cpu-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
RUN pip install --force-reinstall torch-2.5.0.dev20240828+cpu-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
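# Replace any pre-installed torchao with the patched checkout, rebuilt from source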
WORKDIR ..
RUN pip uninstall -y torchao
WORKDIR ao/
RUN rm -rf build
RUN python setup.py install
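# A Hugging Face token (passed as a build argument) is needed to download the Llama 3.1 weights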
ARG HF_TOKEN
RUN huggingface-cli login --token $HF_TOKEN
WORKDIR ../torchchat
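# Ahead-of-time export of llama3.1 to a shared object (DSO) with channel-wise int4 quantization for aarch64 CPU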
RUN python torchchat.py export llama3.1 --output-dso-path exportedModels/llama3.1.so --quantize config/data/aarch64_cpu_channelwise.json --device cpu --max-seq-length 1024
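# Default command: chat-mode generation against the exported model, with tcmalloc preloaded and TorchInductor C++ wrapper/freezing enabled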
CMD ["sh", "-c", "LD_PRELOAD=/usr/lib64/libtcmalloc.so.4 TORCHINDUCTOR_CPP_WRAPPER=1 TORCHINDUCTOR_FREEZING=1 OMP_NUM_THREADS=16 python torchchat.py generate llama3.1 --dso-path exportedModels/llama3.1.so --device cpu --max-new-tokens 32 --chat"]
martin-g commented Oct 28, 2024

Build it with: docker buildx build --platform=linux/arm64 . -f Dockerfile.kleidiai -t kleidiai-oe --build-arg HF_TOKEN=...
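
Run it with something like: docker run --rm -it --platform=linux/arm64 kleidiai-oe (a sketch based on the CMD above; the platform flag and image tag are assumptions, adjust them for your host). The container then starts the llama3.1 chat session defined by the Dockerfile's CMD.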
