Last active
January 6, 2020 11:49
-
-
Save nashmaniac/c55bdcee133ed4f557e6684367331b1f to your computer and use it in GitHub Desktop.
Dockerfile for building a data science image
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Base image. Pinned to an explicit release instead of `latest` so that rebuilds
# are reproducible and do not silently pick up a new Alpine/Python version.
# NOTE(review): 3.11 is assumed to be the release `latest` resolved to when this
# file was last touched (January 2020) - confirm, and bump deliberately.
FROM alpine:3.11

LABEL MAINTAINER="Raju Ahmed Shetu<[email protected]>"

# Linking of locale.h as xlocale.h
# The symlink itself is created in the RUN instruction further down; it is needed
# for a successful install of the Python numpy package.
# See https://forum.alpinelinux.org/comment/690#comment-690 for more information.

# Default working directory for later instructions and for the running container.
WORKDIR /var/www/
# SOFTWARE PACKAGES
# Whitespace-separated list of Alpine packages, consumed unquoted by `apk add`
# in the RUN instruction below.
#   * dumb-init: minimal init process for containers
#   * musl: standard C library
#   * libc6-compat: compatibility libraries for glibc
#   * linux-headers: commonly needed, and an unusual package name from Alpine.
#   * build-base: used so we include the basic development packages (gcc)
#   * bash: so we can access /bin/bash
#   * git: to ease up clones of repos
#   * ca-certificates: for SSL verification during pip and easy_install
#   * freetype: library used to render text onto bitmaps, and provides support for font-related operations
#   * libgfortran: contains a Fortran shared library, needed to run Fortran
#   * libgcc: contains shared code that would be inefficient to duplicate every time as well as auxiliary helper routines and runtime support
#   * libstdc++: the GNU Standard C++ Library; an additional runtime library for C++ programs built with the GNU compiler
#   * openblas: open source implementation of the BLAS (Basic Linear Algebra Subprograms) API with many hand-crafted optimizations for specific processor types
#   * tcl: scripting language
#   * tk: GUI toolkit for the Tcl scripting language
#   * libssl1.0: SSL shared libraries
#   * postgresql-dev: PostgreSQL client development headers and libraries
#   * gcc: the GNU C compiler
#   * python3-dev: Python 3 headers for building C extensions
#   * musl-dev: musl C library development headers
#   * libffi: foreign function interface library
ENV PACKAGES="\
    dumb-init \
    musl \
    libc6-compat \
    linux-headers \
    build-base \
    bash \
    git \
    ca-certificates \
    freetype \
    libgfortran \
    libgcc \
    libstdc++ \
    openblas \
    tcl \
    tk \
    libssl1.0 \
    postgresql-dev \
    gcc \
    python3-dev \
    musl-dev \
    libffi \
"
# PYTHON DATA SCIENCE PACKAGES
# Whitespace-separated list of PyPI packages, consumed unquoted by `pip install`
# in the RUN instruction below.
#   * numpy: support for large, multi-dimensional arrays and matrices
#   * matplotlib: plotting library for Python and its numerical mathematics extension NumPy
#   * cython: compiler for C extensions written in a Python-like language
#   * scipy: library used for scientific computing and technical computing
#   * scikit-learn: machine learning library that integrates with NumPy and SciPy
#   * pandas: library providing high-performance, easy-to-use data structures and data analysis tools
#   * nltk: suite of libraries and programs for symbolic and statistical natural language processing for English
ENV PYTHON_PACKAGES="\
    numpy \
    matplotlib \
    cython \
    scipy \
    scikit-learn \
    pandas \
    nltk \
"
# Install Python and the data-science stack in a single layer:
#   1. Install python3 under the `build-dependencies` virtual package.
#   2. Append the Alpine v3.8 main repository and install libssl1.0 with it
#      available (presumably because the base image's own repositories no
#      longer ship that package - confirm).
#   3. Install the compiler toolchain and -dev headers under the `build-runtime`
#      virtual package, so they can be removed again as one group in step 8.
#   4. Symlink locale.h to xlocale.h, which the numpy build expects
#      (see https://forum.alpinelinux.org/comment/690#comment-690).
#   5. Bootstrap pip via ensurepip, drop the ensurepip module, upgrade
#      pip/setuptools, and expose `python` / `pip` as aliases for the
#      Python 3 tools.
#   6. pip-install $PYTHON_PACKAGES.
#   7. Install $PACKAGES; this reuses the `build-dependencies` virtual name.
#   8. Delete `build-runtime`. Packages that are also listed in $PACKAGES
#      (e.g. build-base, python3-dev) stay installed; the rest of the
#      build-only toolchain is removed.
# $PYTHON_PACKAGES and $PACKAGES are intentionally unquoted: they hold
# whitespace-separated lists that must be word-split by the shell.
# Every `apk add` uses --no-cache and every pip call uses --no-cache-dir, so no
# package index or wheel cache is left behind in the layer.
RUN apk add --no-cache --virtual build-dependencies python3 \
    && echo 'http://dl-cdn.alpinelinux.org/alpine/v3.8/main' >> /etc/apk/repositories \
    && apk add --no-cache libssl1.0 \
    && apk add --no-cache --virtual build-runtime \
        build-base \
        freetype-dev \
        gfortran \
        openblas-dev \
        pkgconfig \
        python3-dev \
    && ln -s /usr/include/locale.h /usr/include/xlocale.h \
    && python3 -m ensurepip \
    && rm -r /usr/lib/python*/ensurepip \
    && pip3 install --no-cache-dir --upgrade pip setuptools \
    && ln -sf /usr/bin/python3 /usr/bin/python \
    && ln -sf pip3 /usr/bin/pip \
    && rm -rf /root/.cache \
    && pip install --no-cache-dir $PYTHON_PACKAGES \
    && apk add --no-cache --virtual build-dependencies $PACKAGES \
    && apk del build-runtime
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment