Last active
June 28, 2022 02:26
-
-
Save alonisser/b5e7b599151c5f27f92054a97d1d3f1f to your computer and use it in GitHub Desktop.
Dockerfile for spacy prodigy for cloud setup using remote postgresql including changing instructions file and overriding index.html - and the leanest I've got
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Alpine base chosen to keep the resulting image as lean as possible.
FROM python:3.6-alpine

# NOTE(review): openssl is presumably required so wget can fetch the
# dockerize release over HTTPS - confirm before removing.
RUN apk add --no-cache openssl

# dockerize is used at container start to render the prodigy.json config
# from its template. The version is pinned so rebuilds are reproducible.
ENV DOCKERIZE_VERSION=v0.6.1

# Download, unpack into /usr/local/bin and delete the tarball in a single
# layer so the archive never persists in the image.
RUN wget "https://github.com/jwilder/dockerize/releases/download/${DOCKERIZE_VERSION}/dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz" \
    && tar -C /usr/local/bin -xzvf "dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz" \
    && rm "dockerize-alpine-linux-amd64-${DOCKERIZE_VERSION}.tar.gz"
# Application directory; src/ receives the project's source files later on.
RUN mkdir -pv /prodigy /prodigy/src
WORKDIR /prodigy

# The Prodigy wheel is something you get when you buy Prodigy; it is not a
# free package, so it has to be present in the build context.
# The destination ends with "/" because a wildcard source requires a
# directory destination.
COPY ./*.whl /prodigy/
COPY requirements.txt /prodigy/

# Python deps for alpine.
# Everything happens in ONE layer so that the compiler toolchain installed
# as .build-deps is really absent from the final image (removing it in a
# later layer would not shrink the image):
#   1. install the runtime PostgreSQL client library and the build toolchain;
#   2. pip-install the requirements;
#   3. delete test directories and compiled bytecode under /usr/local.
#      The explicit grouping matters: without it, find's implicit AND binds
#      tighter than -o and -exec would only apply to the last alternative.
#      -depth visits children first so removing a directory does not make
#      find complain about vanished paths;
#   4. scan the installed binaries for the shared libraries they need and
#      pin the owning packages as .rundeps so `apk del` keeps them;
#   5. drop the build toolchain.
RUN apk add --no-cache postgresql-libs \
    && apk add --no-cache --virtual .build-deps \
        g++ \
        gcc \
        musl-dev \
        postgresql-dev \
    && pip install --no-cache-dir -r requirements.txt \
    && find /usr/local -depth \
        \( \
            \( -type d -a \( -name test -o -name tests \) \) \
            -o \
            \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
        \) -exec rm -rf '{}' + \
    && runDeps="$( \
        scanelf --needed --nobanner --recursive /usr/local \
            | awk '{ gsub(/,/, "\nso:", $2); print "so:" $2 }' \
            | sort -u \
            | xargs -r apk info --installed \
            | sort -u \
    )" \
    && apk add --no-cache --virtual .rundeps $runDeps \
    && apk del .build-deps
# NOTE(review): presumably the directory where Prodigy looks for
# prodigy.json - confirm against the Prodigy documentation.
ENV PRODIGY_HOME=/prodigy

# The template that dockerize uses to create the actual prodigy.json config
# based on environment variables.
COPY ./prodigy.json.tpl /prodigy/prodigy.json.tpl

# Instructions file.
COPY ./instructions.txt /prodigy/instructions.txt

# Shell scripts from the build context and the project's source files.
COPY *.sh /prodigy/
COPY src/* /prodigy/src/

# Comment out the next two instructions if you don't override index.html.
# The installed package location is resolved through Python rather than
# hard-coded; chaining with && makes the build fail at the failing command
# if the import does not work.
COPY static/index.html index.html
RUN PRODIGY_FILES="$(python -c 'import os, prodigy; print(os.path.dirname(prodigy.__file__))')" \
    && cp index.html "${PRODIGY_FILES}/static/"
# Documentation only: EXPOSE does not publish the port, use `docker run -p`.
# NOTE(review): presumably the port the Prodigy web app listens on - confirm.
EXPOSE 8080

# The actual entry point: dockerize compiles the template into
# /prodigy/prodigy.json and then runs ./launch.sh (a relative path, so it is
# resolved against the image's working directory).
CMD ["dockerize", "-template", "/prodigy/prodigy.json.tpl:/prodigy/prodigy.json", "./launch.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "batch_size": 5,
  "host": "0.0.0.0",
  "instructions": "/prodigy/instructions.txt",
  "hide_meta": true,
  "choice_auto_accept": true,
  "db": "postgresql",
  "db_settings": {
    "postgresql": {
      "host": "{{ .Env.DB_HOST }}",
      "dbname": "{{ .Env.DATABASE_NAME }}",
      "port": 5432,
      "user": "{{ .Env.DB_USERNAME }}",
      "password": "{{ .Env.DB_PASSWORD }}"
    }
  }
}
@lukleu The launch.sh file just contained the commands to start Prodigy itself in the container.
#!/usr/bin/env bash
prodigy dataset $(your dataset) $(string name of dataset)
prodigy textcat.........
For my needs, I needed a Text Categorization with a special recipe (to select multiple choices), but YMMV.
@walterg2 Thank you, sir! I suspected as much but was really unsure.
What is in the requirements.txt file?
@teebu then it was
psycopg2
prodigy-1.6.1-cp35.cp36.cp37-cp35m.cp36m.cp37m-linux_x86_64.whl
Probably ancient versions now
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@walterg2 What did you add to your launch.sh? I'm new to the subject and kinda clueless :)