Skip to content

Instantly share code, notes, and snippets.

@ponkin
Last active October 28, 2016 20:10
Show Gist options
  • Save ponkin/cac0a071e7fe75ca7c390b7388cf4f91 to your computer and use it in GitHub Desktop.
Save ponkin/cac0a071e7fe75ca7c390b7388cf4f91 to your computer and use it in GitHub Desktop.
# Creates a pseudo-distributed Hadoop 2.7.1 image with Apache Spark 1.6.2 (without-hadoop build)
#
# docker build -t sequenceiq/hadoop .
# Base image: CentOS 6.6.
FROM centos:6.6
# MAINTAINER is deprecated; the author is recorded as an image label instead.
LABEL maintainer="Alexey Ponkin"
# The build steps below install system packages and write under /usr, so run them as root.
USER root
# Build-time argument (docker build --build-arg CFG_URL=...).
# NOTE(review): CFG_URL is not referenced anywhere in the visible Dockerfile - confirm it is still needed.
ARG CFG_URL
# install dev tools
# Every step is chained with && so a failure in any of them aborts the build
# (with ';' only the exit status of the last command was checked).
# libselinux is updated in the same layer; see https://github.com/sequenceiq/hadoop-docker/issues/14
# The trailing 'yum clean all' removes the package cache in the layer that created it.
RUN yum clean all \
 && rpm --rebuilddb \
 && yum install -y \
        curl \
        gzip-devel \
        sudo \
        tar \
        which \
        zlib-devel \
 && yum update -y libselinux \
 && yum clean all
# java
# Download, install and delete the JDK rpm in ONE layer: deleting the file in a
# later RUN would leave the rpm stored in the earlier layer.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page as the rpm.
# NOTE(review): --insecure disables TLS certificate verification for this download.
RUN curl --insecure -fLO 'http://download.oracle.com/otn-pub/java/jdk/7u79-b14/jdk-7u79-linux-x64.rpm' -H 'Cookie: oraclelicense=accept-securebackup-cookie' \
 && rpm -i jdk-7u79-linux-x64.rpm \
 && rm jdk-7u79-linux-x64.rpm
# JAVA_HOME is set in its own instruction so the PATH line below can expand it.
ENV JAVA_HOME=/usr/java/default
ENV PATH=$PATH:$JAVA_HOME/bin
# Point /usr/bin/java at the JDK under JAVA_HOME; rm -f tolerates a missing /usr/bin/java.
RUN rm -f /usr/bin/java && ln -s "$JAVA_HOME/bin/java" /usr/bin/java
# download native support
# RUN mkdir -p /tmp/native
# RUN curl --insecure -vL https://github.com/sequenceiq/docker-hadoop-build/releases/download/v2.7.1/hadoop-native-64-2.7.1.tgz | tar -xz -C /tmp/native
# COPY lib/hadoop-native-64-2.7.1.tgz /tmp
# RUN cat /tmp/hadoop-native-64-2.7.1.tgz | tar -xz -C /tmp/native
# Create /datalocal, the directory for local YARN data
RUN ["mkdir", "/datalocal"]
# hadoop
# Fetch and unpack Hadoop 2.7.1 into /usr/local.
#  - 'set -o pipefail' makes the step fail when curl fails, not only when tar does
#    (relies on /bin/sh being bash, as in the CentOS base image).
#  - -f turns HTTP errors into a non-zero exit, -S still reports them despite -s,
#    -L follows redirects.
#  - archive.apache.org keeps past releases; the regular mirrors only carry current ones.
# NOTE(review): the tarball is not checksum-verified and --insecure disables TLS
# certificate verification - pin a checksum if one is available.
RUN set -o pipefail \
 && curl --insecure -fsSL http://archive.apache.org/dist/hadoop/common/hadoop-2.7.1/hadoop-2.7.1.tar.gz \
  | tar -xz -C /usr/local/
# Version-independent path: /usr/local/hadoop -> ./hadoop-2.7.1 (relative link, resolved inside /usr/local).
RUN ln -s ./hadoop-2.7.1 /usr/local/hadoop
# All Hadoop component homes point at the same installation.
ENV HADOOP_PREFIX=/usr/local/hadoop \
    HADOOP_COMMON_HOME=/usr/local/hadoop \
    HADOOP_HDFS_HOME=/usr/local/hadoop \
    HADOOP_MAPRED_HOME=/usr/local/hadoop \
    HADOOP_YARN_HOME=/usr/local/hadoop \
    HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
# Separate instruction so that $HADOOP_PREFIX (defined above) is expanded here.
ENV YARN_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
# Site configuration from the build context; COPY is used because these are plain local files.
COPY docker/config/core-site.xml docker/config/hdfs-site.xml docker/config/yarn-site.xml $HADOOP_CONF_DIR/
# Patch hadoop-env.sh: replace the 'export JAVA_HOME' line with JAVA_HOME, HADOOP_PREFIX and
# HADOOP_HOME exports, then replace the 'export HADOOP_CONF_DIR' line with an absolute path.
RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/java/default\nexport HADOOP_PREFIX=/usr/local/hadoop\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh \
 && sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
COPY docker/scripts /docker/scripts
# -R is required: without it chmod only touches the directory itself and the
# scripts inside it keep whatever mode they had in the build context.
RUN chmod -R +x /docker/scripts
ENV PATH=$PATH:$HADOOP_PREFIX/bin
# Install Apache Spark
# Alternative: use a tarball from the build context instead of downloading it.
#COPY docker/lib/spark-1.6.2-bin-without-hadoop.tgz /tmp
# Fetch and unpack Spark 1.6.2 (the "without-hadoop" build) into /usr/local.
#  - 'set -o pipefail' makes the step fail when curl fails, not only when tar does
#    (relies on /bin/sh being bash, as in the CentOS base image).
#  - -f turns HTTP errors into a non-zero exit, -S still reports them despite -s,
#    -L follows redirects.
#  - archive.apache.org is used because it keeps past releases.
# NOTE(review): the tarball is not checksum-verified and --insecure disables TLS
# certificate verification - pin a checksum if one is available.
RUN set -o pipefail \
 && curl --insecure -fsSL http://archive.apache.org/dist/spark/spark-1.6.2/spark-1.6.2-bin-without-hadoop.tgz \
  | tar -zx -C /usr/local/
# Version-independent path: /usr/local/spark -> ./spark-1.6.2-bin-without-hadoop (relative link).
RUN ln -s ./spark-1.6.2-bin-without-hadoop /usr/local/spark
ENV SPARK_HOME=/usr/local/spark \
    SPARK_LOG_DIR=/var/log/spark \
    SPARK_PID_DIR=/var/run/spark
# Separate instruction so that $SPARK_HOME (defined above) is expanded here.
ENV PATH=$PATH:$SPARK_HOME/bin
# The trailing slash forces directory semantics: the file lands at conf/spark-env.sh.
COPY docker/scripts/spark-env.sh /usr/local/spark/conf/
# Entrypoint is intentionally left disabled; if enabled, prefer the exec form:
#ENTRYPOINT ["/docker/scripts/entrypoint.sh"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment