Skip to content

Instantly share code, notes, and snippets.

@mushkevych
Last active December 30, 2015 07:39
Show Gist options
  • Save mushkevych/7797563 to your computer and use it in GitHub Desktop.
Save mushkevych/7797563 to your computer and use it in GitHub Desktop.
Docker CDH 4.5
#!/bin/bash
# Build the CDH 4.5 image from the Dockerfile in the current directory
# and tag it as bohdanm/cdh_4_5.
sudo docker build --tag bohdanm/cdh_4_5 .
# Pseudo-distributed Cloudera CDH 4 stack (YARN Hadoop, ZooKeeper, HBase, Pig)
# on top of Ubuntu 12.04 with the Oracle JDK 7.
FROM ubuntu:precise
LABEL maintainer="Bohdan Mushkevych"

# Declared with ENV so the values persist across build steps and into the
# running container; an `export` inside a RUN is lost when that step ends.
ENV JAVA_HOME=/usr/lib/jvm/java-7-oracle \
    HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec

# Installing Oracle JDK
# The package index is refreshed before the first install, and every step is
# chained with && so that a failing command aborts the build instead of being
# silently skipped. The licence prompt is pre-answered through debconf.
RUN apt-get update \
 && apt-get install -y python-software-properties \
 && add-apt-repository -y ppa:webupd8team/java \
 && apt-get update \
 && echo oracle-java7-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections \
 && apt-get install -y oracle-java7-installer \
 && update-alternatives --display java \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Cloudera CDH4 APT key and DPKG repositories
# The key is downloaded to a file first (curl -f fails on HTTP errors) rather
# than piped, so a failed download cannot be masked by the pipeline.
RUN apt-get update \
 && apt-get install -y curl \
 && curl -fsSL -o /tmp/cloudera-archive.key http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh/archive.key \
 && apt-key add /tmp/cloudera-archive.key \
 && rm -f /tmp/cloudera-archive.key \
 && printf '%s\n' \
      'deb [arch=amd64] http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh precise-cdh4 contrib' \
      'deb-src http://archive.cloudera.com/cdh4/ubuntu/precise/amd64/cdh precise-cdh4 contrib' \
      > /etc/apt/sources.list.d/cloudera.list \
 && rm -rf /var/lib/apt/lists/*

# Removing anything extra and installing pseudo distributed YARN-powered Hadoop
# The removal stays best-effort (nothing may be installed yet), hence `|| true`.
# The pattern is quoted so the shell hands it to apt-get unexpanded.
RUN apt-get update \
 && (apt-get remove -y hadoop-0.20-conf-pseudo 'hadoop-0.20-mapreduce-*' || true) \
 && apt-get install -y hadoop-conf-pseudo \
 && rm -rf /var/lib/apt/lists/*

# Installing zookeeper
RUN apt-get update \
 && apt-get install -y zookeeper-server \
 && rm -rf /var/lib/apt/lists/*

# Installing HBase
RUN apt-get update \
 && apt-get install -y \
      hbase \
      hbase-master \
      hbase-regionserver \
 && rm -rf /var/lib/apt/lists/*

# Installing Pig
RUN apt-get update \
 && apt-get install -y pig \
 && rm -rf /var/lib/apt/lists/*

# Install command-line utils
# Concrete package names are used: iputils-ping provides `ping`, vim-tiny
# provides the minimal vim.
RUN apt-get update \
 && apt-get install -y \
      iputils-ping \
      vim-tiny \
 && rm -rf /var/lib/apt/lists/*

# Copy configuration files
COPY ./etc/ /etc/
COPY ./root_scripts/ /root/

# Init environment
RUN cat /root/set_env >> /etc/profile

# Each packaged conf path is unlinked and then replaced by a real directory
# populated from the build context.
RUN unlink /etc/hadoop/conf
COPY ./hadoop/ /etc/hadoop/conf/
RUN unlink /etc/hbase/conf
COPY ./hbase/ /etc/hbase/conf/
RUN unlink /etc/zookeeper/conf
COPY ./zookeeper/ /etc/zookeeper/conf/

# Replace placeholders with the actual settings
# `\$` keeps the dollar sign a literal character in the sed pattern; `|` is
# used as the delimiter for the path substitution to avoid escaping slashes.
RUN sed -i \
      -e 's/\$HOST_ADDRESS/hstation.vanlab.com/g' \
      -e 's|\$FS_MOUNT_POINT|/dfs|g' \
      /etc/hadoop/conf/* /etc/hbase/conf/* /etc/zookeeper/conf/*

# make scripts runnable
RUN chmod +x /root/*.sh

# add user <zookeeper> to group <hadoop>
RUN usermod -a -G hadoop zookeeper

# Expose Hadoop+Eco ports
# HDFS
EXPOSE 8020 50070 50075 50090
# HBase
EXPOSE 60000 60010 60020 60030 8080
# Yarn
EXPOSE 8030 8031 8032 8033 8040 8041 8042 8088 10020 19888

# NOTE(review): no step in this file installs circus, and /etc/circusd.ini
# would have to come from ./etc in the build context - confirm both exist
# before relying on this default command.
CMD ["/usr/local/bin/circusd", "/etc/circusd.ini"]
├── etc
│   └── environment
├── hadoop
│   ├── core-site.xml
│   ├── hadoop-env.sh
│   ├── hadoop-metrics2.properties
│   ├── hadoop-metrics.properties
│   ├── hdfs-site.xml
│   ├── log4j.properties
│   ├── mapred-site.xml
│   ├── slaves
│   ├── ssl-client.xml.example
│   ├── ssl-server.xml.example
│   ├── yarn-env.sh
│   └── yarn-site.xml
├── hbase
│   ├── hadoop-metrics.properties
│   ├── hbase-env.sh
│   ├── hbase-policy.xml
│   ├── hbase-site.xml
│   ├── log4j.properties
│   └── regionservers
├── pig
│   ├── build.properties
│   ├── log4j.properties
│   └── pig.properties
├── root_scripts
│   ├── clear_hadoop_logs.sh
│   ├── hadoop_pseudo_start.sh
│   ├── hadoop_pseudo_stop.sh
│   ├── hdfs_format.sh
│   ├── hdfs_init.sh
│   └── set_env
├── zookeeper
│   ├── configuration.xsl
│   ├── log4j.properties
│   ├── zoo.cfg
│   └── zoo_sample.cfg
├── build.sh
├── Dockerfile
├── local_env.sh
└── run.sh
#!/bin/bash
# Prepares the host machine: registers the Docker APT repository, installs
# the lxc-docker package and creates the host directories under /var/hstation.

# Abort on the first failing command, on unset variables, and when any stage
# of a pipeline fails (e.g. the key download), instead of carrying on.
set -euo pipefail

# Import the Docker repository signing key. Only apt-key needs root.
wget -qO- https://get.docker.io/gpg | sudo apt-key add -

# Register the Docker APT repository; tee performs the privileged write.
echo "deb http://get.docker.io/ubuntu docker main" \
    | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

sudo apt-get update
sudo apt-get install lxc-docker

# World-writable host directories for HDFS data, the workspace and logs.
sudo mkdir -p --mode=777 \
    /var/hstation/dfs \
    /var/hstation/workspace \
    /var/hstation/logs
#!/bin/bash
# Start an interactive login shell in the bohdanm/cdh_4_5 image with the
# host directories under /var/hstation mounted into the container.
sudo docker run \
    --volume /var/hstation/dfs:/dfs \
    --volume /var/hstation/workspace:/workspace \
    --volume /var/hstation/logs:/hlogs \
    --hostname hstation.vanlab.com \
    --interactive \
    --tty \
    bohdanm/cdh_4_5 \
    /bin/bash -l
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment