# Source: GitHub Gist mplscorwin/7362663 (@mplscorwin), last active December 27, 2015
# CentOS init for Cassandra + Graphite + StatsD (forever via cron)
################################################################################
#
# CentOS - Graphite/statsD/Cassandra configuration script
#
################################################################################
# This should be performed as root since it's going to be installing a bunch of stuff
##########
# Environment specific - only these are expected to change from dev->prod
# cassandra cluster name
CLUSTER_NAME='sta-datamart'
CLUSTER_SEEDS='10.55.64.91,10.55.64.92,10.55.64.93'
# END Environment specific
##########
# Primary network address, read from eth0 (presumed to be the first NIC).
# From the ifconfig line containing "inet addr:", keep the text after the
# first colon up to the first whitespace.
INET_ADDR=$(
  /sbin/ifconfig eth0 |
    awk -F: '/inet addr:/ { split($2, words, " "); print words[1] }'
)
# --- Bring the base system up to date ---
# EPEL first, so that reasonably recent packages are available.
epel_release_rpm=http://download.fedoraproject.org/pub/epel/6/i386/epel-release-6-8.noarch.rpm
rpm -Uvh "$epel_release_rpm"

# Webtatic release package for the hardware platform reported by `uname -i`.
# NOTE(review): the original author asked "why do we need this?", and the URL
# points at the centos/5 tree while EPEL above is release 6 - confirm.
hw_platform=$(uname -i)
rpm -ivh "http://repo.webtatic.com/yum/centos/5/${hw_platform}/webtatic-release-5-1.noarch.rpm"

# Update CentOS
yum -y update

# --- Install all the packages we can with yum --- #
# Three separate transactions, in this order: build toolchain, graphite
# stack, then supporting libraries and tools.
toolchain_pkgs=(openssl-devel pkgconfig gcc gcc-c++ kernel-devel make git)
graphite_pkgs=(python-whisper python-carbon graphite-web python-memcached python-ldap httpd memcached)
support_pkgs=(zlib-devel curl curl-devel openssl wget perl-ExtUtils-Embed tk gettext)
yum -y install "${toolchain_pkgs[@]}"
yum -y install "${graphite_pkgs[@]}"
yum -y install "${support_pkgs[@]}"
###
### This is probably where the C* specific stuff goes...
###
# --- add DataStax yum repo ---
# The repo definition is written verbatim; the quoted heredoc delimiter keeps
# the shell from expanding anything inside it.
cat >/etc/yum.repos.d/datastax.repo <<'EOF'
[datastax]
name= DataStax Repo for Apache Cassandra
baseurl=http://rpm.datastax.com/community
enabled=1
gpgcheck=0
EOF
# --- install C* via yum ---
yum -y install cassandra20
# register the cassandra init script so it is managed by chkconfig (run on reboot)
chkconfig --add cassandra
# cassandra configuration
# Builds /home/cassandra as a folder of convenience links and moves the commit
# logs, process logs and data onto the /clogs and /cdata1 locations.
# NOTE(review): /clogs and /cdata1 are assumed to exist already (nothing in
# this section creates them) - confirm they are mounted before running.
#
# Every step is guarded so that running the script a second time is harmless:
#   * mkdir -p tolerates folders that are already there
#   * directories are only moved while they are still real directories
#     (after the first run they are symlinks, and moving those would drop the
#     link inside its own target)
#   * ln -sfn replaces an existing link instead of creating a nested link
#     inside the directory the old link points to
mkdir -p /home/cassandra
# link in config
ln -sfn /etc/cassandra/conf /home/cassandra/conf
# move the commit logs to separate disk
if [[ -d /var/lib/cassandra/commitlog && ! -L /var/lib/cassandra/commitlog ]]; then
  mv /var/lib/cassandra/commitlog /clogs/
fi
# and link it back
ln -sfn /clogs/commitlog /var/lib/cassandra/commitlog
ln -sfn /clogs/commitlog /home/cassandra/commitlog
# finally, same for the logs
if [[ -d /var/log/cassandra && ! -L /var/log/cassandra ]]; then
  mv /var/log/cassandra /clogs/processlog
fi
ln -sfn /clogs/processlog /var/log/cassandra
ln -sfn /clogs/processlog /home/cassandra/processlog
# move data to separate disk
# (owner:group with a colon; the dotted form is an obsolete spelling)
chown -R cassandra:cassandra /cdata1
if [[ -d /var/lib/cassandra/data && ! -L /var/lib/cassandra/data ]]; then
  # rmdir on purpose: it refuses to discard a data directory that is not empty
  rmdir /var/lib/cassandra/data
fi
ln -sfn /cdata1 /var/lib/cassandra/data
mkdir -p /home/cassandra/data
ln -sfn /cdata1 /home/cassandra/data/1
echo "Finished building cassandra home folder:"
ls -daltr /home/cassandra/* /home/cassandra/data/*
# ===== BEGIN MANUAL STEP =====
# syncdb sets up the graphite database and primes the authentication model
# (only relevant when the DB authentication model is in use).
graphite_manage=/usr/lib/python2.6/site-packages/graphite/manage.py
printf '%s\n' "*** Starting manual configuration of graphite admin db ***"
python "$graphite_manage" syncdb
# ===== END MANUAL STEP =====
printf '%s\n' "*** completed manual configuration step ***"

# --- Firewall --- #
# Opening only port 80 was the original plan (kept here for reference):
#iptables -I INPUT 5 -m state --state NEW -p tcp --dport 80 -j ACCEPT
#iptables-save > /etc/sysconfig/iptables
# For DEMO purposes iptables is instead disabled at boot and stopped now.
chkconfig iptables off
service iptables stop
# === Below needs to be re-validated for 6.4 === #
# --- Install git (built from the release tarball) ---
#yum install -y gcc zlib-devel curl curl-devel openssl
GIT_VERSION=1.8.4.2
git_src_dir="git-${GIT_VERSION}"
git_tarball="${git_src_dir}.tar.bz2"
# The download/build runs in a subshell: if any step fails, the directory
# change is discarded with the subshell, so the rest of the script never
# continues from inside the half-built source tree.
# Cleanup happens only after a successful install and names exactly the two
# artifacts created here; a `git*` glob would also delete unrelated files in
# the current directory whose names happen to start with "git".
(
  wget "http://kernel.org/pub/software/scm/git/${git_tarball}" &&
    tar xjf "$git_tarball" &&
    cd "$git_src_dir" &&
    ./configure && make && make install
) && rm -rf -- "$git_tarball" "$git_src_dir"
# --- Install NodeJS (built from a fresh clone) ---
#yum install -y gcc-c++
# Clone and build inside a subshell so a failed configure/make cannot leave
# the remainder of the script running from inside ./node. The checkout is
# removed only after a successful install, as before.
(
  git clone https://github.com/joyent/node.git &&
    cd node &&
    ./configure && make && make install
) && rm -rf node
# --- Install the Node Package Manager ---
# -f: on an HTTP error, output nothing rather than piping an error page into sh
# -L: follow redirects to wherever the installer currently lives
curl -fL https://npmjs.org/install.sh | sh
# don't need SSL for fetching packages via npm
# The registry line is appended only when it is not already present, so
# re-running the script does not pile up duplicates. (`>>` creates the file
# when it is missing, so no separate "file does not exist" branch is needed.)
npmrc=/usr/local/etc/npmrc
npm_registry_line='registry = "http://registry.npmjs.org/"'
mkdir -p "${npmrc%/*}"
if ! grep -qxF -- "$npm_registry_line" "$npmrc" 2>/dev/null; then
  echo "$npm_registry_line" >>"$npmrc"
fi
# --- Installs via npm
# express goes into the current directory; forever and forever-monitor are
# installed globally (-g). Each install runs only if the previous succeeded.
npm install express &&
  npm install forever -g &&
  npm install forever-monitor -g
# --- Install StatsD ---
# Check statsd out under /opt and derive local.js from the shipped
# exampleConfig.js by replacing the placeholder graphite host with this
# machine's address.
# Unless non-default ports were used for some other feature of the system,
# the defaults in the example config are fine.
#   * Runs in a subshell so a failure part-way through cannot leave the rest
#     of the script running from /opt or /opt/statsd.
#   * The clone is skipped when the checkout already exists, so a re-run
#     still regenerates local.js instead of stopping at a failed clone.
#   * The address reaches Perl through the environment rather than being
#     pasted into the program text, and the host name is matched literally.
(
  cd /opt &&
    { [[ -d statsd/.git ]] || git clone https://github.com/etsy/statsd.git; } &&
    cd statsd &&
    INET_ADDR="$INET_ADDR" \
      perl -pe 's/\Qgraphite.example.com\E/$ENV{INET_ADDR}/' exampleConfig.js >local.js
)
# ---- CRON Configuration ----
# setup node to run after reboot
#START_NODE_CMD='/bin/env forever start --sourceDir /root/statsd node statsd.js local.js'
STATSD_PATH=/opt/statsd
STATSD_LOG_PATH=/var/log/statsd
mkdir -p "$STATSD_LOG_PATH"
# START_NODE_CMD is the command line that launches statsd under forever. It is
# written into root's crontab below and is also expanded as a command at the
# end of the script, so it must contain nothing but plain words.
# It therefore has no trailing "&": expanded as a command, the "&" would be
# handed to forever as a literal argument, and `forever start` detaches by
# itself anyway.
START_NODE_CMD="/bin/env forever start"
START_NODE_CMD="$START_NODE_CMD -al $STATSD_LOG_PATH/forever.log" # forever logfile
START_NODE_CMD="$START_NODE_CMD -ao $STATSD_LOG_PATH/statsd.log" # STDOUT
START_NODE_CMD="$START_NODE_CMD -ae $STATSD_LOG_PATH/statsd.log" # STDERR
START_NODE_CMD="$START_NODE_CMD $STATSD_PATH/stats.js $STATSD_PATH/local.js" # job
# Keep whatever root already has in its crontab, drop any earlier statsd entry
# (so re-runs do not duplicate it), and add the @reboot line.
# NOTE(review): cron may run jobs with a PATH that lacks /usr/local/bin, which
# is where a source-built node/forever presumably lands - confirm the @reboot
# job can actually find `forever`.
{
  crontab -u root -l 2>/dev/null | grep -vF -- "$STATSD_PATH/stats.js"
  echo "@reboot ${START_NODE_CMD}"
} | crontab -u root -
# fixes for graphite permissions
# (owner:group with a colon; the dotted form is an obsolete spelling)
chown -R apache:apache /var/lib/graphite-web
# Write 0 to the SELinux enforce node.
# NOTE(review): this is a runtime-only change; nothing here persists it
# across reboots - confirm that is intended.
echo 0 >/selinux/enforce
# second syncdb pass, now that /var/lib/graphite-web belongs to apache
python /usr/lib/python2.6/site-packages/graphite/manage.py syncdb
# setup graphite and carbon-* to run on reboot
for svc in cassandra carbon-cache carbon-aggregator httpd memcached; do
  chkconfig --level 345 "$svc" on
done
# ---- mung configuration files ----
# cassandra.yaml
# In-place edit (previous version kept as cassandra.yaml~) that replaces the
# stock seeds, cluster name, listen/rpc addresses and in-memory compaction
# limit.
# The environment-specific values reach Perl through %ENV rather than being
# pasted into the program text, so a value containing "/", "$", "@" or "\"
# can no longer break or alter the substitution. \x27 stands for a single
# quote, which cannot be written literally inside the single-quoted program.
CLUSTER_SEEDS="$CLUSTER_SEEDS" CLUSTER_NAME="$CLUSTER_NAME" INET_ADDR="$INET_ADDR" \
  perl -pi~ \
    -e 's/\Q- seeds: "127.0.0.1"\E/- seeds: "$ENV{CLUSTER_SEEDS}"/;' \
    -e 's/cluster_name: \x27Test Cluster\x27/cluster_name: \x27$ENV{CLUSTER_NAME}\x27/;' \
    -e 's/\Qlisten_address: localhost\E/listen_address: $ENV{INET_ADDR}/;' \
    -e 's/\Qrpc_address: localhost\E/rpc_address: $ENV{INET_ADDR}/;' \
    -e 's/\Qin_memory_compaction_limit_in_mb: 64\E/in_memory_compaction_limit_in_mb: 128/;' \
    /home/cassandra/conf/cassandra.yaml
# cassandra log4j
# Adds a syslog appender (SYSLOG_LOCAL1) to Cassandra's log4j configuration:
# the appender name is attached to the root logger line and its definition is
# appended to the end of the file.
#   * The definition is written with a quoted shell heredoc so that
#     ${logging.hostname} reaches the file verbatim. Printed from an
#     interpolating Perl heredoc, that placeholder was expanded by Perl (to
#     nothing) before log4j ever saw it.
#   * The layout is given by its fully qualified class name.
#   * Nothing is changed when the appender is already defined, so a re-run
#     neither duplicates the definition nor extends the root logger again.
log4j_props=/etc/cassandra/conf/log4j-server.properties
if ! grep -q '^log4j\.appender\.SYSLOG_LOCAL1=' "$log4j_props"; then
  perl -pi~ \
    -e 's/(\Qlog4j.rootLogger=INFO,stdout,R\E)/$1,SYSLOG_LOCAL1/;' \
    "$log4j_props"
  cat >>"$log4j_props" <<'EOF'
#
# additional appender for distributed syslog
#
log4j.appender.SYSLOG_LOCAL1=org.apache.log4j.net.SyslogAppender
log4j.appender.SYSLOG_LOCAL1.threshold=INFO
log4j.appender.SYSLOG_LOCAL1.syslogHost=hol-syslog
log4j.appender.SYSLOG_LOCAL1.facility=LOCAL1
log4j.appender.SYSLOG_LOCAL1.facilityPrinting=false
log4j.appender.SYSLOG_LOCAL1.layout=org.apache.log4j.PatternLayout
log4j.appender.SYSLOG_LOCAL1.layout.ConversionPattern=%5p ${logging.hostname} [%t] %c:%L - %m%n
EOF
fi
# apache (httpd)
# Turn the commented-out stock ServerName line into one naming this host
# (previous file kept as httpd.conf~). The address is passed through the
# environment instead of being pasted into the Perl program text.
INET_ADDR="$INET_ADDR" \
  perl -pi~ \
    -e 's/\Q#ServerName www.example.com:80\E/ServerName $ENV{INET_ADDR}/;' \
    /etc/httpd/conf/httpd.conf
echo "service configuration complete"
# with no arguments chkconfig prints the service table, for a visual check
chkconfig
# ---- Start things up ----
# cassandra first, then the graphite stack (carbon-cache, memcached, httpd)
for svc in cassandra carbon-cache memcached httpd; do
  service "$svc" restart
done
# --- Start StatsD ---
# START_NODE_CMD is a plain string that is also used for the crontab entry.
# Split it into words explicitly and drop a trailing "&" if it carries one:
# expanded as a bare command, that "&" would not background anything; it
# would be passed to forever as a literal argument.
read -r -a statsd_start_cmd <<<"${START_NODE_CMD%&}"
"${statsd_start_cmd[@]}"
echo "All installs completed. Complete any manual configuration, verify then reboot and do it again! :)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment