Created
January 21, 2014 19:25
-
-
Save ndimiduk/8546533 to your computer and use it in GitHub Desktop.
Automation for managing multiple RegionServer processes running on a single host
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# | |
# Use fabric to manage multi-RS HDP2 deploys. Assumes gsInstaller. | |
# | |
from __future__ import with_statement | |
from contextlib import closing | |
from fabric.api import * | |
from fabric.contrib.files import exists, sed | |
from StringIO import StringIO | |
from tempfile import mkstemp | |
import re | |
# Hosts to operate on: keep any hosts already present in env.hosts,
# otherwise fall back to this fixed list.
env.hosts = env.hosts or ['hor18n35','hor18n36','hor18n39']
# Parent directory of the per-instance conf-<n> configuration directories.
configs_base = '/grid/1/ndimiduk'
# Parent directory of the per-instance hbase-<n> log directories.
logs_base = '/grid/0/var/log'
# Parent directory of the per-instance hbase-<n> run (pid) directories.
run_base = '/grid/0/var/run'
# | |
# for installing processes | |
# | |
def ensure_base():
    """Make sure conf-0 exists under configs_base.

    conf-0 is created as a symlink to the stock /etc/hbase/conf; nothing is
    done when the path is already present on the remote host.
    """
    base_conf = '%s/conf-0' % configs_base
    if exists(base_conf):
        return
    run('ln -s /etc/hbase/conf %s' % base_conf)
def existing_heap(config_path):
    """Return the RegionServer max heap size configured in hbase-env.sh.

    Greps the ``export HBASE_REGIONSERVER_OPTS`` line out of
    ``<config_path>/hbase-env.sh`` on the remote host and extracts the number
    from its ``-Xmx<digits>m`` flag.

    Args:
        config_path: remote directory containing hbase-env.sh.

    Returns:
        The heap size as a string of digits (the value between ``-Xmx`` and
        the trailing ``m``).

    Raises:
        ValueError: if the line carries no ``-Xmx<digits>m`` flag.
    """
    line = run("grep '^export HBASE_REGIONSERVER_OPTS' %s/hbase-env.sh" % config_path)
    # Match digits only, so the capture cannot run on into later options that
    # happen to contain "m " (e.g. "-Xmx4096m -Xmn200m ...").
    sizes = re.findall(r"-Xmx(\d+)m\b", line)
    if not sizes:
        raise ValueError(
            "no -Xmx<N>m flag found in HBASE_REGIONSERVER_OPTS of %s/hbase-env.sh"
            % config_path)
    # If the flag appears more than once, use the last occurrence.
    return sizes[-1]
def patch_conf(config_path, n, new_heap=None):
    """Patch a copied HBase config directory so it describes RegionServer n.

    A unified diff is rendered from the template below, uploaded to
    /tmp/conf.patch on the remote host and applied inside ``config_path``.
    The diff rewrites hbase-env.sh (GC log path, JVM error file path, heap
    size, HBASE_LOG_DIR, HBASE_PID_DIR) and hbase-site.xml (adds
    hbase.regionserver.port / hbase.regionserver.info.port, changes
    hbase.tmp.dir) so that they use instance-specific ``hbase-<n>``
    directories and ports ``6002<n>`` / ``6003<n>``.

    Args:
        config_path: remote directory holding the copied configuration.
        n: instance number substituted for ``{n}`` in the template.
        new_heap: heap size (digits, MB) to write into -Xmx; defaults to the
            heap currently configured in ``config_path``.
    """
    old_heap = existing_heap(config_path)
    new_heap = new_heap or old_heap
    # NOTE(review): the diff text is whitespace-sensitive (leading space on
    # context lines, blank context lines, XML indentation). Verify it against
    # the stock /etc/hbase/conf files before relying on it.
    # The ErrorFile path in the "+" line uses {n} like every other
    # per-instance path; it must not be pinned to one instance's directory.
    content = """diff -ru /etc/hbase/conf/hbase-env.sh conf-1/hbase-env.sh
--- /etc/hbase/conf/hbase-env.sh 2013-11-04 23:45:44.647363875 +0000
+++ conf-1/hbase-env.sh 2013-11-04 23:51:03.421243785 +0000
@@ -45,7 +45,7 @@
#
# export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
export HBASE_MASTER_OPTS="-Xmx1024m -Djava.security.auth.login.config=/etc/hbase/conf/master_jaas.conf"
-export HBASE_REGIONSERVER_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseParNewGC -Xmn200m -XX:CMSInitiatingOccupancyFraction=80 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:/grid/0/var/log/hbase/gc.log-`date +'%Y%m%d%H%M'` -XX:ErrorFile=/grid/0/var/log/hbase/hs_err_pid%p.log -Xmx{old_heap}m -Djava.security.auth.login.config=/etc/hbase/conf/region_server_jaas.conf"
+export HBASE_REGIONSERVER_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseParNewGC -Xmn200m -XX:CMSInitiatingOccupancyFraction=80 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:/grid/0/var/log/hbase-{n}/gc.log-`date +'%Y%m%d%H%M'` -XX:ErrorFile=/grid/0/var/log/hbase-{n}/hs_err_pid%p.log -Xmx{new_heap}m -Djava.security.auth.login.config=/etc/hbase/conf/region_server_jaas.conf"
# export HBASE_THRIFT_OPTS="$HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103"
# export HBASE_ZOOKEEPER_OPTS="$HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104"
@@ -56,7 +56,7 @@
# export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR"
# Where log files are stored. $HBASE_HOME/logs by default.
-export HBASE_LOG_DIR=/grid/0/var/log/hbase
+export HBASE_LOG_DIR=/grid/0/var/log/hbase-{n}
# A string representing this instance of hbase. $USER by default.
# export HBASE_IDENT_STRING=$USER
@@ -65,7 +65,7 @@
# export HBASE_NICENESS=10
# The directory where pid files are stored. /tmp by default.
-export HBASE_PID_DIR=/grid/0/var/run/hbase
+export HBASE_PID_DIR=/grid/0/var/run/hbase-{n}
# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
diff -ru /etc/hbase/conf/hbase-site.xml conf-1/hbase-site.xml
--- /etc/hbase/conf/hbase-site.xml 2013-11-04 23:26:55.000000000 +0000
+++ conf-1/hbase-site.xml 2013-11-05 00:00:39.318109262 +0000
@@ -21,6 +21,14 @@
-->
<configuration>
<property>
+ <name>hbase.regionserver.port</name>
+ <value>6002{n}</value>
+ </property>
+ <property>
+ <name>hbase.regionserver.info.port</name>
+ <value>6003{n}</value>
+ </property>
+ <property>
<name>hbase.rootdir</name>
<value>hdfs://hor18n37.gq1.ygridcore.net:8020/apps/hbase/data</value>
<description>The directory shared by region servers and into
@@ -48,7 +56,7 @@
</property>
<property>
<name>hbase.tmp.dir</name>
- <value>/grid/0/var/log/hbase</value>
+ <value>/grid/0/var/log/hbase-{n}</value>
<description>Temporary directory on the local filesystem.
Change this setting to point to a location more permanent
than '/tmp' (The '/tmp' directory is often cleared on
""".format(n=n, old_heap=old_heap, new_heap=new_heap)
    # Upload the rendered diff from an in-memory buffer; no local temp file.
    patch = StringIO(content)
    put(patch, '/tmp/conf.patch')
    # -d runs patch inside the config directory; -p4 strips the leading
    # /etc/hbase/conf/ from the old-file paths in the diff headers.
    run('patch -d %s -p4 < /tmp/conf.patch' % config_path)
@task
def ensure_n(n, force=False):
    """Ensure the nth RegionServer is configured on each host.

    Instance 0 only needs the conf-0 link created by ensure_base(). For any
    other n this creates (or, when force is set, re-creates) the conf-<n>
    directory as a patched copy of /etc/hbase/conf, plus the hbase-<n> log
    and run directories owned by hbase:hadoop and group-writable.
    """
    if int(n) == 0:
        ensure_base()
        return

    conf_path = '%s/conf-%s' % (configs_base, n)
    log_path = '%s/hbase-%s' % (logs_base, n)
    run_path = '%s/hbase-%s' % (run_base, n)

    conf_needed = not exists(conf_path) or force
    if conf_needed:
        run('mkdir -p %s' % conf_path)
        run('cp /etc/hbase/conf/* %s/' % conf_path)
        patch_conf(conf_path, n)

    # The log and run directories get the same treatment, in that order.
    for owned_dir in (log_path, run_path):
        if not exists(owned_dir) or force:
            for command in ('mkdir -p %s', 'chown hbase:hadoop %s', 'chmod g+w %s'):
                sudo(command % owned_dir)
@task
def set_heapsize_n(size,n):
    """Set the -Xmx heap value of RegionServer n's hbase-env.sh.

    Edits the HBASE_REGIONSERVER_OPTS line of conf-<n>/hbase-env.sh in place
    (with sudo), replacing whatever sits between ``-Xmx`` and the following
    ``m`` with ``size``; pass the number without the unit suffix.
    """
    env_file = '%s/conf-%s/hbase-env.sh' % (configs_base, n)
    pattern = '^(export HBASE_REGIONSERVER_OPTS.*-Xmx)(.*)(m.*)$'
    replacement = '\\1%s\\3' % size
    sed(env_file, pattern, replacement, use_sudo=True)
# | |
# for starting and stopping processes | |
# | |
def hbase_daemon(process, command, conf=None):
    """Invoke hbase-daemon.sh as the hbase user.

    Runs ``/usr/lib/hbase/bin/hbase-daemon.sh [--config <conf>] <command>
    <process>``; the --config option is only added when conf is given.
    """
    config_opt = ' --config %s' % conf if conf else ''
    sudo('/usr/lib/hbase/bin/hbase-daemon.sh%s %s %s' % (config_opt, command, process),
         user='hbase')
def start_rs(conf):
    """Start a regionserver using the configuration directory conf."""
    hbase_daemon(process='regionserver', command='start', conf=conf)
def stop_rs(conf):
    """Stop the regionserver that uses the configuration directory conf."""
    hbase_daemon(process='regionserver', command='stop', conf=conf)
def rm_config(conf):
    """Recursively delete the configuration directory conf on the remote host."""
    removal = 'rm -rf %s' % conf
    run(removal)
def do_n(n, fn):
    """Apply fn to the conf-<n> directory under configs_base.

    The directory is chosen by name, not by its position in the ``ls``
    listing: the listing is sorted lexicographically (conf-10 before conf-2)
    and shifts when an instance is missing, so a positional lookup can hand
    the wrong directory to fn.

    Args:
        n: instance number (anything int() accepts; a non-numeric value
            raises ValueError).
        fn: callable taking the configuration directory path.
    """
    index = int(n)
    discovered = run('ls -1d %s/conf-*' % configs_base).split()
    target = '%s/conf-%s' % (configs_base, index)
    if target not in discovered:
        abort('No managed configuration %s found (have: %s)'
              % (target, ', '.join(discovered) or 'none'))
    fn(target)
@task
def list_configs():
    """List the conf-* directories found under configs_base, as a list of paths."""
    listing = run('ls -1d %s/conf-*' % configs_base)
    return listing.split()
@task
def rm_config_n(n):
    """Delete the specified config directory."""
    do_n(n, fn=rm_config)
@task
def start_n(n):
    """Start the nth RegionServer process on each host."""
    do_n(n, fn=start_rs)
@task
def stop_n(n):
    """Stop the nth RegionServer process on each host."""
    do_n(n, fn=stop_rs)
@task
def start_all():
    """Start all discovered RegionServer processes on all hosts.

    Every conf-* directory listed under configs_base gets its own
    regionserver started with that directory as its --config.
    """
    listing = run('ls -1d %s/conf-*' % configs_base)
    conf_dirs = listing.split()
    if len(conf_dirs) == 0:
        puts('No managed configurations found.')
    for conf_dir in conf_dirs:
        start_rs(conf_dir)
@task
def stop_all():
    """Stop all discovered RegionServer processes on all hosts.

    Every conf-* directory listed under configs_base has the regionserver
    using that directory as its --config stopped.
    """
    listing = run('ls -1d %s/conf-*' % configs_base)
    conf_dirs = listing.split()
    if len(conf_dirs) == 0:
        puts('No managed configurations found.')
    for conf_dir in conf_dirs:
        stop_rs(conf_dir)
# | |
# for running tests | |
# | |
@task
def testSeqWrite(n, heap):
    """Run the PerformanceEvaluation sequentialWrite benchmark locally.

    n and heap are only used to label the log file the output is written to.
    """
    log_file = "/grid/1/ndimiduk/multirs-3x{n}-10Mrows-24splits-LZO-sequentialWrite-3n-Xmx{heap}-1.log".format(n=n, heap=heap)
    benchmark = "hbase org.apache.hadoop.hbase.PerformanceEvaluation --nomapred --rows=10000000 --compress=LZO --presplit=24 sequentialWrite 3"
    local(benchmark + " &> " + log_file)
@task
def testRandWrite(n, heap):
    """Run the PerformanceEvaluation randomWrite benchmark locally.

    n and heap are only used to label the log file the output is written to.
    """
    log_file = "/grid/1/ndimiduk/multirs-3x{n}-10Mrows-24splits-LZO-randomWrite-3n-Xmx{heap}-1.log".format(n=n, heap=heap)
    benchmark = "hbase org.apache.hadoop.hbase.PerformanceEvaluation --nomapred --rows=10000000 --compress=LZO --presplit=24 randomWrite 3"
    local(benchmark + " &> " + log_file)
@task
def testScan(n, heap):
    """Load the table with sequentialWrite, then run the scan benchmark.

    Only the scan run is logged; n and heap are only used to label that log
    file.
    """
    load_step = "hbase org.apache.hadoop.hbase.PerformanceEvaluation --nomapred --rows=10000000 --compress=LZO --presplit=24 sequentialWrite 3"
    local(load_step)

    log_file = "/grid/1/ndimiduk/multirs-3x{n}-10Mrows-24splits-LZO-scan-3n-Xmx{heap}-1.log".format(n=n, heap=heap)
    scan_step = "hbase org.apache.hadoop.hbase.PerformanceEvaluation --nomapred --rows=10000000 scan 3"
    local(scan_step + " &> " + log_file)
@task
def testRandRead(n, heap):
    """Load the table with sequentialWrite, then run randomRead three times.

    The load runs with hbase.rs.cacheblocksonwrite=true and is not logged.
    Each read run is logged to its own file, numbered 1 to 3; n and heap are
    only used to label those files.
    """
    load_step = "hbase org.apache.hadoop.hbase.PerformanceEvaluation -Dhbase.rs.cacheblocksonwrite=true --nomapred --rows=10000000 --compress=LZO --presplit=24 sequentialWrite 3"
    local(load_step)

    read_step = "hbase org.apache.hadoop.hbase.PerformanceEvaluation --nomapred --rows=10000000 --sampleRate=0.01 randomRead 3"
    log_template = "/grid/1/ndimiduk/multirs-3x{n}-10Mrows-24splits-LZO-randomRead-3n-Xmx{heap}-{x}.log"
    for attempt in (1, 2, 3):
        local(read_step + " &> " + log_template.format(x=attempt, n=n, heap=heap))
@task
def testBigRandRead(n, heap):
    """Run the randomRead benchmark three times against 100M rows.

    Each run is logged to its own file, numbered 1 to 3; n and heap are only
    used to label those files. The log name carries ``100Mrows`` so these
    runs do not overwrite the logs of the 10M-row testRandRead task, which
    otherwise uses the same naming scheme.
    """
    # NOTE(review): the load step is disabled here, so the table is
    # presumably loaded beforehand; confirm before running. The command was:
    # local("hbase org.apache.hadoop.hbase.PerformanceEvaluation -Dhbase.rs.cacheblocksonwrite=true --nomapred --rows=100000000 --compress=LZO --presplit=24 sequentialWrite 3")
    for x in [1, 2, 3]:
        local("hbase org.apache.hadoop.hbase.PerformanceEvaluation --nomapred --rows=100000000 --sampleRate=0.001 randomRead 3 &> /grid/1/ndimiduk/multirs-3x{n}-100Mrows-24splits-LZO-randomRead-3n-Xmx{heap}-{x}.log".format(x=x, n=n, heap=heap))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment