Cluster install
——————————————————————————————————————
*** Initial setup before Ambari setup ***
——————————————————————————————————————
OS checks:
yum -y install wget
wget -qO- --no-check-certificate https://github.com/hortonworks/HDP-Public-Utilities/raw/master/Installation/install_tools.sh | bash
cd /root/hdp and create Hostdetail.txt with an entry for every host (use FQDNs; see the example below)
Handy command, in case the hosts are already listed in the hosts file:
cat /etc/hosts | awk '{ print $2 }' (note: the hostname may be in $2 or $3)
Once Hostdetail.txt is under /root/hdp, test the scripts
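For reference, Hostdetail.txt is just one FQDN per line; the hostnames below are placeholders:
master1.example.com
master2.example.com
worker1.example.com
worker2.example.com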
ssh-keygen and follow the prompts
chmod 700 ~/.ssh
cd ~/.ssh
cat id_rsa.pub >> authorized_keys
chmod 600 authorized_keys
scp authorized_keys to every node under ~/.ssh
or try ssh-copy-id -i ~/.ssh/id_rsa.pub remote-host
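A minimal sketch for pushing the key to every host listed in Hostdetail.txt (assumes root login with password auth still enabled; the loop itself is not part of the install_tools scripts):
# hypothetical loop; Hostdetail.txt holds one FQDN per line
for host in $(cat /root/hdp/Hostdetail.txt); do
  ssh-copy-id -i ~/.ssh/id_rsa.pub root@$host
done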
/etc/hosts - all hosts in your cluster must be configured for DNS and reverse DNS.
If DNS can't be set up, then add the entries to the /etc/hosts file on every node.
./run_command.sh 'hostname'
./copy_file.sh source_filename destination_filename (for example: ./copy_file.sh /etc/hosts /etc/hosts)
*** IPtables ***
./run_command.sh 'chkconfig iptables off'
./run_command.sh '/etc/init.d/iptables stop'
*** Disable SELinux and PackageKit and check the umask value ***
./run_command.sh 'setenforce 0'
./run_command.sh 'umask 022'
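Note that setenforce 0 only lasts until the next reboot; a sketch to make it persistent (assumes a RHEL/CentOS-style /etc/selinux/config):
./run_command.sh "sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config"
./run_command.sh 'grep ^SELINUX= /etc/selinux/config'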
*** Disable THP ***
On the Ambari host, vi /etc/rc.local and add the following lines:
if test -f /sys/kernel/mm/transparent_hugepage/enabled; then
  echo never > /sys/kernel/mm/transparent_hugepage/enabled
fi
if test -f /sys/kernel/mm/transparent_hugepage/defrag; then
  echo never > /sys/kernel/mm/transparent_hugepage/defrag
fi
./copy_file.sh /etc/rc.local /etc/rc.local
./run_command.sh '/etc/rc.local'
verify: look for always madvise [never]
./run_command.sh 'cat /sys/kernel/mm/transparent_hugepage/enabled'
./run_command.sh 'cat /sys/kernel/mm/transparent_hugepage/defrag'
*** ntp ***
./run_command.sh '/etc/init.d/ntpd status'
If ntp is not installed, then:
./run_command.sh 'yum -y install ntp'
./run_command.sh '/etc/init.d/ntpd start'
./run_command.sh 'chkconfig ntpd on'
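Optionally, a quick check that the nodes are actually syncing against their peers (ntpq ships with the ntp package):
./run_command.sh 'ntpq -p'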
*** swap ***
cat /proc/sys/vm/swappiness
sysctl -w vm.swappiness=0
./run_command.sh 'sysctl -w vm.swappiness=0'
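sysctl -w does not survive a reboot; a sketch to persist the setting on every node (assumes /etc/sysctl.conf is applied at boot):
./run_command.sh 'echo vm.swappiness=0 >> /etc/sysctl.conf'
./run_command.sh 'grep vm.swappiness /etc/sysctl.conf'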
*** Check the Maximum Open File Descriptors ***
The recommended maximum number of open file descriptors is 10000 or higher.
./run_command.sh 'sysctl fs.file-max'
./run_command.sh 'sysctl -w fs.file-max=300000'
./run_command.sh 'cat /proc/sys/fs/file-max'
echo fs.file-max=300000 >> /etc/sysctl.conf (run on every node, or push the file with ./copy_file.sh /etc/sysctl.conf /etc/sysctl.conf)
grep fs.file-max /etc/sysctl.conf
——————————————————————————————————————
*** Ambari Installation ***
——————————————————————————————————————
http://docs.hortonworks.com
Typical Ambari install
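A minimal sketch of a typical install on the Ambari host; the repo URL and version below are assumptions, so take the exact ambari.repo from the Hortonworks docs above:
# assumed repo path and version - verify against the docs
wget -O /etc/yum.repos.d/ambari.repo http://public-repo-1.hortonworks.com/ambari/centos6/2.x/updates/2.0.0/ambari.repo
yum -y install ambari-server
ambari-server setup -s    # -s accepts the setup defaults (embedded Postgres, bundled JDK)
ambari-server start
# then open http://<ambari_host>:8080 and run the cluster install wizard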
——————————————————————————————————————
*** Tune the cluster ***
http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.1.1/bk_installing_manually_book/content/rpm-chap1-11.html
——————————————————————————————————————
*** HDFS and NN Testing ***
——————————————————————————————————————
TestDFSIO
su - hdfs
export YARN_EXAMPLES=/usr/hdp/current/hadoop-mapreduce-client
yarn jar $YARN_EXAMPLES/hadoop-mapreduce-client-jobclient-tests.jar TestDFSIO -write -nrFiles 100 -fileSize 100
yarn jar $YARN_EXAMPLES/hadoop-mapreduce-client-jobclient-tests.jar TestDFSIO -read -nrFiles 100 -fileSize 100
yarn jar $YARN_EXAMPLES/hadoop-mapreduce-client-jobclient-tests.jar TestDFSIO -clean
Terasort
su - hdfs
export YARN_EXAMPLES=/usr/hdp/current/hadoop-mapreduce-client
yarn jar $YARN_EXAMPLES/hadoop-mapreduce-examples.jar teragen 1000000 /benchmarks/terasort/terasort-input
yarn jar $YARN_EXAMPLES/hadoop-mapreduce-examples.jar terasort /benchmarks/terasort/terasort-input /benchmarks/terasort/terasort-output
yarn jar $YARN_EXAMPLES/hadoop-mapreduce-examples.jar teravalidate /benchmarks/terasort/terasort-output /benchmarks/terasort/teravalidate-output
Read/Write Data
*** TeraGen ***
hdfs dfs -mkdir /benchmarks
hdfs dfs -mkdir /benchmarks/terasort
# This will generate 1,000,000 100-byte records as input for TeraSort
# use find / -name hadoop-mapreduce-examples.jar to get the exact location
# use -Dmapred.map.tasks=<number of mappers> to control parallelism
hadoop jar /usr/hdp/current/hadoop-mapreduce/hadoop-mapreduce-examples.jar teragen -Dmapred.map.tasks=30 1000000 /benchmarks/terasort/terasort-input
*** TeraSort ***
# Sort the 1,000,000 records generated by TeraGen
hadoop jar /usr/hdp/2.2.3.0-2610/hadoop-mapreduce/hadoop-mapreduce-examples.jar terasort -Dmapred.reduce.tasks=30 /benchmarks/terasort/terasort-input /benchmarks/terasort/terasort-output
*** TeraValidate ***
# Validate that the sort was successful and correct
hadoop jar /usr/hdp/2.2.3.0-2610/hadoop-mapreduce/hadoop-mapreduce-examples.jar teravalidate /benchmarks/terasort/terasort-output /benchmarks/terasort/teravalidate-output
*** NameNode Exercise ***
Log in as the hdfs user
*** TestDFSIO Write Test ***
# The -fileSize argument is, by default, in units of MB. This should write 10 GB of files (100 files x 100 MB)
hadoop jar /usr/hdp/2.2.3.0-2610/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-2.6.0.2.2.3.0-2610-tests.jar TestDFSIO -write -nrFiles 100 -fileSize 100
TestDFSIO Read Test
hadoop jar /usr/hdp/2.2.3.0-2610/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-2.6.0.2.2.3.0-2610-tests.jar TestDFSIO -read -nrFiles 100 -fileSize 100
TestDFSIO Cleanup
hadoop jar /usr/hdp/2.2.3.0-2610/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-2.6.0.2.2.3.0-2610-tests.jar TestDFSIO -clean
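By default TestDFSIO appends its throughput summary to TestDFSIO_results.log in the local working directory; to capture it elsewhere, the -resFile option can be used (the path below is illustrative):
hadoop jar /usr/hdp/2.2.3.0-2610/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-2.6.0.2.2.3.0-2610-tests.jar TestDFSIO -write -nrFiles 100 -fileSize 100 -resFile /tmp/dfsio_write.log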
——————————————————————————————————————
*** HA - Enable and Test ***
——————————————————————————————————————
NN test:
Log in to Ambari and check for the Active NameNode: http://<hostname>:8080/#/main/services/HDFS/summary
ssh into the Active NN
ps -ef | grep namenode
kill -9 <pid>
Log in to Ambari and you will see the Active NameNode change (failover to the Standby)
Terasort test
# Sort the 1,000,000 records generated by TeraGen
hadoop jar /usr/hdp/2.2.3.0-2610/hadoop-mapreduce/hadoop-mapreduce-examples.jar terasort /benchmarks/terasort/terasort-input /benchmarks/terasort/terasort-output
- look for the application id or application URL and confirm the job keeps running through the failover
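A sketch for watching the job ride through the failover (standard yarn CLI; substitute the real application id from the job output):
yarn application -list
yarn application -status <application_id>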
——————————————————————————————————————
*** Initial Environment Setup for POC testing ***
——————————————————————————————————————
./run_command.sh 'groupadd hadoopusers'
./run_command.sh 'groupadd analyst'
./run_command.sh 'groupadd shipment_analyst'
./run_command.sh 'groupadd revenue_analyst'
./run_command.sh 'grep hadoopusers\|analyst /etc/group'
./run_command.sh 'useradd -g hadoopusers platfora'
./run_command.sh 'useradd -g hadoopusers 353559'
./run_command.sh 'useradd -g hadoopusers 687671'
./run_command.sh 'grep platfora\|353559\|687671 /etc/passwd'
HDFS setup
hdfs dfs -mkdir /user/platfora
hdfs dfs -chown -R platfora:hdfs /user/platfora
hdfs dfs -mkdir /user/353559
hdfs dfs -chown -R 353559:hdfs /user/353559
hdfs dfs -mkdir /user/687671
hdfs dfs -chown -R 687671:hdfs /user/687671
hdfs dfs -mkdir /tmp/hadoop-platfora
hdfs dfs -mkdir /tmp/353559
hdfs dfs -mkdir /tmp/687671
hdfs dfs -chown -R platfora:hdfs /tmp/hadoop-platfora
hdfs dfs -chown -R 353559:hdfs /tmp/353559
hdfs dfs -chown -R 687671:hdfs /tmp/687671
hdfs dfsadmin -refreshUserToGroupsMappings
hdfs dfs -ls /tmp/
hdfs dfs -ls /user/
——————————————————————————————————————
*** HDFS ACLs ***
——————————————————————————————————————
Set dfs.namenode.acls.enabled=true in hdfs-site.xml (restart the NameNode for it to take effect)
hdfs dfs -mkdir /data
hdfs dfs -mkdir /data/shipment
hdfs dfs -mkdir /data/revenue
hdfs dfs -setfacl -R -m group:shipment_analyst:r-- /data/shipment
hdfs dfs -setfacl -R -m group:revenue_analyst:r-- /data/revenue
hdfs dfs -getfacl /data
hdfs dfs -getfacl /data/shipment
hdfs dfs -getfacl /data/revenue
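One caveat worth confirming with the customer: an r-- ACL on a directory lets the group list names but not descend into subdirectories or open files. If the analysts need to read the data itself, r-x on directories is the usual pattern, e.g.:
hdfs dfs -setfacl -R -m group:shipment_analyst:r-x /data/shipment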
Hive Database: log in as the hdfs user
CREATE DATABASE IF NOT EXISTS shipment_schema COMMENT 'Shipment schema' LOCATION '/data/shipment/shipment_schema';
CREATE DATABASE IF NOT EXISTS revenue_schema COMMENT 'Revenue schema' LOCATION '/data/revenue/revenue_schema';
Tables: tracking_info and invoice need to be created. Follow up with the customer.
——————————————————————————————————————
*** Ranger setup ***
http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.2.0/Ranger_U_Guide_v22/Ranger_U_Guide_v22.pdf
——————————————————————————————————————
HDFS
create HDFS policy
Hive
create Hive policy
HBase
create HBase policy
Storm
Knox
——————————————————————————————————————
*** Views ***
——————————————————————————————————————
Installation
cd /var/lib/ambari-server/resources/views/
wget http://public-repo-1.hortonworks.com/HDP-LABS/Projects/Views/tp1/files-0.1.0-tp1.jar
wget http://public-repo-1.hortonworks.com/HDP-LABS/Projects/Views/tp1/hive-0.2.0-tp1.jar
wget http://public-repo-1.hortonworks.com/HDP-LABS/Projects/Views/tp1/pig-0.1.0-tp1.jar
wget http://public-repo-1.hortonworks.com/HDP-LABS/Projects/Views/tp1/capacity-scheduler-0.3.0-tp1.jar
Restart ambari-server after downloading the jars so the new views are loaded: ambari-server restart
core-site.xml
hadoop.proxyuser.root.groups=*
hadoop.proxyuser.root.hosts=*
hadoop.proxyuser.hcat.groups=*
hadoop.proxyuser.hcat.hosts=*
webhcat-site.xml
webhcat.proxyuser.root.groups=*
webhcat.proxyuser.root.hosts=*
hdfs dfs -mkdir -p /user/admin/
hdfs dfs -chown admin:hdfs /user/admin
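A quick sanity check that the admin home directory is reachable over WebHDFS (GETFILESTATUS is a standard WebHDFS op; hostname and port are placeholders):
curl "http://<active_namenode>:50070/webhdfs/v1/user/admin?op=GETFILESTATUS&user.name=admin"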
Job
yarn.ats.url* - get this from Ambari (YARN -> yarn.timeline-service.webapp.address, http://<host>:8188)
yarn.resourcemanager.url* (RM address http://<rm_host>:8088)
Tez
YARN Timeline Server URL* (YARN -> yarn.timeline-service.webapp.address, http://<host>:8188)
yarn.resourcemanager.url* (RM address http://<rm_host>:8088)
Slider
ambari.server.url* http://ambari_server:8080/api/v1/clusters/cluster_name
Ambari username and password
Hive
webhdfs.url* webhdfs://active_namenode:50070
webhcat.url* http://webhcat_host:50111/templeton/v1
scripts.dir* /user/${username}/hive/scripts
jobs.dir* /user/${username}/hive/jobs
hive.host* HiveServer2 hostname
hive.port* 10000
hive.auth auth=NONE;user=${username}
Capacity Scheduler
http://ambari_server:8080/api/v1/clusters/cluster_name
File
WebHDFS FileSystem URI* hdfs-site.xml -> dfs.namenode.http-address, for example: webhdfs://master2:50070 (HA consideration - point at the Active NN)