Automated script for Hadoop Installation
config.json

{
    "hadoop_src": "hadoop-2.7.1.tar.gz",
    "hadoop_folder": "hadoop-2.7.1",
    "hadoop_path": "Softi/hadoop-2.7.1",
    "__comment": "hadoop_path is relative to the home folder",
    "core": [
        { "name": "fs.default.name", "value": "hdfs://localhost:54310" }
    ],
    "yarn": [
        { "name": "yarn.nodemanager.aux-services", "value": "mapreduce_shuffle" }
    ],
    "hdfs": [
        { "name": "dfs.replication", "value": "1" },
        { "name": "dfs.name.dir", "value": "file:///home/hduser/hadoopinfra/namenode" },
        { "name": "dfs.data.dir", "value": "file:///home/hduser/hadoopinfra/datanode" }
    ],
    "mapred": [
        { "name": "mapreduce.jobtracker.address", "value": "localhost:54311" },
        { "name": "mapreduce.framework.name", "value": "yarn" }
    ]
}
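Each list in the config maps to one Hadoop site file: core to core-site.xml, hdfs to hdfs-site.xml, yarn to yarn-site.xml, and mapred to mapred-site.xml (created from its shipped template). As a rough sketch of the intended result, the core entry above should leave core-site.xml looking something like:

<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://localhost:54310</value>
    </property>
</configuration>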
setup.py

#!/usr/bin/env python
# Automated single-node Hadoop setup in pseudo-distributed mode (Python 2).
import json
import os
import shutil
import stat
import sys
import xml.etree.ElementTree

user_home = os.path.expanduser('~')
hadoop_home = None  # set in __main__ from the config file
def _append_properties(conf, infile, outfile):
    # Append one <property><name>..</name><value>..</value></property>
    # element per config entry under the <configuration> root.
    et = xml.etree.ElementTree.parse(infile)
    root = et.getroot()
    for prop in conf:
        ptag = xml.etree.ElementTree.SubElement(root, 'property')
        ntag = xml.etree.ElementTree.SubElement(ptag, 'name')
        ntag.text = prop['name']
        vtag = xml.etree.ElementTree.SubElement(ptag, 'value')
        vtag.text = prop['value']
    et.write(outfile)

def core_file(conf):
    filename = os.path.join(hadoop_home, 'etc/hadoop/core-site.xml')
    _append_properties(conf, filename, filename)
    print 'Core conf finished'

def hdfs_file(conf):
    filename = os.path.join(hadoop_home, 'etc/hadoop/hdfs-site.xml')
    _append_properties(conf, filename, filename)
    print 'HDFS conf finished'

def yarn_file(conf):
    filename = os.path.join(hadoop_home, 'etc/hadoop/yarn-site.xml')
    _append_properties(conf, filename, filename)
    print 'YARN conf finished'

def mapred_file(conf):
    # mapred-site.xml does not ship by default; start from the template
    infile = os.path.join(hadoop_home, 'etc/hadoop/mapred-site.xml.template')
    outfile = os.path.join(hadoop_home, 'etc/hadoop/mapred-site.xml')
    _append_properties(conf, infile, outfile)
    print 'MapReduce conf finished'
def which(program):
    """Return the full path of an executable found on PATH, or None."""
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
    fpath, fname = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        for path in os.environ['PATH'].split(os.pathsep):
            path = path.strip('"')
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file
    return None
def write_key():
    """Generate an RSA key pair if needed and authorize it for passwordless SSH to localhost."""
    pub_key = os.path.join(user_home, '.ssh/id_rsa.pub')
    auth_keys = os.path.join(user_home, '.ssh/authorized_keys')
    if not os.path.exists(pub_key):
        os.system('ssh-keygen -t rsa -P ""')
    with open(pub_key, 'r') as f:
        key = f.read()
    # Append rather than overwrite, so existing authorized keys survive
    with open(auth_keys, 'a+') as fw:
        fw.write(key)
    os.chmod(auth_keys, stat.S_IRUSR | stat.S_IWUSR)
    print 'writing key finished'
def hadoop_env(java_home):
    """Append JAVA_HOME to hadoop-env.sh (not called by the main flow; run manually if needed)."""
    filename = os.path.join(hadoop_home, 'etc/hadoop/hadoop-env.sh')
    with open(filename, 'a') as f:
        f.write('\nexport JAVA_HOME=' + java_home + '\n')
    print 'hadoop_env finished'
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print 'Usage: python setup.py config.json'
        sys.exit(1)
    with open(sys.argv[1], 'r') as f:
        data = json.load(f)
    hadoop_zip = data['hadoop_src']
    hadoop_folder = data['hadoop_folder']
    hadoop_home = os.path.join(user_home, data['hadoop_path'])
    if which('ssh') is None:
        print 'Installing dependency: OpenSSH server'
        os.system('sudo apt-get install -y openssh-server')
    # Passwordless SSH to localhost is required by the Hadoop start scripts
    print 'Generating and authorizing SSH keys..'
    write_key()
    if which('java') is None:
        print 'Installing dependency: Java 8'
        os.system('sudo apt-get install -y openjdk-8-jdk')
    os.system('tar -xzvf ' + hadoop_zip)
    # hadoop_path already names the final folder, so move the extracted
    # tree straight to hadoop_home instead of nesting it inside
    shutil.move(hadoop_folder, hadoop_home)
    core_file(data['core'])
    hdfs_file(data['hdfs'])
    yarn_file(data['yarn'])
    mapred_file(data['mapred'])
    env_script = '''#!/bin/sh
export HADOOP_HOME={0}
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_INSTALL=$HADOOP_HOME
'''.format(hadoop_home)
    # Write the env file first, then make it readable and executable
    with open('hadoop_env', 'w') as f:
        f.write(env_script)
    os.chmod('hadoop_env', stat.S_IRWXU | stat.S_IRGRP | stat.S_IROTH)
    print 'To activate the Hadoop environment, run: source hadoop_env'
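A typical first run, sketched under the assumption that hadoop_env is written to the current directory and the standard Hadoop 2.x start scripts are on PATH after sourcing it:

python setup.py config.json
source hadoop_env
hdfs namenode -format
start-dfs.sh
start-yarn.sh
jps    # should list NameNode, DataNode, ResourceManager, NodeManager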
arya-oss commented Jul 1, 2019

Hi Prashanth, I wrote this script long ago to set up Hadoop (single node only) on a local system in pseudo-distributed mode. I'm not sure it will still work.
Pre-requisites:

  1. Ubuntu OS (or another Debian-based distribution)
  2. OpenSSH Server
  3. Java 8
  4. Hadoop Archive file
  5. Python 2

This script sets up four Hadoop config files and one environment file. You can also do this manually; just read the Hadoop documentation.
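If you want to sanity-check the prerequisites first, a quick check from a terminal might look like this (illustrative only, not part of the script; the archive name comes from config.json):

java -version
ssh -V
python2 --version
ls hadoop-2.7.1.tar.gz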
Thanks
