Skip to content

Instantly share code, notes, and snippets.

@shrijeet
Created June 7, 2012 00:48
Show Gist options
  • Save shrijeet/2885785 to your computer and use it in GitHub Desktop.
Save shrijeet/2885785 to your computer and use it in GitHub Desktop.
Hadoop rack awareness helper script
#!/usr/bin/env python
import sys
import os
"""
Modify this config section based on needs
1) hostrack_data : path to a file with one host per line, in the format host,extip,intip,switch
2) field_sep: separator used between the fields of a line
3) log: enable/disable logging
A lookup key may be any one of host name, external IP or internal IP.
"""
# Script-wide settings:
#   hostrack_data - path of the rack data file, located under $HADOOP_HOME
#                   (falls back to /usr/lib/hadoop-0.20 when the variable is unset)
#   field_sep     - separator between the fields of one data line
#   log           - set to True to enable diagnostic messages
config = {
    'hostrack_data': '%s/conf/rack.data' % os.getenv('HADOOP_HOME', '/usr/lib/hadoop-0.20'),
    'field_sep': ',',
    'log': False,
}
'''
Rack ids in Hadoop are hierarchical and look like path names.
By default, every node has a rack id of /default-rack.
'''
# Prefix prepended (with a "/" separator) to every rack id found in the rack data.
DEFAULT_TOP_SWITCH = "/root"
# Rack id reported for a key that has no non-empty entry in the rack data.
DEFAULT_RACK = "/default-rack"
class RackDataParser:
    """Load the rack data file and resolve lookup keys to rack ids.

    The file named by config['hostrack_data'] is read once at construction.
    Each line must have exactly four fields (host, external ip, internal ip,
    switch) separated by config['field_sep'].  Three indexes are built so a
    key can be matched against any of the first three fields.
    """

    def __init__(self):
        """Read the configured rack data file and build the search indexes."""
        self.rackfile = config['hostrack_data']
        self.rack_maps = self.__load_rackdata()
        log("loaded %d entries in search index" % len(self.rack_maps[0]))

    def lookup(self, key):
        """Return the rack id stored for *key*, or None when it is unknown.

        The indexes are searched in order, so when the same key appears in
        more than one of them the first match wins.
        """
        for index in self.rack_maps:
            # `in` works on Python 2 and 3; dict.has_key() is gone in Python 3.
            if key in index:
                return index[key]
        return None

    def __load_rackdata(self):
        """Parse the rack data file into [host, extip, intip] dictionaries.

        Each dictionary maps one field of a line to that line's switch field.
        On the first line that cannot be parsed, the problem is printed and
        the process exits with status 99.
        """
        host = {}
        extip = {}
        intip = {}
        # The context manager closes the file even when sys.exit() fires.
        with open(self.rackfile, "r") as rack_in:
            for linenum, line in enumerate(rack_in, 1):
                try:
                    entry, rack = self.__parse_rackline(line)
                except Exception as parse_e:
                    print("Line number %d has problem : %s" % (linenum, str(parse_e)))
                    sys.exit(99)
                host[entry.host] = rack
                extip[entry.extip] = rack
                intip[entry.intip] = rack
        # Search order is host, external ip, internal ip.  This is the order
        # the data was effectively searched in before the dictionaries were
        # given names that match their contents.
        return [host, extip, intip]

    def __parse_rackline(self, line):
        """Split one data line into a (HostDataEntry, switch) pair.

        Raises:
            ValueError: the line does not have exactly four fields.
        """
        line = line.rstrip()
        fields = line.split(config['field_sep'])
        if len(fields) != 4:
            raise ValueError("data line:-> [%s] is not in format host,extip,intip,switch" %
                             (line))
        return (HostDataEntry(fields[0], fields[1], fields[2]), fields[3])
class HostDataEntry:
    """One host record: its name, external ip and internal ip."""

    def __init__(self, host=None, extip=None, intip=None):
        """Store the three identifiers.

        Raises:
            ValueError: all three identifiers are None.
        """
        identifiers = (host, extip, intip)
        if all(value is None for value in identifiers):
            raise ValueError("atleast one of the host, extip and intip should be non null")
        self.host, self.extip, self.intip = identifiers

    def __str__(self):
        """Return the record as "host,extip,intip"."""
        parts = (self.host, self.extip, self.intip)
        return ",".join(str(part) for part in parts)
def log(message):
    """Emit *message* as a diagnostic line when config['log'] is enabled.

    Diagnostics are written to stderr.  stdout carries the script's answer
    (the space-separated rack list printed by the main section), and log
    lines mixed into that stream would corrupt it.
    """
    if config['log']:
        sys.stderr.write("%s\n" % (message,))
if __name__ == '__main__':
    # Resolve every command-line argument (host name, external ip or internal
    # ip) to a rack id and print the ids on a single line, space-separated and
    # in argument order.
    if len(sys.argv) < 2:
        print("One (or more) argument(s) needed, either of <host,external-ip or internal-ip>")
        sys.exit(99)
    parser = RackDataParser()
    results = []
    for key in sys.argv[1:]:
        # Log the key actually being searched, not always the first argument.
        log("searching for key %s" % (key,))
        result = parser.lookup(key)
        if result:
            results.append(DEFAULT_TOP_SWITCH + "/" + str(result))
        else:
            # Unknown key, or an entry whose switch field is empty.
            results.append(DEFAULT_RACK)
    print(' '.join(results))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment