Created
June 7, 2012 00:48
-
-
Save shrijeet/2885785 to your computer and use it in GitHub Desktop.
Hadoop rack awareness helper script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import os | |
""" | |
Modify this config section as needed:
1) hostrack_data: path of a data file in which each line describes one host
2) field_sep: separator between the fields of a line
3) log: set to True to enable logging, False to disable it
A lookup can be performed on any of host, external-ip or internal-ip.
""" | |
# Runtime settings read by the rest of the script:
#   log           -- whether log() emits anything
#   field_sep     -- separator between the fields of a data line
#   hostrack_data -- path of the data file, rooted at $HADOOP_HOME
#                    (or /usr/lib/hadoop-0.20 when that variable is unset)
config = {
    'log': False,
    'field_sep': ',',
    'hostrack_data': '%s/conf/rack.data'
                     % os.environ.get('HADOOP_HOME', '/usr/lib/hadoop-0.20'),
}

# Rack ids in Hadoop are hierarchical and look like path names.
# By default, every node has a rack id of /default-rack.

# Reported for a key that has no (non-empty) rack in the data file.
DEFAULT_RACK = "/default-rack"
# Placed in front of a rack found in the data file.
DEFAULT_TOP_SWITCH = "/root"
class RackDataParser:
    """Load the host-to-rack data file and answer rack lookups.

    Every line of the data file must hold exactly four fields separated by
    ``config['field_sep']``: host, extip, intip and switch.  After
    construction ``rack_maps`` is a list of three dicts which map, in this
    order, host name, external ip and internal ip to the switch field.
    """

    def __init__(self):
        """Read ``config['hostrack_data']`` and build the lookup tables.

        The process exits with status 99 if a data line cannot be parsed.
        """
        self.rackfile = config['hostrack_data']
        self.rack_maps = self.__load_rackdata()
        log("loaded %d entries in search index" % len(self.rack_maps[0]))

    def lookup(self, key):
        """Return the switch recorded for ``key``, or ``None`` if unknown.

        The tables are searched in order, so a host name match is preferred
        over an external ip match, which is preferred over an internal ip
        match.
        """
        for table in self.rack_maps:
            # ``in`` instead of dict.has_key(): has_key() no longer exists
            # on Python 3 dicts.
            if key in table:
                return table[key]
        return None

    def __load_rackdata(self):
        """Parse the data file into ``[by_host, by_extip, by_intip]``.

        On the first malformed line a message naming the 1-based line
        number is printed and the process exits with status 99.
        """
        by_host = {}
        by_extip = {}
        by_intip = {}
        # The context manager closes the file on every path, including the
        # sys.exit() below.
        with open(self.rackfile, "r") as rack_in:
            for linenum, line in enumerate(rack_in, 1):
                try:
                    entry, switch = self.__parse_rackline(line)
                except ValueError as parse_e:
                    print("Line number %d has problem : %s" % (linenum, str(parse_e)))
                    sys.exit(99)
                by_host[entry.host] = switch
                # Each table is keyed by the field it is named after; the
                # list order below keeps the historical search order
                # (host, then external ip, then internal ip).
                by_extip[entry.extip] = switch
                by_intip[entry.intip] = switch
        return [by_host, by_extip, by_intip]

    def __parse_rackline(self, line):
        """Split one data line into ``(HostDataEntry, switch)``.

        Raises:
            ValueError: if the line does not contain exactly four fields.
        """
        line = line.rstrip()
        fields = line.split(config['field_sep'])
        if len(fields) != 4:
            raise ValueError(
                "data line:-> [%s] is not in format host,extip,intip,switch" % line)
        host, extip, intip, switch = fields
        return (HostDataEntry(host, extip, intip), switch)
class HostDataEntry:
    """The identifying fields of one host: name, external ip, internal ip."""

    def __init__(self, host=None, extip=None, intip=None):
        """Store the three identifiers.

        Raises:
            ValueError: if host, extip and intip are all ``None``.
        """
        identifiers = (host, extip, intip)
        if all(value is None for value in identifiers):
            raise ValueError("atleast one of the host, extip and intip should be non null")
        self.host, self.extip, self.intip = identifiers

    def __str__(self):
        """Return the fields rendered as ``host,extip,intip``."""
        parts = (self.host, self.extip, self.intip)
        return ",".join(str(part) for part in parts)
def log(message):
    """Write ``message`` to standard error when ``config['log']`` is true.

    Diagnostics go to stderr rather than stdout because this script prints
    its answer (the space-joined rack paths) on stdout; log lines on the
    same stream would become part of the output read by the caller.
    """
    if config['log']:
        sys.stderr.write(str(message) + "\n")
if __name__ == '__main__':
    # Usage: one or more keys (host, external-ip or internal-ip).
    # Prints one rack path per key, space separated on a single line and in
    # the same order as the keys.  Exits with status 99 when no key is given.
    if len(sys.argv) < 2:
        print("One (or more) argument(s) needed, either of <host,external-ip or internal-ip>")
        sys.exit(99)
    parser = RackDataParser()
    results = []
    for key in sys.argv[1:]:
        # Report the key handled in this iteration, not always the first
        # command-line argument.
        log("searching for key %s" % key)
        result = parser.lookup(key)
        # An unknown key (None) and an empty switch field both fall back to
        # the default rack.
        if result:
            results.append(DEFAULT_TOP_SWITCH + "/" + str(result))
        else:
            results.append(DEFAULT_RACK)
    print(' '.join(results))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment