Skip to content

Instantly share code, notes, and snippets.

@nwwells
Created June 13, 2012 20:31
Show Gist options
  • Save nwwells/2926336 to your computer and use it in GitHub Desktop.
Save nwwells/2926336 to your computer and use it in GitHub Desktop.
A script to populate a mongo database with lots of statistics about nodes
import argparse
import datetime
import json
import random
import sys
import time
from uuid import uuid1

import pymongo
# Wall-clock start of the whole run; main() reads this module-level value to
# report the elapsed time once generation is finished.
start = time.time()

# Parse args
parser = argparse.ArgumentParser(
    description='Generate Node statistics and write to the mongo database at seattle')
# nargs='?' makes the positional optional so that default=4 can actually take
# effect; a plain positional is always required and its default is ignored.
parser.add_argument('total_threads',
                    nargs='?',
                    type=int,
                    default=4)
# Declared as an optional flag: argparse raises
# "ValueError: dest supplied twice for positional argument" when a positional
# argument is also given an explicit dest=, which made the script fail before
# it ever reached parse_args().
parser.add_argument('--start',
                    action='store_const',
                    const=True,
                    dest='create_samples',
                    default=False)
try:
    args = parser.parse_args()
except IOError as error:
    # Report the failing file on its own, surrounded by blank lines, and exit
    # with the errno carried by the exception.
    print("")
    print(error.args[1] + ": " + error.filename)
    print("")
    sys.exit(error.args[0])
# Node states to draw from. "Idle" and "Running" each appear three times and
# "Down" once, so a uniform random.choice picks "Down" one time in seven.
_STATES = ["Down", "Idle", "Running", "Idle", "Running", "Idle", "Running"]

# Node categories to draw from (uniformly).
_CATEGORIES = [
    "Active",
    "BatchFailure",
    "Benchmark",
    "EmergencyMaintenance",
    "GridReservation",
    "HardwareFailure",
    "HardwareMaintenance",
    "Idle",
    "JobReservation",
    "NetworkFailure",
    "Other",
    "OtherFailure",
    "PersonalReservation",
    "Site1",
    "Site2",
    "Site3",
    "Site4",
    "Site5",
    "Site6",
    "Site7",
    "Site8",
    "SoftwareFailure",
    "SoftwareMaintenance",
    "StandingReservation",
    "StorageFailure",
    "UserReservation",
    "VPC",
]

# Seconds between two consecutive statistics samples (30 minutes).
_SAMPLE_INTERVAL_SECONDS = 1800
# Configured capacity reported for every generated node.
_CONFIGURED_PROCS = 32
_CONFIGURED_MEM = 256


def _resource(resource_id, configured, dedicated):
    """Return one resource entry; "available" is whatever is not dedicated."""
    return {
        "id": resource_id,
        "configured": configured,
        "dedicated": dedicated,
        "available": configured - dedicated,
    }


def _node_stat(index, timestamp, iterations):
    """Build one randomised statistics document for node number *index*.

    *timestamp* is the sample time in seconds since the epoch and
    *iterations* is stored as both "iterations" and "intervalSampleCount".
    """
    node = {"_id": str(uuid1())}
    node["name"] = "node%d" % index
    # PyMongo stores naive datetimes as if they were UTC, so the value must
    # be built in UTC; a naive local time would be shifted by the host's
    # UTC offset once it is in the database.
    node["time"] = datetime.datetime.utcfromtimestamp(timestamp)
    node["state"] = random.choice(_STATES)
    node["category"] = random.choice(_CATEGORIES)
    node["iterations"] = iterations
    node["intervalSampleCount"] = iterations
    node["startJobCount"] = random.randrange(0, 10)
    # A node cannot have more failed jobs than started jobs.
    node["failedJobCount"] = min(random.randrange(0, 5), node["startJobCount"])

    dedicated_procs = random.randrange(0, _CONFIGURED_PROCS)
    dedicated_mem = random.randrange(0, _CONFIGURED_MEM)
    node["resources"] = [
        _resource("procs", _CONFIGURED_PROCS, dedicated_procs),
        _resource("mem", _CONFIGURED_MEM, dedicated_mem),
    ]

    node["metrics"] = [
        {
            "id": "cpuLoad",
            "average": random.uniform(0, dedicated_procs),
            # triangular(low, high, mode): peaks at the dedicated amount.
            "maximum": random.triangular(0, _CONFIGURED_PROCS, dedicated_procs),
            "minimum": random.random(),
        },
        {
            "id": "memUtil",
            "average": random.uniform(0.1, dedicated_mem),
            "maximum": random.triangular(0, _CONFIGURED_MEM, dedicated_mem),
            "minimum": random.uniform(0, 0.1),
        },
        {
            "id": "pwatts",
            "average": random.randrange(132, 435),
            "maximum": random.randrange(435, 600),
            "minimum": random.randrange(100, 132),
        },
        {
            "id": "threadcount",
            "average": random.randrange(15, 44),
            "maximum": random.randrange(44, 124),
            "minimum": random.randrange(10, 15),
        },
        {
            "id": "pingtime",
            "average": random.randrange(132, 435),
            "maximum": random.randrange(435, 600),
            "minimum": random.randrange(80, 132),
        },
    ]
    return node


def main(args, intervals=1000, nodes=1000):
    """Drop and repopulate Moab.NodeStats on 'seattle' with random node stats.

    For each of *intervals* time steps (30 minutes apart, walking backwards
    from now) one document is written for each of *nodes* nodes. Progress is
    shown as one dot per time step, 50 dots per line, and a summary with the
    record count and elapsed time is printed at the end.

    args      -- parsed command-line arguments; currently not read here.
    intervals -- number of time steps to generate. To keep data for 5 years
                 (assuming 30 minute stat interval) pass 5 * 365 * 24 * 2.
    nodes     -- number of nodes generated per time step.

    The elapsed time is measured from the module-level ``start`` value.
    """
    # open Mongo connection
    coll = pymongo.Connection('seattle', 27017).Moab.NodeStats
    coll.drop()

    now = time.time()
    count = 0
    for i in range(intervals):
        if i % 50 == 0:
            sys.stdout.write("\ntimes: ")
        sys.stdout.write(".")
        # Without a flush the dots only show up when the line is completed.
        sys.stdout.flush()

        timestamp = now - i * _SAMPLE_INTERVAL_SECONDS
        # NOTE(review): randrange(3, 4) can only ever return 3; if 3 or 4 was
        # intended this should be randint(3, 4). Behaviour kept as is.
        iterations = random.randrange(3, 4)

        # One bulk insert per time step instead of one round trip per node.
        batch = [_node_stat(j, timestamp, iterations) for j in range(nodes)]
        if batch:
            coll.insert(batch)
        count += len(batch)

    end = time.time()
    print("\nCreated %i records" % count)
    print("Elapsed time: %i seconds" % (end - start))
# Script entry point: run the generator with the parsed command-line arguments.
main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment