ChristinaLK · November 18, 2024 17:53
diff --git a/xdmod-parser.py b/xdmod-parser.py
 import csv

 import pandas as pd

 ## data was downloaded from XDMod, the Jobs by User dashboard

 datafile = "Jobs__by_User_2024-10-01_to_2024-10-31_aggregate.csv"
 outfile = "Jobs__by_User_2024-10-01_to_2024-10-31_subset.csv"

 # line that opens and closes each section of data in the export
 delimiter = "---------"

 ## build a dictionary with all the data
 ## layout: d[label] = {first column name: [...], label: [...]}
 ## where label is the second column header of a section

 # parser state
 processing = False  # True while reading the data rows of a section
 iscols = False  # True when the next non-blank line is a column header
 label = ""  # second column header of the section being read
 cols = list()  # column headers of the section being read

 # dictionary to populate
 d = dict()

 # logic to populate dictionary
 with open(datafile) as f:
     for raw in f:
         line = raw.strip()
         if not line:
             # blank lines carry no fields; indexing them would raise
             continue
         if line == delimiter:
             # a delimiter ends the section being read; otherwise it
             # announces that the next line holds the column headers
             if processing:
                 processing = False
             else:
                 iscols = True
             continue
         if not (processing or iscols):
             # text outside any section is ignored
             continue
         # Parse with the csv module instead of str.split(','): quoted
         # fields may contain commas, and the surrounding quotes are
         # removed without slicing characters off by hand.
         vals = next(csv.reader([line]))
         if processing:
             # data row: only the first two fields are kept
             d[label][cols[0]].append(vals[0])
             d[label][label].append(vals[1])
         else:
             # header row: start a new section keyed by the second header
             cols = vals
             label = cols[1]
             d[label] = {cols[0]: [], label: []}
             iscols = False
             processing = True

 # subset with the pieces we want
 keys_we_want = ["Wait Hours: Per Job",
                 "CPU Hours: Total",
                 "Number of Jobs Running",
                 "Job Size: Per Job (Core Count)"]
 # one two-column frame per wanted section; a missing section raises KeyError
 list_of_series = [pd.DataFrame(d[k]) for k in keys_we_want]

 ## create a dataframe
 # outer-join every section onto the first one using the "User" column,
 # so a user missing from one section still appears in the result
 data = list_of_series[0]
 for df in list_of_series[1:]:
     data = data.join(df.set_index('User'), on="User", how="outer")

 data.to_csv(outfile)
	import csv

	import pandas as pd

	## data was downloaded from XDMod, the Jobs by User dashboard

	datafile = "Jobs__by_User_2024-10-01_to_2024-10-31_aggregate.csv"
	outfile = "Jobs__by_User_2024-10-01_to_2024-10-31_subset.csv"

	# line that opens and closes each section of data in the export
	delimiter = "---------"

	## build a dictionary with all the data
	## layout: d[label] = {first column name: [...], label: [...]}
	## where label is the second column header of a section

	# parser state
	processing = False  # True while reading the data rows of a section
	iscols = False  # True when the next non-blank line is a column header
	label = ""  # second column header of the section being read
	cols = list()  # column headers of the section being read

	# dictionary to populate
	d = dict()

	# logic to populate dictionary
	with open(datafile) as f:
	    for raw in f:
	        line = raw.strip()
	        if not line:
	            # blank lines carry no fields; indexing them would raise
	            continue
	        if line == delimiter:
	            # a delimiter ends the section being read; otherwise it
	            # announces that the next line holds the column headers
	            if processing:
	                processing = False
	            else:
	                iscols = True
	            continue
	        if not (processing or iscols):
	            # text outside any section is ignored
	            continue
	        # Parse with the csv module instead of str.split(','): quoted
	        # fields may contain commas, and the surrounding quotes are
	        # removed without slicing characters off by hand.
	        vals = next(csv.reader([line]))
	        if processing:
	            # data row: only the first two fields are kept
	            d[label][cols[0]].append(vals[0])
	            d[label][label].append(vals[1])
	        else:
	            # header row: start a new section keyed by the second header
	            cols = vals
	            label = cols[1]
	            d[label] = {cols[0]: [], label: []}
	            iscols = False
	            processing = True

	# subset with the pieces we want
	keys_we_want = ["Wait Hours: Per Job",
	                "CPU Hours: Total",
	                "Number of Jobs Running",
	                "Job Size: Per Job (Core Count)"]
	# one two-column frame per wanted section; a missing section raises KeyError
	list_of_series = [pd.DataFrame(d[k]) for k in keys_we_want]

	## create a dataframe
	# outer-join every section onto the first one using the "User" column,
	# so a user missing from one section still appears in the result
	data = list_of_series[0]
	for df in list_of_series[1:]:
	    data = data.join(df.set_index('User'), on="User", how="outer")

	data.to_csv(outfile)