Skip to content

Instantly share code, notes, and snippets.

@bwhite
Forked from anonymous/stat.py
Created January 23, 2012 20:42
Show Gist options
  • Save bwhite/1665440 to your computer and use it in GitHub Desktop.
Save bwhite/1665440 to your computer and use it in GitHub Desktop.
import json
def filter_by_completed(users, results, num_tasks=100):
    """Keep only users who finished every task, along with their results.

    Args:
        users: Mapping of user id -> user record (dict). A record's
            'tasks_finished' count is read as 0 when the key is absent.
        results: Mapping of result id -> result record (dict). Every record
            must contain a 'user_id' key.
        num_tasks: Number of finished tasks a user needs in order to be kept.

    Returns:
        A ``(new_users, new_results)`` tuple of newly built dicts; the input
        mappings are not modified.

    Raises:
        AssertionError: If a user reports more than ``num_tasks`` finished.
        KeyError: If a result record has no 'user_id'.
    """
    completed_users = {}
    for uid, record in users.items():
        finished = record.get('tasks_finished', 0)
        assert finished <= num_tasks
        if finished == num_tasks:
            completed_users[uid] = record

    kept_results = {}
    for rid, record in results.items():
        if record['user_id'] in completed_users:
            kept_results[rid] = record

    # Before/after sizes, reported in the same order as the format string.
    summary = (len(users), len(completed_users),
               len(results), len(kept_results))
    print('filter_by_completed: Users[%d,%d] Results[%d,%d]' % summary)
    return completed_users, kept_results
def main():
    """Rewrite users.js and results.js in place, keeping only complete users.

    Both files are loaded fully before either one is reopened for writing,
    and every handle is closed by a ``with`` block so the written JSON is
    flushed to disk deterministically.
    """
    with open('users.js') as users_file:
        users = json.load(users_file)
    with open('results.js') as results_file:
        results = json.load(results_file)

    users, results = filter_by_completed(users, results)

    with open('users.js', 'w') as users_file:
        json.dump(users, users_file)
    with open('results.js', 'w') as results_file:
        json.dump(results, results_file)


# Guarded so that importing this module does not rewrite the data files.
if __name__ == "__main__":
    main()
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
from collections import defaultdict
def confusion_matrix(results, filename='confmat.png', N=16):
    """Plot a row-normalized event confusion matrix and save it as a PNG.

    Rows are the true event of each result and columns are the event the
    user chose. One extra trailing column, labelled "skip", counts results
    that have no 'user_event'. Event labels are converted to indices by
    dropping their first character and parsing the rest as an integer.

    Args:
        results: Mapping of result id -> result record (dict) holding an
            'event' label and, optionally, a 'user_event' label.
        filename: Path the PNG image is written to.
        N: Number of event classes; the matrix has N rows and N + 1 columns.

    Raises:
        IndexError: If a parsed event index is N or larger.
    """
    def event_id(label):
        return int(label[1:])

    skip_column = N  # last column; follows N instead of a hard-coded 16
    conf_mat = np.zeros((N, N + 1))
    for record in results.values():
        if 'event' not in record:
            # Without a true event there is no row to count this result in.
            continue
        row = event_id(record['event'])
        if 'user_event' in record:
            col = event_id(record['user_event'])
        else:
            col = skip_column
        conf_mat[row][col] += 1

    # Turn counts into per-row fractions. Rows with no samples are divided
    # by 1 so they stay all-zero instead of becoming NaN.
    row_totals = conf_mat.sum(axis=1)[:, np.newaxis]
    row_totals[row_totals == 0] = 1
    norm_conf = conf_mat / row_totals

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    event_labels = ['E0%02d' % i for i in range(N)]
    ax.set_yticks(np.arange(N))
    ax.set_yticklabels(event_labels)
    ax.set_xticks(np.arange(N + 1))
    ax.set_xticklabels(event_labels + ['skip'], rotation='vertical')
    image = ax.imshow(norm_conf, interpolation='nearest')
    fig.colorbar(image)

    # Annotate every non-empty cell with its percentage.
    for i, row_values in enumerate(norm_conf):
        for j, fraction in enumerate(row_values):
            if fraction > 0:
                ax.text(j - .2, i + .2, str(int(round(fraction * 100))),
                        fontsize=12)

    ax.set_title('Event Confusion matrix')
    fig.savefig(filename, format="png")
def time_attempt(results, filename='time_attempt.png'):
    """Plot the median response time per attempt number and save it as a PNG.

    Only results that contain 'end_time' are used. Each user's results are
    ordered by 'start_time'; the position in that order is the attempt
    number, and the plotted value for an attempt number is the median
    duration across all users that reached it.

    Args:
        results: Mapping of result id -> result record (dict) with
            'user_id', 'start_time' and, when finished, 'end_time'.
        filename: Path the PNG image is written to.
    """
    per_user = defaultdict(list)
    for record in results.values():
        if 'end_time' not in record:
            continue
        began = record['start_time']
        per_user[record['user_id']].append((began, record['end_time'] - began))

    by_attempt = defaultdict(list)
    for attempts in per_user.values():
        # Sorting the (start_time, duration) tuples orders them by start time.
        for index, (_, elapsed) in enumerate(sorted(attempts)):
            by_attempt[index].append(elapsed)

    median_time = [np.median(by_attempt[index]) for index in sorted(by_attempt)]

    # Draw on the current pyplot figure after clearing it.
    clf()
    plot(median_time)
    xlabel('number of attempts')
    ylabel('time taken (s)')
    plt.title('Median response time (over all users) as a function of # of attempts')
    savefig(filename, format='png')
def acc_user(results, filename='acc_user.png'):
    """Plot per-user answer accuracy as a bar chart sorted by accuracy.

    A user is charted when at least one of their results has a 'user_event'
    and at least one has an 'end_time'. Accuracy is the number of finished
    results (those with 'end_time') whose 'user_event' equals 'event',
    divided by the number of finished results.

    Args:
        results: Mapping of result id -> result record (dict) with
            'user_id', 'event' and optionally 'user_event' / 'end_time'.
        filename: Path the PNG image is written to.
    """
    # user_id -> [correct, finished], seeded for users who answered at all.
    tallies = {}
    for record in results.values():
        if 'user_event' in record:
            tallies.setdefault(record['user_id'], [0, 0])

    for record in results.values():
        if 'end_time' not in record:
            continue
        tally = tallies.get(record['user_id'])
        if tally is None:
            # This user never submitted an answer, so they are not charted.
            continue
        tally[1] += 1
        # A finished result without 'user_event' counts as a wrong answer
        # instead of raising KeyError.
        if record['event'] == record.get('user_event'):
            tally[0] += 1

    # Users with no finished results are dropped to avoid dividing by zero.
    scored = [(user_id, float(correct) / finished)
              for user_id, (correct, finished) in tallies.items()
              if finished]
    scored.sort(key=lambda pair: pair[1])
    user_ids = [user_id for user_id, _ in scored]
    accuracies = [accuracy for _, accuracy in scored]

    # Draw the bar chart. Explicit centre alignment with ticks at the bar
    # positions keeps each label under its bar whatever the default is.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    width = .5
    ind = np.arange(len(accuracies))
    ax.bar(ind, accuracies, width, align='center')
    ax.set_xticks(ind)
    ax.set_xticklabels(user_ids, rotation='vertical')
    ax.set_title('Average User Accuracy (over all videos)')
    fig.savefig(filename, format='png')
def time_class(results, attr='user_id', filename='time_attr.png'):
    """Plot the average response time per value of ``attr`` as a bar chart.

    Results are grouped by ``record[attr]``. Within a group only results
    that contain 'end_time' contribute, and groups without any such result
    are left out. Bars are sorted by average time.

    Args:
        results: Mapping of result id -> result record (dict) with
            'start_time', optionally 'end_time', and the ``attr`` key.
        attr: Record key to group by.
        filename: Path the PNG image is written to. The default value is a
            placeholder that is replaced by ``'time_<attr>.png'``; any other
            value is used as given.

    Raises:
        KeyError: If a result record lacks the ``attr`` key.
    """
    if filename == 'time_attr.png':
        # Default call: derive the file name from the grouping attribute.
        filename = 'time_%s.png' % attr

    grouped = defaultdict(list)
    for record in results.values():
        grouped[record[attr]].append(record)

    averages = []  # (group key, average seconds)
    for key, records in grouped.items():
        durations = [r['end_time'] - r['start_time']
                     for r in records if 'end_time' in r]
        if durations:
            averages.append((key, float(sum(durations)) / len(durations)))
    averages.sort(key=lambda pair: pair[1])
    keys = [key for key, _ in averages]
    avg_time = [average for _, average in averages]

    # Explicit centre alignment with ticks at the bar positions keeps each
    # label under its bar whatever matplotlib's default alignment is.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ind = np.arange(len(avg_time))
    width = 0.5
    ax.bar(ind, avg_time, width, align='center')
    ax.set_ylabel('average time taken (s)')
    ax.set_xlabel(attr)
    ax.set_xticks(ind)
    ax.set_xticklabels(keys, rotation='vertical')
    ax.set_title('Average User Time (over all videos)')
    fig.savefig(filename, format="png")
def acc_time(results, filename='acc_time.png'):
    """Plot answer accuracy against response time, in 5-second buckets.

    Results that contain 'end_time' are placed into 15 buckets by duration
    (``end_time - start_time``); the last bucket is open-ended. Each bar
    shows the fraction of results in the bucket whose 'user_event' equals
    'event', and is annotated with "correct / total".

    Args:
        results: Mapping of result id -> result record (dict) with 'event',
            'user_id', 'start_time' and optionally 'end_time' / 'user_event'.
        filename: Path the PNG image is written to.
    """
    bucket_seconds = 5
    num_buckets = 15

    buckets = [[] for _ in range(num_buckets)]
    for record in results.values():
        if 'end_time' not in record:
            continue
        start, end = record['start_time'], record['end_time']
        if end == start:
            # Diagnostic kept from the original: report zero-length answers.
            print('%s %s %s' % (start, end, record['user_id']))
        # NOTE(review): a sufficiently negative duration gives a negative
        # index that wraps to the end of the list - confirm durations are
        # never negative.
        index = min(int((end - start) / bucket_seconds), num_buckets - 1)
        buckets[index].append(record)

    keys = ['%d-%d' % (i * bucket_seconds, (i + 1) * bucket_seconds)
            for i in range(num_buckets - 1)]
    # The open-ended bucket starts where the last closed bucket ends.
    keys.append('>= %d' % ((num_buckets - 1) * bucket_seconds))

    acc = []
    frac = []
    for bucket in buckets:
        # A finished result without 'user_event' counts as a wrong answer
        # instead of raising KeyError.
        correct = sum(1 for r in bucket if r['event'] == r.get('user_event'))
        total = len(bucket)
        acc.append(float(correct) / max(total, 1))
        frac.append((correct, total))

    # Explicit centre alignment with ticks at the bar positions keeps each
    # label under its bar whatever matplotlib's default alignment is.
    fig = plt.figure(figsize=(10, 9))
    ax = fig.add_subplot(1, 1, 1)
    ind = np.arange(len(acc))
    width = 1
    rects = ax.bar(ind, acc, width, align='center')
    ax.set_ylabel('average accuracy')
    ax.set_xlabel('time taken (s)')
    ax.set_xticks(ind)
    ax.set_xticklabels(keys, rotation='vertical')
    for rect, (correct, total) in zip(rects, frac):
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * (height + 0.01),
                '%d / %d' % (correct, total), ha='center',
                va='bottom', rotation='vertical')
    ax.set_ylim([0, 1])
    ax.set_title('Average Accuracy vs Time (for an individual video)')
    fig.savefig(filename, format="png")
def calculate_statistics(results):
    """Render every statistics plot for ``results`` using default file names.

    Args:
        results: Mapping of result id -> result record, passed unchanged to
            each plotting function.
    """
    # The call order is kept exactly as before.
    plotters = (acc_time, confusion_matrix, time_class, time_attempt, acc_user)
    for render in plotters:
        render(results)
# Script entry point: load the results file and render every plot.
if __name__ == "__main__":
    import json
    with open('results.js', 'r') as results_file:
        results = json.load(results_file)
    calculate_statistics(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment