-
-
Save bwhite/1665440 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
def filter_by_completed(users, results, num_tasks=100):
    """Keep the users who finished exactly `num_tasks` tasks, and their results.

    Args:
        users: dict mapping user id to a user dict; 'tasks_finished' is read
            from each one and treated as 0 when absent.
        results: dict mapping result id to a result dict; each one must carry
            a 'user_id'.
        num_tasks: the number of finished tasks a user needs to be kept.

    Returns:
        A `(new_users, new_results)` tuple of freshly built dicts holding the
        kept users and the results whose 'user_id' is one of them.

    A one-line summary of the before/after sizes is printed.  The function
    asserts that no user reports more than `num_tasks` finished tasks.
    """
    new_users = {}
    for user_id, user_data in users.items():
        finished = user_data.get('tasks_finished', 0)
        assert finished <= num_tasks
        if finished != num_tasks:
            continue
        new_users[user_id] = user_data

    new_results = {
        result_id: result_data
        for result_id, result_data in results.items()
        if result_data['user_id'] in new_users
    }

    summary = (len(users), len(new_users), len(results), len(new_results))
    print('filter_by_completed: Users[%d,%d] Results[%d,%d]' % summary)
    return new_users, new_results
def main():
    """Rewrite users.js and results.js in place with the filtered data.

    Both files are read as JSON from the current directory, passed through
    filter_by_completed() with its default settings, and then overwritten
    with what it returns.
    """
    # Every handle is closed by its `with` block; the inputs are fully read
    # and closed before the same paths are reopened for writing.
    with open('users.js') as users_file:
        users = json.load(users_file)
    with open('results.js') as results_file:
        results = json.load(results_file)

    users, results = filter_by_completed(users, results)

    with open('users.js', 'w') as users_file:
        json.dump(users, users_file)
    with open('results.js', 'w') as results_file:
        json.dump(results, results_file)


# Only rewrite the files when run as a script, not when imported.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from pylab import * | |
from collections import defaultdict | |
def confusion_matrix(results, filename='confmat.png', N=16):
    """Plot the row-normalised event confusion matrix and save it as a PNG.

    Rows are indexed by each result's 'event' and columns by its
    'user_event'.  One extra trailing column, labelled 'skip', collects the
    results that have no 'user_event'.  An event name is turned into an
    index by dropping its first character and parsing the rest as an integer.

    Args:
        results: dict mapping result id to a result dict.
        filename: path of the PNG file to write.
        N: number of event classes; the matrix is N rows by N + 1 columns.

    Raises:
        IndexError: if an index does not fit the matrix.  This includes a
            result with no 'event': it maps to row N, and there is no such
            row (only the columns have a 'skip' slot).
    """
    def event_index(record, key):
        # A missing key maps to N, the 'skip' slot (previously a literal 16,
        # which was only right for the default N).
        return int(record[key][1:]) if key in record else N

    conf_mat = np.zeros((N, N + 1))
    for record in results.values():
        row = event_index(record, 'event')
        col = event_index(record, 'user_event')
        conf_mat[row][col] += 1

    # Normalise every row to sum to 1.  A row without any results divides
    # 0 by 0 and becomes NaN; NaN fails the `> 0` test below, so such a row
    # gets no number annotations.
    row_totals = conf_mat.sum(axis=1)
    norm_conf = conf_mat / row_totals[:, np.newaxis]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    event_labels = ['E0%02d' % i for i in range(N)]
    ax.set_yticks(np.arange(N))
    ax.set_yticklabels(event_labels)
    ax.set_xticks(np.arange(N + 1))
    ax.set_xticklabels(event_labels + ['skip'], rotation='vertical')
    image = ax.imshow(norm_conf, interpolation='nearest')
    fig.colorbar(image)

    # Write each non-zero cell's share of its row as a whole percentage.
    for i, row_values in enumerate(norm_conf):
        for j, fraction in enumerate(row_values):
            if fraction > 0:
                ax.text(j - .2, i + .2, str(int(round(fraction * 100))),
                        fontsize=12)

    ax.set_title('Event Confusion matrix')
    fig.savefig(filename, format="png")
def time_attempt(results, filename='time_attempt.png'):
    """Plot the median time taken per attempt number and save it as a PNG.

    Only results that have an 'end_time' are used.  Each user's results are
    ordered by 'start_time'; the i-th one in that order is the user's attempt
    number i.  For every attempt number, the median of
    `end_time - start_time` over all users that reached it is plotted.

    Args:
        results: dict mapping result id to a result dict with 'user_id',
            'start_time' and, when finished, 'end_time'.
        filename: path of the PNG file to write.
    """
    attempts_by_user = defaultdict(list)
    for record in results.values():
        if 'end_time' in record:
            started = record['start_time']
            attempts_by_user[record['user_id']].append(
                (started, record['end_time'] - started))

    durations_by_attempt = defaultdict(list)
    for attempts in attempts_by_user.values():
        # Tuples sort by start_time first, so enumerate() yields the
        # attempts in chronological order.
        for attempt_num, (_, duration) in enumerate(sorted(attempts)):
            durations_by_attempt[attempt_num].append(duration)

    median_time = [np.median(durations)
                   for _, durations in sorted(durations_by_attempt.items())]

    # Draw on a dedicated figure rather than clearing whichever figure is
    # current, so the output does not depend on what was plotted before.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(median_time)
    ax.set_xlabel('number of attempts')
    ax.set_ylabel('time taken (s)')
    ax.set_title('Median response time (over all users) as a function of # of attempts')
    fig.savefig(filename, format='png')
def acc_user(results, filename='acc_user.png'):
    """Plot each user's accuracy as a bar chart, sorted ascending, as a PNG.

    A result counts as an attempt when it has an 'end_time'.  It counts as
    correct when it also has a 'user_event' equal to its 'event'; an attempt
    with no 'user_event' is counted as incorrect.  Users with no attempts are
    left out of the chart.

    Args:
        results: dict mapping result id to a result dict with 'user_id' and
            'event', plus 'end_time' / 'user_event' when present.
        filename: path of the PNG file to write.
    """
    # user_id -> [correct, attempted].  Both counters are filled in the same
    # pass, so a user can never have an entry with zero attempts (which
    # would divide by zero) or be missing when an attempt is counted.
    tally = defaultdict(lambda: [0, 0])
    for record in results.values():
        if 'end_time' not in record:
            continue
        counts = tally[record['user_id']]
        counts[1] += 1
        if 'user_event' in record and record['event'] == record['user_event']:
            counts[0] += 1

    ranked = sorted(
        ((user_id, float(correct) / attempted)
         for user_id, (correct, attempted) in tally.items()),
        key=lambda pair: pair[1])
    # Plain lists (instead of unpacking zip(*...)) also work with no users.
    user_ids = [user_id for user_id, _ in ranked]
    accuracies = [accuracy for _, accuracy in ranked]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    width = .5
    ind = np.arange(len(accuracies))
    # align='edge' makes each bar start at its index, so the ticks placed at
    # ind + width / 2 sit under the bar centres whatever the library default.
    ax.bar(ind, accuracies, width, align='edge')
    ax.set_xticks(ind + width * .5)
    ax.set_xticklabels(user_ids, rotation='vertical')
    ax.set_title('Average User Accuracy (over all videos)')
    fig.savefig(filename, format='png')
def time_class(results, attr='user_id', filename=None):
    """Plot the average time taken per value of `attr`, sorted ascending.

    Results are grouped by `record[attr]`.  Within a group only results that
    have an 'end_time' contribute `end_time - start_time`; groups with no
    such result are left out of the chart.

    Args:
        results: dict mapping result id to a result dict.  Every result must
            have the key `attr`.
        attr: the result key to group by.
        filename: path of the PNG file to write.  When None (the default) the
            file is named 'time_<attr>.png'.  Previously an explicit value
            was silently overwritten with that derived name.
    """
    if filename is None:
        filename = 'time_%s.png' % attr

    # attr value -> [summed duration, number of finished results].  The entry
    # is created for every record, so a record lacking `attr` raises KeyError
    # even when it has no 'end_time'.
    totals = defaultdict(lambda: [0, 0])
    for record in results.values():
        entry = totals[record[attr]]
        if 'end_time' in record:
            entry[0] += record['end_time'] - record['start_time']
            entry[1] += 1

    ranked = sorted(
        ((key, float(total) / count)
         for key, (total, count) in totals.items() if count > 0),
        key=lambda pair: pair[1])
    # Plain lists (instead of unpacking zip(*...)) also work with no groups.
    keys = [key for key, _ in ranked]
    avg_time = [average for _, average in ranked]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ind = np.arange(len(avg_time))
    width = 0.5
    # align='edge' makes each bar start at its index, so the ticks placed at
    # ind + width / 2 sit under the bar centres whatever the library default.
    ax.bar(ind, avg_time, width, align='edge')
    ax.set_ylabel('average time taken (s)')
    ax.set_xlabel(attr)
    ax.set_xticks(ind + width * .5)
    ax.set_xticklabels(keys, rotation='vertical')
    ax.set_title('Average User Time (over all videos)')
    fig.savefig(filename, format="png")
def acc_time(results, filename='acc_time.png', bin_seconds=5, num_bins=15):
    """Plot accuracy against time taken, bucketed into fixed-width bins.

    Only results that have an 'end_time' are used.  A result's time taken is
    `end_time - start_time`; it lands in bin `time_taken // bin_seconds`,
    clamped to the range 0 .. num_bins - 1, so the last bin is open-ended.
    A result is correct when it has a 'user_event' equal to its 'event'.
    Each bar is annotated with 'correct / total' for its bin.

    Args:
        results: dict mapping result id to a result dict with 'user_id',
            'event', 'start_time' and, when present, 'end_time' and
            'user_event'.
        filename: path of the PNG file to write.
        bin_seconds: width of one bin.
        num_bins: number of bins, including the open-ended last one.

    Raises:
        ValueError: if `num_bins` is below 1 or `bin_seconds` is not positive.
    """
    if num_bins < 1:
        raise ValueError('num_bins must be at least 1')
    if bin_seconds <= 0:
        raise ValueError('bin_seconds must be positive')

    buckets = [[] for _ in range(num_bins)]
    for record in results.values():
        if 'end_time' not in record:
            continue
        start, end = record['start_time'], record['end_time']
        if end == start:
            # Diagnostic kept from the original: report zero-length answers.
            print('%s %s %s' % (start, end, record['user_id']))
        # Clamp at both ends: long answers go to the last bin, and a negative
        # duration goes to the first bin instead of producing a negative
        # index that would count from the end of the list.
        index = int((end - start) // bin_seconds)
        buckets[min(max(index, 0), num_bins - 1)].append(record)

    keys = ['%d-%d' % (i * bin_seconds, (i + 1) * bin_seconds)
            for i in range(num_bins - 1)]
    # The open-ended bin starts where the previous one stops, i.e. at
    # (num_bins - 1) * bin_seconds, not at num_bins * bin_seconds.
    keys.append('>= %d' % ((num_bins - 1) * bin_seconds))

    acc = []
    frac = []
    for bucket in buckets:
        correct = sum(
            1 for record in bucket
            if 'user_event' in record and record['event'] == record['user_event'])
        total = len(bucket)
        # max(..., 1) gives an empty bin an accuracy of 0 rather than
        # dividing by zero.
        acc.append(float(correct) / max(total, 1))
        frac.append((correct, total))

    fig = plt.figure(figsize=(10, 9))
    ax = fig.add_subplot(1, 1, 1)
    ind = np.arange(len(acc))
    width = 1
    # align='edge' makes each bar start at its index, so the ticks placed at
    # ind + width / 2 sit under the bar centres whatever the library default.
    rects = ax.bar(ind, acc, width, align='edge')
    ax.set_ylabel('average accuracy')
    ax.set_xlabel('time taken (s)')
    ax.set_xticks(ind + width * .5)
    ax.set_xticklabels(keys, rotation='vertical')
    for rect, (correct, total) in zip(rects, frac):
        ax.text(rect.get_x() + rect.get_width() / 2.,
                1.05 * (rect.get_height() + 0.01),
                '%d / %d' % (correct, total), ha='center',
                va='bottom', rotation='vertical')
    ax.set_ylim([0, 1])
    ax.set_title('Average Accuracy vs Time (for an individual video)')
    fig.savefig(filename, format="png")
def calculate_statistics(results):
    """Produce every plot for `results`, each with its default settings.

    The plotting functions are called one after another in a fixed order:
    acc_time, confusion_matrix, time_class, time_attempt, acc_user.
    """
    plotters = (acc_time, confusion_matrix, time_class, time_attempt, acc_user)
    for make_plot in plotters:
        make_plot(results)
if __name__ == "__main__":
    # Script entry point: load the results from results.js in the current
    # directory and generate all the plots.
    import json

    with open('results.js') as f:
        results = json.load(f)
    calculate_statistics(results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment