tinybike · September 20, 2015 09:11
diff --git a/report-stats.py b/report-stats.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 from __future__ import division
 import numpy as np

 # Print arrays on lines up to 120 characters wide, in fixed-point notation,
 # with every float formatted to six decimal places.
 np.set_printoptions(
     linewidth=120,
     suppress=True,
     formatter={"float": "{: 0.6f}".format},
 )

 # Simulation size: NUM_TRIALS independent calls to initiate(), each with
 # NUM_EVENTS events and NUM_REPORTERS reporters.
 NUM_TRIALS = 5000
 NUM_EVENTS = 2500
 NUM_REPORTERS = 5000

 # Total reputation that initiate() splits across the reporters.
 TOTAL_REP = 11000000

 # Target number of reports per event, and the per-token reporting rate
 # that produces that many reports in total.
 REPORTS_PER_EVENT = 20
 REPORTS_PER_TOKEN = (REPORTS_PER_EVENT * NUM_EVENTS) / TOTAL_REP

 # Set to True to make initiate() print its intermediate values.
 DEBUG = False

 def initiate(numEvents, numReporters):
    """Run one randomized trial of reporters selecting events.

    Draws random event volumes and random reputation shares.  Each reporter
    selects an event whenever a uniform random draw falls below
    ``sqrt(volume fraction) * reporter rep * REPORTS_PER_TOKEN``.  The number
    of events each reporter ends up with is then compared with the expected
    ``REPORTS_PER_TOKEN * reporter rep`` (capped at ``numEvents``).

    Reads the module-level TOTAL_REP, REPORTS_PER_TOKEN, REPORTS_PER_EVENT
    and DEBUG settings.

    Args:
        numEvents: number of events to generate.
        numReporters: number of reporters to generate.

    Returns:
        A 10-tuple, in this order:
          0. largest shortfall (expected - actual, floored at 0) of any reporter
          1. number of reports received by the smallest-volume event
          2. relative error of (1) against REPORTS_PER_EVENT
          3. expected events of the reporter with the largest shortfall
          4. actual events of that reporter
          5. rep fraction of that reporter
          6. mean shortfall over all reporters
          7. mean percent error over all reporters
          8. maximum percent error over all reporters
          9. percent error of the total number of (capped) reports against
             numEvents * REPORTS_PER_EVENT
    """
    # Random events with random volumes.
    listVol = np.random.randint(1, 100000, size=numEvents)
    volFrac = listVol / np.sum(listVol)

    # Random reporters holding random shares of TOTAL_REP.
    repFrac = np.random.random(numReporters)
    repFrac /= np.sum(repFrac)
    reporterList = repFrac * TOTAL_REP

    # Expected events per reporter; nobody can report on more events than exist.
    eventsExpected = np.minimum(REPORTS_PER_TOKEN * reporterList, numEvents)

    eventsActually = np.zeros(numReporters)
    # Reports each event receives.  Needed for the smallest-volume statistics:
    # those are per-event quantities and must not be read out of the
    # per-reporter arrays.
    reportsPerEvent = np.zeros(numEvents)
    # An alternative weighting, 0.5/(1 + exp(-10*volFrac)), was tried in place
    # of the square root.
    sqrtVolFrac = np.sqrt(volFrac)  # loop-invariant, computed once
    for n in range(numReporters):
        threshold = sqrtVolFrac * reporterList[n] * REPORTS_PER_TOKEN
        selected = np.random.random(numEvents) < threshold
        eventsActually[n] = np.sum(selected)
        reportsPerEvent += selected
        if DEBUG:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("%s | %s \t%s \t%s" % (n, np.round(repFrac[n], 6),
                                         threshold, eventsActually[n]))

    # Only reporters who got fewer events than expected count as errors.
    shortfall = eventsExpected - eventsActually
    difference = np.maximum(shortfall, 0)
    percentError = np.maximum(shortfall / eventsExpected * 100, 0)
    worst = np.argmax(difference)

    # Reporters who selected more than expected are capped at the expectation.
    adjusted = np.minimum(eventsExpected, eventsActually)
    numReportsError = np.abs(numEvents * REPORTS_PER_EVENT - np.sum(adjusted)) / numEvents / REPORTS_PER_EVENT * 100

    # Uses the uncapped selections: the cap above does not say which of a
    # reporter's events would be dropped.
    smallestIndex = np.argmin(listVol)
    actualSmallest = reportsPerEvent[smallestIndex]
    errorSmallest = np.abs(REPORTS_PER_EVENT - actualSmallest) / REPORTS_PER_EVENT

    if DEBUG:
        print("")
        print("Expected reports per token: %s" % (REPORTS_PER_TOKEN,))
        print("Total expected reports: %s" % (REPORTS_PER_TOKEN * TOTAL_REP,))
        print("")
        print("Rep amounts:")
        print(reporterList)
        print("")
        print("Rep fractions:")
        print(repFrac)
        print("")
        print("Total rep: %s" % (TOTAL_REP,))
        print("")
        print("Expected: %s" % (eventsExpected,))
        print("Actually: %s" % (eventsActually,))
        print("Adjusted: %s" % (adjusted,))
        print("")
        print("Expected total: %s" % (np.sum(eventsExpected),))
        print("Actual total: %s" % (np.sum(eventsActually),))
        print("Adjusted total: %s" % (np.sum(adjusted),))
        print("")
        print("Difference: %s" % (difference,))
        print("Percent error: %s" % (percentError,))
        print("")

    return (np.max(difference), actualSmallest, errorSmallest, eventsExpected[worst],
            eventsActually[worst], repFrac[worst], np.mean(difference),
            np.mean(percentError), np.max(percentError), numReportsError)


 if __name__ == "__main__":
    # Run NUM_TRIALS independent simulations and report every statistic
    # returned by initiate() as "median +/- standard deviation".

    def _summarize(label, samples):
        # Single-argument print() behaves the same on Python 2 and 3.
        print("%s %s +/- %s" % (label, np.median(samples), np.std(samples)))

    # One row per trial, one column per entry of initiate()'s 10-tuple.
    results = np.zeros((NUM_TRIALS, 10))
    for i in range(NUM_TRIALS):
        results[i] = initiate(NUM_EVENTS, NUM_REPORTERS)

    # last is worst case difference between number of events
    # selected to report on and what we expect
    (last, actualSmallest, errorSmallest, worstExpected, worstActually,
     worstFrac, meanAbsoluteDifference, meanPercentError, maxPercentError,
     numReportsError) = results.T

    print("Results for %s trials with %s events and %s reporters (median +/- standard deviation):"
          % (NUM_TRIALS, NUM_EVENTS, NUM_REPORTERS))
    _summarize(" - Total number of reports % error:", numReportsError)
    _summarize(" - Mean absolute difference (expected - actual):", meanAbsoluteDifference)
    _summarize(" - Mean percent error:", meanPercentError)
    _summarize(" - Reports on smallest-volume event (expected " + str(REPORTS_PER_EVENT) + "):", actualSmallest)
    _summarize(" - Errors on smallest-volume event:", errorSmallest)
    print("")
    print("Worst-case results:")
    _summarize(" - Maximum difference:", last)
    # This line used to print meanPercentError again; the maximum is the
    # statistic the label names.
    _summarize(" - Maximum percent error:", maxPercentError)
    _summarize(" - Expected reports for last reporter:", worstExpected)
    _summarize(" - Actual reports for last reporter:", worstActually)
    _summarize(" - Rep fraction owned by the last reporter:", worstFrac)
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	from __future__ import division
	import numpy as np

	# Print arrays on lines up to 120 characters wide, in fixed-point notation,
	# with every float formatted to six decimal places.
	np.set_printoptions(
	    linewidth=120,
	    suppress=True,
	    formatter={"float": "{: 0.6f}".format},
	)

	# Simulation size: NUM_TRIALS independent calls to initiate(), each with
	# NUM_EVENTS events and NUM_REPORTERS reporters.
	NUM_TRIALS = 5000
	NUM_EVENTS = 2500
	NUM_REPORTERS = 5000

	# Total reputation that initiate() splits across the reporters.
	TOTAL_REP = 11000000

	# Target number of reports per event, and the per-token reporting rate
	# that produces that many reports in total.
	REPORTS_PER_EVENT = 20
	REPORTS_PER_TOKEN = (REPORTS_PER_EVENT * NUM_EVENTS) / TOTAL_REP

	# Set to True to make initiate() print its intermediate values.
	DEBUG = False

	def initiate(numEvents, numReporters):
	    """Run one randomized trial of reporters selecting events.

	    Draws random event volumes and random reputation shares.  Each reporter
	    selects an event whenever a uniform random draw falls below
	    ``sqrt(volume fraction) * reporter rep * REPORTS_PER_TOKEN``.  The number
	    of events each reporter ends up with is then compared with the expected
	    ``REPORTS_PER_TOKEN * reporter rep`` (capped at ``numEvents``).

	    Reads the module-level TOTAL_REP, REPORTS_PER_TOKEN, REPORTS_PER_EVENT
	    and DEBUG settings.

	    Args:
	        numEvents: number of events to generate.
	        numReporters: number of reporters to generate.

	    Returns:
	        A 10-tuple, in this order:
	          0. largest shortfall (expected - actual, floored at 0) of any reporter
	          1. number of reports received by the smallest-volume event
	          2. relative error of (1) against REPORTS_PER_EVENT
	          3. expected events of the reporter with the largest shortfall
	          4. actual events of that reporter
	          5. rep fraction of that reporter
	          6. mean shortfall over all reporters
	          7. mean percent error over all reporters
	          8. maximum percent error over all reporters
	          9. percent error of the total number of (capped) reports against
	             numEvents * REPORTS_PER_EVENT
	    """
	    # Random events with random volumes.
	    listVol = np.random.randint(1, 100000, size=numEvents)
	    volFrac = listVol / np.sum(listVol)

	    # Random reporters holding random shares of TOTAL_REP.
	    repFrac = np.random.random(numReporters)
	    repFrac /= np.sum(repFrac)
	    reporterList = repFrac * TOTAL_REP

	    # Expected events per reporter; nobody can report on more events than exist.
	    eventsExpected = np.minimum(REPORTS_PER_TOKEN * reporterList, numEvents)

	    eventsActually = np.zeros(numReporters)
	    # Reports each event receives.  Needed for the smallest-volume statistics:
	    # those are per-event quantities and must not be read out of the
	    # per-reporter arrays.
	    reportsPerEvent = np.zeros(numEvents)
	    # An alternative weighting, 0.5/(1 + exp(-10*volFrac)), was tried in place
	    # of the square root.
	    sqrtVolFrac = np.sqrt(volFrac)  # loop-invariant, computed once
	    for n in range(numReporters):
	        threshold = sqrtVolFrac * reporterList[n] * REPORTS_PER_TOKEN
	        selected = np.random.random(numEvents) < threshold
	        eventsActually[n] = np.sum(selected)
	        reportsPerEvent += selected
	        if DEBUG:
	            # Single-argument print() behaves the same on Python 2 and 3.
	            print("%s | %s \t%s \t%s" % (n, np.round(repFrac[n], 6),
	                                         threshold, eventsActually[n]))

	    # Only reporters who got fewer events than expected count as errors.
	    shortfall = eventsExpected - eventsActually
	    difference = np.maximum(shortfall, 0)
	    percentError = np.maximum(shortfall / eventsExpected * 100, 0)
	    worst = np.argmax(difference)

	    # Reporters who selected more than expected are capped at the expectation.
	    adjusted = np.minimum(eventsExpected, eventsActually)
	    numReportsError = np.abs(numEvents * REPORTS_PER_EVENT - np.sum(adjusted)) / numEvents / REPORTS_PER_EVENT * 100

	    # Uses the uncapped selections: the cap above does not say which of a
	    # reporter's events would be dropped.
	    smallestIndex = np.argmin(listVol)
	    actualSmallest = reportsPerEvent[smallestIndex]
	    errorSmallest = np.abs(REPORTS_PER_EVENT - actualSmallest) / REPORTS_PER_EVENT

	    if DEBUG:
	        print("")
	        print("Expected reports per token: %s" % (REPORTS_PER_TOKEN,))
	        print("Total expected reports: %s" % (REPORTS_PER_TOKEN * TOTAL_REP,))
	        print("")
	        print("Rep amounts:")
	        print(reporterList)
	        print("")
	        print("Rep fractions:")
	        print(repFrac)
	        print("")
	        print("Total rep: %s" % (TOTAL_REP,))
	        print("")
	        print("Expected: %s" % (eventsExpected,))
	        print("Actually: %s" % (eventsActually,))
	        print("Adjusted: %s" % (adjusted,))
	        print("")
	        print("Expected total: %s" % (np.sum(eventsExpected),))
	        print("Actual total: %s" % (np.sum(eventsActually),))
	        print("Adjusted total: %s" % (np.sum(adjusted),))
	        print("")
	        print("Difference: %s" % (difference,))
	        print("Percent error: %s" % (percentError,))
	        print("")

	    return (np.max(difference), actualSmallest, errorSmallest, eventsExpected[worst],
	            eventsActually[worst], repFrac[worst], np.mean(difference),
	            np.mean(percentError), np.max(percentError), numReportsError)


	if __name__ == "__main__":
	    # Run NUM_TRIALS independent simulations and report every statistic
	    # returned by initiate() as "median +/- standard deviation".

	    def _summarize(label, samples):
	        # Single-argument print() behaves the same on Python 2 and 3.
	        print("%s %s +/- %s" % (label, np.median(samples), np.std(samples)))

	    # One row per trial, one column per entry of initiate()'s 10-tuple.
	    results = np.zeros((NUM_TRIALS, 10))
	    for i in range(NUM_TRIALS):
	        results[i] = initiate(NUM_EVENTS, NUM_REPORTERS)

	    # last is worst case difference between number of events
	    # selected to report on and what we expect
	    (last, actualSmallest, errorSmallest, worstExpected, worstActually,
	     worstFrac, meanAbsoluteDifference, meanPercentError, maxPercentError,
	     numReportsError) = results.T

	    print("Results for %s trials with %s events and %s reporters (median +/- standard deviation):"
	          % (NUM_TRIALS, NUM_EVENTS, NUM_REPORTERS))
	    _summarize(" - Total number of reports % error:", numReportsError)
	    _summarize(" - Mean absolute difference (expected - actual):", meanAbsoluteDifference)
	    _summarize(" - Mean percent error:", meanPercentError)
	    _summarize(" - Reports on smallest-volume event (expected " + str(REPORTS_PER_EVENT) + "):", actualSmallest)
	    _summarize(" - Errors on smallest-volume event:", errorSmallest)
	    print("")
	    print("Worst-case results:")
	    _summarize(" - Maximum difference:", last)
	    # This line used to print meanPercentError again; the maximum is the
	    # statistic the label names.
	    _summarize(" - Maximum percent error:", maxPercentError)
	    _summarize(" - Expected reports for last reporter:", worstExpected)
	    _summarize(" - Actual reports for last reporter:", worstActually)
	    _summarize(" - Rep fraction owned by the last reporter:", worstFrac)
No results found