pbstark · September 17, 2016 21:36 · nealmcb · Jan 21, 2019
diff --git a/aus_16_audit.py b/aus_16_audit.py
 # Calculate various hypotheticals for the 2016 Australian Senatorial election, for Tasmanian results.
 # P.B. Stark, 17 September 2016

 from __future__ import division, print_function
 import math
 import numpy as np
 import scipy as sp
 from scipy import stats  # distributions
 from scipy import special # special functions
 from scipy import random # random variables, distributions, etc.
 from scipy.optimize import brentq
 from scipy.stats import (binom, hypergeom)
 import permute.utils

 # Confidence level used for every calculation below, and the matching risk limit.
 cl = 0.95
 alpha = 1 - cl

 # Ballot counts for Tasmania: valid votes plus invalid votes, blank ballots, etc.
 valid_votes = 339159
 invalid_votes = 12221
 ballots = valid_votes + invalid_votes

 # Margin in votes (votes separating the runner-up from getting a seat) and its
 # "diluted" version, i.e. the margin as a fraction of all ballots
 # (relevant if an error can decrease the margin by 1 or 2).
 marginv = 141
 margindv = marginv / ballots

 # Margin in ballots (minimum number of ballots that would need to have errors
 # to alter the outcome) and its diluted version.
 marginb = 71
 margindb = marginb / ballots

 print('upper bound on the diluted margin in ballots:', margindb)
 print('upper bound on the diluted margin in votes:', margindv)

 # Measured risk based on observing no errors:
 # for a sample of size n, the chance of observing no errors if the true error
 # rate is large enough to change the outcome is (1-margindb)^n.
 smallest_sample = 1000
 largest_sample = 16000
 sample_increment = 1000

 # Chance that a single sampled ballot shows no error at that error rate.
 clean_ballot_chance = 1 - margindb

 print('sample size, measured risk if no errors are observed in the sample:')
 for n in np.arange(smallest_sample, largest_sample + 1, sample_increment):
     print(n, clean_ballot_chance**n)

 # Upper confidence bounds on the error rate for various sample sizes,
 # when no errors are observed in the sample.
 x = 0  # number of errors observed in the sample

 # Confidence level as text (e.g. '95%'), derived from cl so that the printed
 # headings cannot disagree with the level actually passed to the interval code.
 cl_percent = '{:g}%'.format(100 * cl)

 print('sample size, upper ' + cl_percent
       + ' confidence bound for error rate if no errors are observed in the sample:')
 for n in np.arange(smallest_sample, largest_sample+1, sample_increment):
     # prints whatever permute.utils.binom_conf_interval returns for (n, x)
     print(n, permute.utils.binom_conf_interval(n, x, cl=cl, alternative="upper"))

 # Suppose that x errors are observed in a sample of n ballots:
 # lower confidence bounds on the population error rate for various x.
 n = 2500  # sample size; also reported in the heading below
 print('errors observed in a sample of {} ballots, lower {} confidence bound on error rate:'
       .format(n, cl_percent))
 for x in np.arange(1, 11):
     print(x, permute.utils.binom_conf_interval(n, x, cl=cl, alternative="lower"))
    
 # Initial sample size for a risk-limiting audit, assuming the true error rates are zero

 def minSampleSize(ballots, u, alpha=0.05, gamma=0.95):
     r'''
     Find the smallest sample size n for risk limit alpha, using cushion gamma \in (0,1).

     n is the smallest integer satisfying
         1/alpha <= (gamma/(1-1/(ballots*u)) + 1 - gamma)**n

     Input:
         ballots: number of ballots cast in the contest
         u:       upper bound on overstatement per ballot
         gamma:   hedge against finding a ballot that attains the upper bound. Larger values give
                  less protection
         alpha:   risk limit

     Returns:
         the sample size, rounded up with math.ceil

     Raises:
         ValueError: if alpha is not in (0, 1], gamma is not positive, or
                     ballots*u <= 1 (the formula has no meaningful solution there)
     '''
     if not 0 < alpha <= 1:
         raise ValueError('alpha must be in (0, 1], got {!r}'.format(alpha))
     if not gamma > 0:
         raise ValueError('gamma must be positive, got {!r}'.format(gamma))
     total_bound = ballots*u
     # For total_bound <= 1 the term 1 - 1/total_bound is zero or negative, which
     # previously led to a division by zero, a math domain error, or a silently
     # nonsensical (non-positive) sample size.
     if not total_bound > 1:
         raise ValueError('ballots*u must exceed 1, got {!r}'.format(total_bound))
     # per-ballot growth factor of the bound when the sampled ballot shows no error
     growth = gamma/(1.0 - 1.0/total_bound) + 1.0 - gamma
     return math.ceil(math.log(1.0/alpha) / math.log(growth))
    
 # Initial sample size, taking 2/marginv as the per-ballot overstatement bound.
 initial_sample = minSampleSize(ballots, 2/marginv, alpha=alpha)
 print('Initial sample size for RLA with risk limit', alpha, ':', initial_sample)
	# Calculate various hypotheticals for the 2016 Australian Senatorial election, for Tasmanian results.
	# P.B. Stark, 17 September 2016

	from __future__ import division, print_function
	import math
	import numpy as np
	import scipy as sp
	from scipy import stats # distributions
	from scipy import special # special functions
	from scipy import random # random variables, distributions, etc.
	from scipy.optimize import brentq
	from scipy.stats import (binom, hypergeom)
	import permute.utils

	cl = 0.95 # confidence level for these calculations
	alpha = 1-cl # corresponding risk limit

	valid_votes = 339159 # total votes in Tasmania
	invalid_votes = 12221 # invalid votes, blank ballots, etc.
	ballots = valid_votes + invalid_votes # total ballots

	marginv = 141 # votes separating runner-up from getting a seat
	margindv = marginv/ballots # "diluted" margin in votes
	# (i.e., if an error can decrease the margin by 1 or 2)

	marginb = 71 # minimum number ballots that would need to have errors to alter the outcome
	margindb = marginb/ballots # "diluted" margin in ballots

	print('upper bound on the diluted margin in ballots:', margindb)
	print('upper bound on the diluted margin in votes:', margindv)

	# Measured risk based on observing no errors.
	# for sample of size n, chance of observing no errors if the true error rate is large
	# enough to change the outcome is (1-margindb)^n

	smallest_sample = 1000
	largest_sample = 16000
	sample_increment = 1000

	print('sample size, measured risk if no errors are observed in the sample:')
	for n in np.arange(smallest_sample, largest_sample+1, sample_increment):
	    # the loop body must be indented under the for statement
	    print(n, (1-margindb)**n)

	# Upper confidence bounds on the error rate for various sample sizes,
	# when no errors are observed in the sample.
	x = 0  # number of errors observed in the sample

	# Confidence level as text (e.g. '95%'), derived from cl so that the printed
	# headings cannot disagree with the level actually passed to the interval code.
	cl_percent = '{:g}%'.format(100 * cl)

	print('sample size, upper ' + cl_percent
	      + ' confidence bound for error rate if no errors are observed in the sample:')
	for n in np.arange(smallest_sample, largest_sample+1, sample_increment):
	    # prints whatever permute.utils.binom_conf_interval returns for (n, x)
	    print(n, permute.utils.binom_conf_interval(n, x, cl=cl, alternative="upper"))

	# Suppose that x errors are observed in a sample of n ballots:
	# lower confidence bounds on the population error rate for various x.
	n = 2500  # sample size; also reported in the heading below
	print('errors observed in a sample of {} ballots, lower {} confidence bound on error rate:'
	      .format(n, cl_percent))
	for x in np.arange(1, 11):
	    print(x, permute.utils.binom_conf_interval(n, x, cl=cl, alternative="lower"))

	# Initial sample size for a risk-limiting audit, assuming the true error rates are zero

	def minSampleSize(ballots, u, alpha=0.05, gamma=0.95):
	    r'''
	    Find the smallest sample size n for risk limit alpha, using cushion gamma \in (0,1).

	    n is the smallest integer satisfying
	        1/alpha <= (gamma/(1-1/(ballots*u)) + 1 - gamma)**n

	    Input:
	        ballots: number of ballots cast in the contest
	        u:       upper bound on overstatement per ballot
	        gamma:   hedge against finding a ballot that attains the upper bound. Larger values give
	                 less protection
	        alpha:   risk limit

	    Returns:
	        the sample size, rounded up with math.ceil

	    Raises:
	        ValueError: if alpha is not in (0, 1], gamma is not positive, or
	                    ballots*u <= 1 (the formula has no meaningful solution there)
	    '''
	    if not 0 < alpha <= 1:
	        raise ValueError('alpha must be in (0, 1], got {!r}'.format(alpha))
	    if not gamma > 0:
	        raise ValueError('gamma must be positive, got {!r}'.format(gamma))
	    total_bound = ballots*u
	    # For total_bound <= 1 the term 1 - 1/total_bound is zero or negative, which
	    # would give a division by zero, a math domain error, or a silently
	    # nonsensical (non-positive) sample size.
	    if not total_bound > 1:
	        raise ValueError('ballots*u must exceed 1, got {!r}'.format(total_bound))
	    # per-ballot growth factor of the bound when the sampled ballot shows no error
	    growth = gamma/(1.0 - 1.0/total_bound) + 1.0 - gamma
	    return math.ceil(math.log(1.0/alpha) / math.log(growth))

	# Initial sample size, taking 2/marginv as the per-ballot overstatement bound.
	initial_sample = minSampleSize(ballots, 2/marginv, alpha=alpha)
	print('Initial sample size for RLA with risk limit', alpha, ':', initial_sample)