Created
September 17, 2016 21:36
-
-
Save pbstark/58653bbc26f269d4588ea7cd5b2e12bf to your computer and use it in GitHub Desktop.
calculations for hypothetical scenarios in auditing the 2016 Australian senatorial election, Tasmanian portion
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Calculate various hypotheticals for the 2016 Australian Senatorial election, for Tasmanian results.
# P.B. Stark, 17 September 2016
from __future__ import division, print_function | |
import math | |
import numpy as np | |
import scipy as sp | |
from scipy import stats # distributions | |
from scipy import special # special functions | |
from scipy import random # random variables, distributions, etc. | |
from scipy.optimize import brentq | |
from scipy.stats import (binom, hypergeom) | |
import permute.utils | |
# Audit parameters for the Tasmanian contest.
cl = 0.95                # confidence level for these calculations
alpha = 1 - cl           # corresponding risk limit

valid_votes = 339159     # total votes in Tasmania
invalid_votes = 12221    # invalid votes, blank ballots, etc.
ballots = valid_votes + invalid_votes  # total ballots

# "Diluted" margins are margins divided by the total number of ballots.
# NOTE: the ratios below rely on true division; on Python 2 that comes from
# the file-level `from __future__ import division`.
marginv = 141            # votes separating runner-up from getting a seat
margindv = marginv / ballots  # "diluted" margin in votes

# An error on a single ballot can decrease the margin by 1 or 2, so the
# number of ballots that must be in error is smaller than the vote margin.
marginb = 71             # minimum number of ballots that would need to have errors to alter the outcome
margindb = marginb / ballots  # "diluted" margin in ballots

print('upper bound on the diluted margin in ballots:', margindb)
print('upper bound on the diluted margin in votes:', margindv)
# Measured risk based on observing no errors.
# For a sample of size n, the chance of observing no errors if the true error
# rate is large enough to change the outcome is (1 - margindb)**n.
smallest_sample = 1000
largest_sample = 16000
sample_increment = 1000
print('sample size, measured risk if no errors are observed in the sample:')
# largest_sample + 1 makes the upper end of the range inclusive.
for n in np.arange(smallest_sample, largest_sample + 1, sample_increment):
    print(n, (1 - margindb)**n)
# Upper bounds on the error rate for various sample sizes.
x = 0  # number of errors observed in the sample
print('sample size, upper 95% confidence bound for error rate if no errors are observed in the sample:')
# Same sample sizes as the measured-risk table; the +1 keeps largest_sample in range.
for n in np.arange(smallest_sample, largest_sample + 1, sample_increment):
    print(n, permute.utils.binom_conf_interval(n, x, cl=cl, alternative="upper"))
# Suppose that x errors are observed in a sample of 2500 ballots.
# Lower confidence bounds on the population error rate for x = 1, ..., 10.
print('errors observed in a sample of 2500 ballots, lower 95% confidence bound on error rate:')
n = 2500  # fixed sample size for this table
for x in np.arange(1, 11):
    print(x, permute.utils.binom_conf_interval(n, x, cl=cl, alternative="lower"))
# Initial sample size for a risk-limiting audit, assuming the true error rates are zero
def minSampleSize(ballots, u, alpha=0.05, gamma=0.95):
    r'''
    Find the smallest sample size for risk limit alpha, using cushion gamma.

    The result is the smallest integer n for which

        1/alpha <= (gamma/(1 - 1/(ballots*u)) + 1 - gamma)**n

    Input:
        ballots: number of ballots cast in the contest
        u: upper bound on overstatement per ballot; ballots*u must exceed 1
        alpha: risk limit, in (0, 1]
        gamma: hedge against finding a ballot that attains the upper bound,
            intended to lie in (0, 1). Larger values give less protection.

    Returns:
        The sample size, as an int.

    Raises:
        ValueError: if alpha is not in (0, 1], gamma is not positive, or
            ballots*u does not exceed 1. Without these checks the formula
            divides by zero or yields a zero/negative "sample size".
    '''
    if not 0 < alpha <= 1:
        raise ValueError('alpha must be in (0, 1]')
    if not gamma > 0:
        raise ValueError('gamma must be positive')
    total_bound = ballots * u
    if not total_bound > 1:
        raise ValueError('ballots*u must be greater than 1')
    # Per-draw factor from the formula in the docstring.
    growth = gamma / (1.0 - 1.0 / total_bound) + 1.0 - gamma
    # int(...) because math.ceil returns a float on Python 2.
    return int(math.ceil(math.log(1.0 / alpha) / math.log(growth)))
# 2/marginv is passed as u, the per-ballot overstatement bound
# (presumably because one ballot can change the margin by at most 2 votes -- confirm).
print('Initial sample size for RLA with risk limit', alpha, ':',
      minSampleSize(ballots, 2 / marginv, alpha=alpha))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for this code!
Running it in I get
ImportError: No module named permute.utils
. The Python code for the permute module seems to be described in http://statlab.github.io/permute/permute.pdf Is the code available somewhere? Or at least the
permute.utils.binom_conf_interval()
method? Also, is it written for Python 3, Python 2, or both?