Last active
November 2, 2020 16:18
-
-
Save mzandvliet/1e1ea63ebcbbcc65577c64a845d49664 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import random; | |
from sympy import * | |
''' | |
=== Toying with Benford's law === | |
https://en.wikipedia.org/wiki/Benford's_law | |
Introductory video by Numberphile: www.youtube.com/watch?v=XXjlR2OK1kM | |
Some example datasets where Benford's law holds: https://testingbenfordslaw.com | |
''' | |
def benford(): | |
''' | |
Imagine we have 60 students, each with pen, paper and dice. | |
Each student rolls a 6-sided die a given number of times. | |
They multiply each of their rolls together, for example: | |
product of dice rolls = 1 * 5 * 4 * 3 * 4 * 3 * 6 * ... * 4 = 472 | |
When they are done we have a list of 60 large numbers | |
The leading digit of any number can be any of [0 ... 9], and so we | |
count how many times 1 is the leading number, 2 is the leading number, | |
and so on. For example, in the list: | |
[ | |
472, | |
102, | |
11, | |
1, | |
30001, | |
170 | |
] | |
we have 4 numbers leading with 1 | |
we have 1 number leading with 3 | |
we have 1 number leading with 4 | |
So in this list 4/6 * 100 = 60% of the numbers have 1 as the leading digit | |
When I run this program I get: | |
0 : 0.0 percent | |
1 : 23.333333333333332 percent | |
2 : 20.0 percent | |
3 : 13.333333333333334 percent | |
4 : 16.666666666666664 percent | |
5 : 8.333333333333332 percent | |
6 : 8.333333333333332 percent | |
7 : 5.0 percent | |
8 : 1.6666666666666667 percent | |
9 : 3.3333333333333335 percent | |
When I run this program with a larger amount of students and dice rolls, I get: | |
0 : 0.0 percent | |
1 : 33.33333333333333 percent | |
2 : 18.333333333333332 percent | |
3 : 11.666666666666666 percent | |
4 : 10.0 percent | |
5 : 11.666666666666666 percent | |
6 : 10.0 percent | |
7 : 1.6666666666666667 percent | |
8 : 1.6666666666666667 percent | |
9 : 1.6666666666666667 percent | |
''' | |
numStudents = 60 | |
numRolls = 15 | |
# Calculate the maximum amount of digits needed to write | |
# the products we'll get. | |
maxDigits = math.ceil(math.log(6 ** numRolls, 10)) | |
print("max digits: %s" % (maxDigits)) | |
# We will track for each possible leading digits how many | |
# times we see it, and store it in this array | |
digitCounts = [0] * 10 | |
# Each student... | |
for student in range(numStudents): | |
# Rolls their dice and multiplies the results of the rolls together | |
product = 1 | |
for roll in range(numRolls): | |
value = random.randint(1, 6) | |
product *= value | |
# And we keep track of wich value the leading digit of the product had | |
digitCounts[first_digit_value(product, maxDigits)] += 1 | |
# Print the results in the console | |
for i in range(0, 10): | |
print("%s : %s percent"%(i, digitCounts[i] / numStudents * 100.0)) | |
def is_first_digit_one(number, maxDigits): | |
''' | |
Returns True if the leading digit of number is 1, False if not. | |
Parameters: | |
number (int): A decimal integer | |
maxDigits (int): A natural number indicating the maximum amount of digits to check | |
''' | |
# Run through all decimal digits from lowest to highest, keep track of which | |
# last non-zero number was seen | |
result = False | |
for d in range(maxDigits): | |
digit = get_digit(number, d) | |
if (digit == 1): | |
result = True | |
elif (digit > 1): | |
result = False | |
return result | |
def first_digit_value(number, maxDigits): | |
''' | |
Returns the value of the leading non-zero digit in number | |
Parameters: | |
number (int): A decimal integer | |
maxDigits (int): A natural number indicating the maximum amount of digits to check | |
''' | |
digit = 0 | |
for d in range(maxDigits): | |
digitValue = get_digit(number, d) | |
if (digitValue > 0): | |
digit = digitValue | |
return digit | |
def first_digit(number, maxDigits): | |
''' | |
Returns the index (position) of the leading non-zero digit in number | |
Parameters: | |
number (int): A decimal integer | |
maxDigits (int): A natural number indicating the maximum amount of digits to check | |
''' | |
digit = 0 | |
for d in range(maxDigits): | |
digitValue = get_digit(number, d) | |
if (digitValue > 0): | |
digit = d | |
return digit | |
def get_digit(number, n): | |
''' | |
Returns the value of the n'th digit in number | |
Parameters: | |
number (int): A decimal integer | |
n (int): A natural number indicating the index of the digit in number to check | |
''' | |
return number // 10**n % 10 | |
def benford_estimate(): | |
# Benford's function for estimating the prevalence of leading digit | |
base = 10 # base 10, decimal | |
a = symbols('a') | |
benf = log((a+1)/a, base) | |
for i in range(base): | |
print("%s = %s"%(i, benf.evalf(subs={a: 1}))) | |
def main(): | |
init_printing(pretty_print=True, use_unicode=True, num_columns=180) | |
benford() | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment