Last active
June 15, 2024 07:58
-
-
Save dacr/facaa509c582efc65efab05c7f7013f9 to your computer and use it in GitHub Desktop.
Benford law experiments / published by https://github.com/dacr/code-examples-manager #3a44812b-bbbd-4382-9fdb-a50269bdbe98/ee8598cbbe713506c3ad5ad11bb8b7ea59b164e
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// summary : Benford law experiments | |
// keywords : scala, math, benfordlaw, benford, @testable | |
// publish : gist | |
// authors : David Crosson | |
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2) | |
// id : 3a44812b-bbbd-4382-9fdb-a50269bdbe98 | |
// created-on : 2020-12-06T08:41:25Z | |
// managed-by : https://github.com/dacr/code-examples-manager | |
// run-with : scala-cli $file | |
// --------------------- | |
//> using scala "3.4.2" | |
//> using dep "org.scalatest::scalatest:3.2.18" | |
//> using dep "com.lihaoyi::requests:0.8.2" | |
//> using objectWrapper | |
// --------------------- | |
/* | |
Benford's law on Wikipedia : https://en.wikipedia.org/wiki/Benford%27s_law
*/ | |
import org.scalatest._,flatspec._,matchers._ | |
import scala.math._ | |
/** Reference Benford frequency for a given digit, computed as `log10(1 + 1/digit)`.
  *
  * No range check is performed on `digit`; the caller in this file only passes 1 to 9.
  *
  * @param digit the digit whose reference frequency is wanted
  * @return `log10(1 + 1/digit)`
  */
def benfordReferenceDistributionForDigit(digit: Int): Double = {
  val inverse = 1d / digit
  log10(1d + inverse)
}
/** Reference Benford table for the digits 1 to 9.
  *
  * @return the pairs `(digit, reference frequency)` in increasing digit order
  */
def benfordReferenceDistribution(): Iterable[(Int, Double)] =
  for (digit <- 1 to 9) yield (digit, benfordReferenceDistributionForDigit(digit))
/** Measures how far the leading digits of `input` deviate from Benford's law.
  *
  * The score is the mean absolute deviation between the observed frequency of each
  * leading digit (1 to 9) and its Benford reference frequency.
  * The lower is better, 0 means fully compliant.
  *
  * A digit that never appears as a leading digit counts with an observed frequency
  * of 0; consequently an input without any usable digit does not score 0.
  *
  * @param input the values to analyse, as strings
  * @return the mean absolute deviation over the nine leading digits (always >= 0)
  */
def benfordConfidence(input: Iterable[String]): Double = {
  val observed = figureFrequencies(input)
  val deviations = (1 to 9).map { digit =>
    math.abs(observed.getOrElse(digit, 0d) - benfordReferenceDistributionForDigit(digit))
  }
  deviations.sum / deviations.size
}
/** Computes the relative frequency of the digit found at a given significant position.
  *
  * For each input string, every character other than the ASCII digits '0'..'9' is
  * discarded, leading zeros are skipped, and the digit at index `pos` of what remains
  * is taken. Strings with no digit at that position are ignored.
  *
  * Only leading zeros are skipped: zeros after the first significant digit are kept,
  * so that for `pos > 0` the digit 0 can be reported and later digits are not shifted.
  *
  * @param input the values to analyse, as strings
  * @param pos   zero-based index of the significant digit to look at (0 = leading digit)
  * @return a map from digit to its share among all extracted digits; empty when no
  *         digit could be extracted
  */
def figureFrequencies(input: Iterable[String], pos: Int = 0): Map[Int, Double] = {
  // Materialised once so the input is traversed a single time.
  val digits: Vector[Int] =
    input.iterator
      // Explicit ASCII range: isDigit also accepts other Unicode digits,
      // which would not map to 0..9 when subtracting '0'.
      .map(_.filter(ch => ch >= '0' && ch <= '9').dropWhile(_ == '0'))
      .flatMap(_.drop(pos).headOption)
      .map(_ - '0')
      .toVector
  val total = digits.size.toDouble
  digits
    .groupMapReduce(identity)(_ => 1)(_ + _)
    .map { case (digit, count) => digit -> count / total }
}
/** Checks `figureFrequencies` on a trivial input and reports the leading-digit
  * distribution of a postal-code dataset downloaded over HTTP.
  */
class FigureFrequenciesTest extends AnyFlatSpec with should.Matchers {
  override def suiteName: String = "FigureFrequenciesTest"

  "Benford law compute function" should "return the right frequencies" in {
    val f1 = figureFrequencies(1.to(9).map(_.toString))
    info("of course in that particular case, benford law is not verified")
    // Guard against a vacuous pass: every digit must be present before checking values.
    f1.keySet shouldBe 1.to(9).toSet
    // Nine distinct leading digits, each seen once, so each frequency is exactly 1/9.
    f1.values.foreach(freq => freq shouldBe (1d / 9) +- 1e-9)
  }

  "Benford law" should "not be satisfied by French town postal codes" in {
    // Requires network access; this test only reports the frequencies, it asserts nothing.
    val postalCodes =
      requests
        .get("https://www.data.gouv.fr/fr/datasets/r/79e327d3-b7af-479a-8e41-8a2e82b61c3a")
        .lines()
        .drop(1) // first line == the CSV labels
        .map(_.split(";", 4))
        // keep only rows that split into four fields, and take the third one
        .collect { case Array(_, _, code, _) => code }
    val frequencies = figureFrequencies(postalCodes)
    info("unsatisfied on france town postal codes")
    frequencies.toList.sortBy(_._1).foreach(entry => info(entry.toString))
  }
}
// Script entry point: run the FigureFrequenciesTest suite through the ScalaTest runner.
locally {
  val suiteClassName = classOf[FigureFrequenciesTest].getName
  val runnerArguments = Array("-oDF", "-s", suiteClassName)
  org.scalatest.tools.Runner.main(runnerArguments)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment