Last active
March 29, 2025 08:39
-
-
Save dacr/0943af77e448644b5cf6a4917b04df26 to your computer and use it in GitHub Desktop.
postal code opendata data sources. / published by https://github.com/dacr/code-examples-manager #095ffa72-b4d9-4f3d-85b2-b3e69a302ac4/7c7a80c72417661d07db06c3777ce9e1ced08535
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// summary : postal code opendata data sources.
// keywords : scala, opendata, data-analysis, requests, postal-codes, @testable
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2)
// id : 095ffa72-b4d9-4f3d-85b2-b3e69a302ac4
// created-on : 2020-10-10T16:21:18Z
// managed-by : https://github.com/dacr/code-examples-manager
// run-with : scala-cli $file
// ---------------------
//> using scala 3.6.4
//> using dep com.lihaoyi::requests:0.9.0
//> using dep com.lihaoyi::os-lib:0.11.4
// ---------------------
// Landing page of the postal code dataset on data.gouv.fr.
val openDataPostalCodesHome: String =
  "https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/"

// Alternative data.gouv.fr download link, currently disabled.
//val openDataPostalCodesDataSourceURI = "https://www.data.gouv.fr/fr/datasets/r/3062548d-f510-4ded-ba38-a64126a5331b"

// CSV file fetched over HTTP when no local copy is available.
val openDataPostalCodesDataSourceURI: String =
  "https://datanova.laposte.fr/data-fair/api/v1/datasets/laposte-hexasmal/metadata-attachments/base-officielle-codes-postaux.csv"

// Local copy of the CSV file, located in the current working directory.
val cachedResponseFile: os.Path =
  os.pwd / "base-officielle-codes-postaux.csv"
// --------------------------------------------------------------------------------------------------------------------- | |
/** A position given as a latitude / longitude pair.
  *
  * @param latitude  latitude component of the position
  * @param longitude longitude component of the position
  */
case class Point(latitude: Double, longitude: Double)
/** One record of the postal code dataset.
  *
  * @param townCode          town code; its leading characters give the [[countyCode]]
  * @param townName          name of the town
  * @param postalCode        postal code attached to the town
  * @param secondaryTownName secondary town name, when the record provides one
  * @param deliveryLabel     delivery label, when the record provides one
  * @param gps               position of the town, when the record provides a parsable one
  */
case class PostalCode(
  townCode: String,
  townName: String,
  postalCode: String,
  secondaryTownName: Option[String],
  deliveryLabel: Option[String],
  gps: Option[Point]
) {

  /** County code derived from [[townCode]].
    *
    * Town codes starting with "97" or "98" (French overseas departments and
    * collectivities in the INSEE numbering) carry a three-character territory
    * code; every other town code carries a two-character one (this also keeps
    * the Corsican "2A" / "2B" prefixes intact).
    */
  val countyCode: String = {
    val hasThreeCharacterPrefix = townCode.startsWith("97") || townCode.startsWith("98")
    townCode.take(if (hasThreeCharacterPrefix) 3 else 2)
  }
}
/** Parses a "latitude,longitude" text into a [[Point]].
  *
  * The input is split on commas and each part is trimmed before being
  * converted to a Double.
  *
  * @param input text expected to hold two comma-separated numbers
  * @return the parsed point, or None when the split does not yield exactly two
  *         parts or when one of them is not a valid Double
  */
def stringToGPS(input: String): Option[Point] = {
  val parts = input.split(",").map(_.trim)
  if (parts.length != 2) None
  else
    parts(0).toDoubleOption.flatMap { lat =>
      parts(1).toDoubleOption.map(lon => Point(lat, lon))
    }
}
/** Parses one CSV line of the postal code dataset.
  *
  * Columns are read in this order: town code, town name, postal code,
  * secondary town name, delivery label, position. The first three are
  * mandatory, the others may be missing or blank.
  *
  * @param input one raw CSV line, either `;` separated or made of `","` separated quoted fields
  * @return the parsed record, or None (after printing an "Unmanaged input"
  *         message) when the line has fewer than three or more than six columns
  */
def stringToPostalCode(input: String): Option[PostalCode] = {
  // A blank column means "no value"; the original (untrimmed) text is kept otherwise.
  def nonBlank(field: String): Option[String] = Some(field).filter(_.trim.nonEmpty)

  val fields =
    input.trim // basic hack parsing to support both formats, the opendata one and the laposte one
      .replaceAll("\",\"", ";")
      .replaceAll("^\"(.*)\"$", "$1")
      .replaceAll("\",,\"", ";;")
      .replaceAll("\",,", ";;")
      .split(";") // note: split drops trailing empty columns, hence the shorter cases below

  fields match {
    case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel, position) =>
      Some(PostalCode(townCode, townName, postalCode, nonBlank(secondaryTownName), nonBlank(deliveryLabel), stringToGPS(position)))
    case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel) =>
      Some(PostalCode(townCode, townName, postalCode, nonBlank(secondaryTownName), nonBlank(deliveryLabel), None))
    case Array(townCode, townName, postalCode, secondaryTownName) =>
      Some(PostalCode(townCode, townName, postalCode, nonBlank(secondaryTownName), None, None))
    case Array(townCode, townName, postalCode) =>
      // every optional column was empty and got dropped by split: still a valid record
      Some(PostalCode(townCode, townName, postalCode, None, None, None))
    case data =>
      println("Unmanaged input : " + data.mkString(";"))
      None
  }
}
// All parsable records of the dataset. Lines are read from the local cache file
// when it exists; otherwise the CSV is downloaded and stored in the cache file.
val postalCodes = {
  val csvLines =
    if (!os.exists(cachedResponseFile)) {
      val downloaded = requests.get(openDataPostalCodesDataSourceURI)
      os.write(cachedResponseFile, downloaded)
      downloaded.lines()
    } else {
      os.read(cachedResponseFile).split("\n").toVector
    }

  for {
    line   <- csvLines.drop(1) // the first line only holds the CSV column labels
    record <- stringToPostalCode(line)
  } yield record
}
// Town names grouped by county code. Records are de-duplicated by town code first:
// a town appearing on several rows (one per postal code / secondary name) must be
// counted once, otherwise the per-county town counts are inflated.
val townByCounty = postalCodes.distinctBy(_.townCode).toList.groupMap(_.countyCode)(_.townName)

// Records whose town name holds the most / the fewest letters (None when there is no record).
val longestTownName = postalCodes.maxByOption(_.townName.count(_.isLetter))
val shortestTownName = postalCodes.minByOption(_.townName.count(_.isLetter))

// County code with the highest number of distinct towns, paired with that number.
val countyWithMostTowns =
  townByCounty
    .maxByOption { case (_, towns) => towns.size }
    .map { case (countyCode, towns) => countyCode -> towns.size }

// Dump every record ordered by town code, then the summary.
postalCodes
  .sortBy(_.townCode)
  .foreach(println)

println(
  s"""postalCodeCount : ${postalCodes.size}
     |longestTownName : ${longestTownName}
     |shortestTownName : ${shortestTownName}
     |countyWithMostTowns : ${countyWithMostTowns}
     |""".stripMargin
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment