Last active
February 3, 2026 20:22
-
-
Save dacr/0943af77e448644b5cf6a4917b04df26 to your computer and use it in GitHub Desktop.
postal code opendata data sources. / published by https://github.com/dacr/code-examples-manager #095ffa72-b4d9-4f3d-85b2-b3e69a302ac4/58d1825c7f66e58b15bd87c7876a59e05a8c6c60
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // summary : postal code opendata data sources. | |
| // keywords : scala, opendata, data-analysis, requests, postal-codes, @testable | |
| // publish : gist | |
| // authors : David Crosson | |
| // license : Apache License Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0.txt) | |
| // id : 095ffa72-b4d9-4f3d-85b2-b3e69a302ac4 | |
| // created-on : 2020-10-10T16:21:18Z | |
| // managed-by : https://github.com/dacr/code-examples-manager | |
| // run-with : scala-cli $file | |
| // --------------------- | |
| //> using scala 3.6.4 | |
| //> using dep com.lihaoyi::requests:0.9.0 | |
| //> using dep com.lihaoyi::os-lib:0.11.4 | |
| // --------------------- | |
// Dataset home page on data.gouv.fr (not referenced by the code in this script).
val openDataPostalCodesHome: String =
  "https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/"

// Alternative data source URI, currently disabled:
//val openDataPostalCodesDataSourceURI = "https://www.data.gouv.fr/fr/datasets/r/3062548d-f510-4ded-ba38-a64126a5331b"

// CSV file downloaded when no cached copy is available.
val openDataPostalCodesDataSourceURI: String =
  "https://datanova.laposte.fr/data-fair/api/v1/datasets/laposte-hexasmal/metadata-attachments/base-officielle-codes-postaux.csv"

// Cached copy of the downloaded CSV, located in the current working directory.
val cachedResponseFile: os.Path = os.pwd / "base-officielle-codes-postaux.csv"
| // --------------------------------------------------------------------------------------------------------------------- | |
/** A geographic position expressed as a latitude/longitude pair.
  *
  * @param latitude  latitude component of the position
  * @param longitude longitude component of the position
  */
case class Point(latitude: Double, longitude: Double)
/** One entry of the postal code data set.
  *
  * @param townCode          code identifying the town; its leading characters form the county code
  *                          (presumably the INSEE commune code — confirm against the data source)
  * @param townName          name of the town
  * @param postalCode        postal code attached to this entry
  * @param secondaryTownName optional secondary name, `None` when not provided
  * @param deliveryLabel     optional delivery label, `None` when not provided
  * @param gps               optional position of the town, `None` when not provided
  */
case class PostalCode(
  townCode: String,
  townName: String,
  postalCode: String,
  secondaryTownName: Option[String],
  deliveryLabel: Option[String],
  gps: Option[Point]
) {

  /** County code derived from [[townCode]].
    *
    * Overseas territories use a three character code: both the "97x" departments and the "98x"
    * collectivities (e.g. 986, 987, 988). Every other town code starts with a two character
    * county code (this includes "2A" and "2B").
    */
  val countyCode: String = {
    val hasThreeCharacterCode = townCode.startsWith("97") || townCode.startsWith("98")
    townCode.take(if (hasThreeCharacterCode) 3 else 2)
  }
}
/** Parses a comma separated coordinate pair into a [[Point]].
  *
  * The input is split on `,` and each part is trimmed; the first part is read as the latitude
  * and the second one as the longitude.
  *
  * @param input text holding the two coordinates
  * @return the parsed point, or `None` when the input does not split into exactly two parts or
  *         when one of them is not a valid `Double`
  */
def stringToGPS(input: String): Option[Point] = {
  val parts = input.split(",").map(_.trim)
  if (parts.length != 2) None
  else
    parts(0).toDoubleOption.flatMap { lat =>
      parts(1).toDoubleOption.map(lon => Point(lat, lon))
    }
}
/** Parses one CSV line into a [[PostalCode]].
  *
  * Both supported layouts (opendata and laposte) are normalized to a `;` separated line through a
  * few basic replacements, then split. The first three fields (town code, town name, postal code)
  * are mandatory; up to three optional fields may follow, in this order: secondary town name,
  * delivery label, GPS position.
  *
  * `String.split` drops trailing empty strings, so a line whose last optional columns are empty
  * yields fewer than six fields. Any line providing between three and six fields is accepted and
  * the missing or blank optional fields become `None`.
  *
  * @param input one raw line of the CSV file
  * @return the parsed entry, or `None` (after printing a diagnostic message) when the line does
  *         not provide between three and six fields
  */
def stringToPostalCode(input: String): Option[PostalCode] = {
  // Basic hack parsing to support both formats, the opendata and the laposte ones.
  val fields = input.trim
    .replaceAll("\",\"", ";")
    .replaceAll("^\"(.*)\"$", "$1")
    .replaceAll("\",,\"", ";;")
    .replaceAll("\",,", ";;")
    .split(";")

  fields match {
    case Array(townCode, townName, postalCode, optional*) if optional.length <= 3 =>
      // Optional text field at the given position, None when absent or blank.
      def text(index: Int): Option[String] = optional.lift(index).filter(_.trim.nonEmpty)
      val position = optional.lift(2).flatMap(stringToGPS)
      Some(PostalCode(townCode, townName, postalCode, text(0), text(1), position))
    case _ =>
      println("Unmanaged input : " + fields.mkString(";"))
      None
  }
}
// All postal code entries parsed from the CSV file. The file is read from the local cache when it
// exists; otherwise it is downloaded, written to the cache, and then parsed.
val postalCodes = {
  // Lines of the previously cached CSV file.
  def readFromCache() = os.read(cachedResponseFile).split("\n").toVector

  // Downloads the CSV file, stores it in the cache and returns its lines.
  def downloadAndCache() = {
    val downloaded = requests.get(openDataPostalCodesDataSourceURI)
    os.write(cachedResponseFile, downloaded)
    downloaded.lines()
  }

  val csvLines =
    if (os.exists(cachedResponseFile)) readFromCache()
    else downloadAndCache()

  // The first line holds the CSV labels, so it is skipped; unparsable lines are discarded.
  val dataLines = csvLines.drop(1)
  dataLines.flatMap(line => stringToPostalCode(line))
}
// Town names grouped by county code. A town may appear on several rows (one row per postal code
// or secondary name), so rows are de-duplicated on the town code first: each town counts once.
val townByCounty: Map[String, List[String]] =
  postalCodes.toList
    .distinctBy(_.townCode)
    .groupMap(_.countyCode)(_.townName)

// Entries whose town name has the most / the fewest letters (non-letter characters are ignored).
// Both are None when no postal code could be loaded.
val longestTownName: Option[PostalCode] = postalCodes.maxByOption(_.townName.count(_.isLetter))
val shortestTownName: Option[PostalCode] = postalCodes.minByOption(_.townName.count(_.isLetter))

// County code having the highest number of distinct towns, paired with that number.
val countyWithMostTowns: Option[(String, Int)] =
  townByCounty
    .maxByOption { case (_, towns) => towns.size }
    .map { case (countyCode, towns) => countyCode -> towns.size }
// Print every parsed entry, ordered by town code.
for (entry <- postalCodes.sortBy(_.townCode)) println(entry)

// Print the summary statistics.
println(
  s"""postalCodeCount : ${postalCodes.size}
     |longestTownName : $longestTownName
     |shortestTownName : $shortestTownName
     |countyWithMostTowns : $countyWithMostTowns
     |""".stripMargin
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment