Last active
June 21, 2024 21:09
-
-
Save dacr/8464684e2bc8bcbeb3d9ca0e65109439 to your computer and use it in GitHub Desktop.
First try with open-data data sources with ssl certificate hacks / published by https://github.com/dacr/code-examples-manager #9c7bc7c6-f6b3-4559-b1e9-2a81b4fac66f/8ff4af7e3d72ec874877c78461f451fe3797926a
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// summary : First try with open-data data sources with ssl certificate hacks
// keywords : scala, opendata, data-analysis, akka, sttp, sslhack
// publish : gist
// authors : David Crosson
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2)
// id : 9c7bc7c6-f6b3-4559-b1e9-2a81b4fac66f
// created-on : 2018-06-20T22:01:59Z
// managed-by : https://github.com/dacr/code-examples-manager
// execution : scala 2.12 ammonite script (http://ammonite.io/) - run as follows 'amm scriptname.sc'
import $ivy.`com.softwaremill.sttp::core:1.2.1`
import $ivy.`com.softwaremill.sttp::akka-http-backend:1.2.1`
import $ivy.`com.typesafe.akka::akka-stream:2.5.13`

import java.io.{ByteArrayInputStream, FileInputStream}
import java.nio.charset.StandardCharsets
import java.security.{KeyStore, SecureRandom}
import java.security.cert.{Certificate, CertificateFactory, X509Certificate}
import javax.net.ssl._

import akka.http.scaladsl.{ConnectionContext, HttpsConnectionContext}
import akka.stream.scaladsl.Source
import akka.util.ByteString
import com.softwaremill.sttp._
import com.softwaremill.sttp.akkahttp._
import com.typesafe.sslconfig.akka.AkkaSSLConfig

import scala.concurrent.Await
import scala.concurrent.Future
import scala.concurrent.duration._
/** Downloads a postal-code CSV over HTTPS and prints, per county code, how many
  * postal-code entries were found (largest county first).
  *
  * Everything is wired at object initialisation: the actor system, the sttp
  * backend (using the HTTPS context built by [[customCertificateCheck]]) and the
  * `result` future, so merely touching `TestThat` starts the download.
  */
object TestThat {

  /** Builds an HTTPS context whose trust manager accepts every certificate chain.
    *
    * Kept only as a reference for the quick hack mentioned below; the script itself
    * uses [[customCertificateCheck]] instead.
    *
    * @return an akka-http HTTPS context performing no certificate validation
    */
  def noCertificateCheckContext(): HttpsConnectionContext = {
    // NEVER DO THAT !!! JUST QUICK HACK BECAUSE OF www.data.gouv.fr/datanova.legroupe.laposte.fr certificate issues...
    val trustfulSslContext: SSLContext = {
      object NoCheckX509TrustManager extends X509TrustManager {
        override def checkClientTrusted(chain: Array[X509Certificate], authType: String): Unit = {}
        override def checkServerTrusted(chain: Array[X509Certificate], authType: String): Unit = {}
        override def getAcceptedIssuers: Array[X509Certificate] = Array[X509Certificate]()
      }
      val context = SSLContext.getInstance("TLS")
      context.init(Array[KeyManager](), Array[TrustManager](NoCheckX509TrustManager), null)
      context
    }
    ConnectionContext.https(trustfulSslContext)
  }

  /** Builds an HTTPS context trusting the JVM default `cacerts` entries plus the two
    * Certinomis certificates embedded below. Better deal than
    * [[noCertificateCheckContext]]: regular certificate validation stays active.
    *
    * @return an akka-http HTTPS context backed by the extended trust store
    * @throws java.io.IOException if the `cacerts` file cannot be read
    * @throws java.security.GeneralSecurityException if the key store or one of the
    *         embedded certificates cannot be loaded
    */
  def customCertificateCheck(): HttpsConnectionContext = {
    val certStore = KeyStore.getInstance(KeyStore.getDefaultType)
    val fis = new FileInputStream(scala.util.Properties.javaHome + "/lib/security/cacerts")
    // Always release the file handle, even when loading the key store fails.
    try certStore.load(fis, null)
    finally fis.close()
    //certStore.load(null, null)
    val rootCA = // Certinomis-RootCA
      """-----BEGIN CERTIFICATE-----
        |MIIFkjCCA3qgAwIBAgIBATANBgkqhkiG9w0BAQsFADBaMQswCQYDVQQGEwJGUjET
        |MBEGA1UEChMKQ2VydGlub21pczEXMBUGA1UECxMOMDAwMiA0MzM5OTg5MDMxHTAb
        |BgNVBAMTFENlcnRpbm9taXMgLSBSb290IENBMB4XDTEzMTAyMTA5MTcxOFoXDTMz
        |MTAyMTA5MTcxOFowWjELMAkGA1UEBhMCRlIxEzARBgNVBAoTCkNlcnRpbm9taXMx
        |FzAVBgNVBAsTDjAwMDIgNDMzOTk4OTAzMR0wGwYDVQQDExRDZXJ0aW5vbWlzIC0g
        |Um9vdCBDQTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANTMCQosP5L2
        |fxSeC5yaah1AMGT9qt8OHgZbn1CF6s2Nq0Nn3rD6foCWnoR4kkjW4znuzuRZWJfl
        |LieY6pOod5tK8O90gC3rMB+12ceAnGInkYjwSond3IjmFPnVAy//ldu9n+ws+hQV
        |WZUKxkd8aRi5pwP5ynapz8dvtF4F/u7BUrJ1Mofs7SlmO/NKFoL21prbcpjp3vDF
        |TKWrteoB4owuZH9kb/2jJZOLyKIOSY008B/sWEUuNKqEUL3nskoTuLAPrjhdsKkb
        |5nPJWqHZZkCqqU2mNAKthH6yI8H7KsZn9DS2sJVqM09xRLWtwHkziOC/7aOgFLSc
        |CbAK42C++PhmiM1b8XcF4LVzbsF9Ri6OSyemzTUK/eVNfaoqoynHWmgE6OXWk6Ri
        |wsXm9E/G+Z8ajYJJGYrKWUM66A0ywfRMEwNvbqY/kXPLynNvEiCL7sCCeN5LLsJJ
        |wx3tFvYk9CcbXFcx3FXuqB5vbKziRcxXV4p1VxngtViZSTYxPDMBbRZKzbgqg4SG
        |m/lg0h9tkQPTYKbVPZrdd5A9NaSfD171UkRpucC63M9933zZxKyGIjK8e2uR73r4
        |F2iw4lNVYC2vPsKD2NkJK/DAZNuHi5HMkesE/Xa0lZrmFAYb1TQdvtj/dBxThZng
        |WVJKYe2InmtJiUZ+IFrZ50rlau7SZRFDAgMBAAGjYzBhMA4GA1UdDwEB/wQEAwIB
        |BjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTvkUz1pcMw6C8I6tNxIqSSaHh0
        |2TAfBgNVHSMEGDAWgBTvkUz1pcMw6C8I6tNxIqSSaHh02TANBgkqhkiG9w0BAQsF
        |AAOCAgEAfj1U2iJdGlg+O1QnurrMyOMaauo++RLrVl89UM7g6kgmJs95Vn6RHJk/
        |0KGRHCwPT5iVWVO90CLYiF2cN/z7ZMF4jIuaYAnq1fohX9B0ZedQxb8uuQsLrbWw
        |F6YSjNRieOpWauwK0kDDPAUwPk2Ut59KA9N9J0u2/kTO+hkzGm2kQtHdzMjI1xZS
        |g081lLMSVX3l4kLr5JyTCcBMWwerx20RoFAXlCOotQqSD7J6wWAsOMwaplv/8gzj
        |qh8c3LigkyfeY+N/IZ865Z764BNqdeuWXGKRlI5nU7aJ+BIJy29SWwNyhlCVCNSN
        |h4YVH5Uk2KRvms6knZtt0rJ2BobGVgjF6wnaNsIbW0G+YSrjcOa4pvi2WsS9Iff/
        |ql+hbHY5ZtbqTFXhADObE5hjyW/QASAJN1LnDE8+zbz1X5YnpyACleAu6AdBBR8V
        |btaw5BngDwKTACdyxYvRVB9dSsNAl35VpnzBMwQUAR1JIGkLGZOdblgi90AMRgwj
        |Y/M50n92Uaf0yKHxDHYiI0ZSKS3io0EHVmmY0gUJvGnHWmHNj4FgFU2A3ZDifcRQ
        |8ow7bkrHxuaAKzyBvBGAFhAn1/DNP3nMcyrDflOR1m749fPH0FFNjkulW+YZFzvW
        |gQncItzujrnEj1PhZ7szuIgVRs/taTX/dQ1G885x4cVrhkIGuUE=
        |-----END CERTIFICATE-----""".stripMargin
    val easyCA = // Certinomis-EasyCA.crt
      """-----BEGIN CERTIFICATE-----
        |MIIGEjCCA/qgAwIBAgIUEFAa9YehyfjXD8x9cCJL9R/spn8wDQYJKoZIhvcNAQEL
        |BQAwWjELMAkGA1UEBhMCRlIxEzARBgNVBAoTCkNlcnRpbm9taXMxFzAVBgNVBAsT
        |DjAwMDIgNDMzOTk4OTAzMR0wGwYDVQQDExRDZXJ0aW5vbWlzIC0gUm9vdCBDQTAe
        |Fw0xMzEwMjExMDEyNDRaFw0yMzEwMjExMDEyNDRaMFoxCzAJBgNVBAYTAkZSMRMw
        |EQYDVQQKEwpDZXJ0aW5vbWlzMRcwFQYDVQQLEw4wMDAyIDQzMzk5ODkwMzEdMBsG
        |A1UEAxMUQ2VydGlub21pcyAtIEVhc3kgQ0EwggIiMA0GCSqGSIb3DQEBAQUAA4IC
        |DwAwggIKAoICAQCutAI/wFp8v2QIyg4fMVkhmykdDwYvlL6URQEGlFpCPtOWv1T9
        |v6ubzvHS4oQttB4y2m5Ta67Sena4rm7fQVtSNbJHkEhcYvz2bpIKbt5Cw/8G3tfq
        |AExi1lPAsdg2ZjR/H9zPdtUm7U5AJT364ux+dImv8SuNjFwi1WnPftHHqc3ox5WN
        |HDQOoYLkXojfl1otE928G8NWm5YnVOOzOsyZEN/9BPsCST9qS37LBMNFjo3EbL7H
        |EMKtBMvMbGCH1X+kBTq+GPE7B4UNISqiMXWnhqSYdrs55b7b+esJUaPhC9eTpfMx
        |bH+bvsPJjrpfSiaKrgkdo2HaiOEIs0glS2v+usqWUq7bCS35KXkM5NsZ0PMD/JYR
        |t6Iq4e2v2WX5gntuKTadusF1EyPkNRnrssYatvQPmUZ8J//QwlUvUiEXQLuBkElb
        |T4W/39co5IxWNW2HyTESGA6gownOpTUITuJr7+I9XKzRqI8qZNnBSwkW7YVtkJre
        |nEKKze8SLGfYNSIC3SuDgjYHefHJUlYWiQvwOASiLsAbtBzt7nB71QOrctHk0Ku0
        |CUNJOqJd1kCO5LJEvghM1hjOHh/lBx0SGlXvsE88U0sF7tJ9UMDq8LZvswPH6rXZ
        |PZBBsvrhYbLihrmvQn5Ykx99EOGu5mljYlPoelhUn3tlBkQZFFtl2zb7ZwIDAQAB
        |o4HPMIHMMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQW
        |BBQsxeMgL6sKEdb3OtdRePRsj7EAWTARBgNVHSAECjAIMAYGBFUdIAAwVgYDVR0f
        |BE8wTTBLoEmgR4ZFaHR0cDovL2NybC5pZ2MtZzMuY2VydGlub21pcy5jb20vUkFD
        |SU5FX0czL2NybC9BQ19SYWNpbmVfRzMtY3JsLTEuY3JsMB8GA1UdIwQYMBaAFO+R
        |TPWlwzDoLwjq03EipJJoeHTZMA0GCSqGSIb3DQEBCwUAA4ICAQA4YU33C2OoDs2a
        |D9YNUpzdhmbi72MXJl0aKTnftnz987grxvA11KCEut2Ma4dXb1pfiFwBNC9wiLPg
        |1E5mbCoPVAQA2X7bA0kNeIMCfZccTU7cX4a4KNIGUj0tPYc86Yp7+u0CnFcK+erK
        |B6dEbPyKQqBs9hJ++2tV0gWZBi1RU4CHpv00bzA+R7s9SDct0c7Z496mLDpEW8gV
        |Hur/OhMrGOiEh9TjnVW4xnTfGql4ney6tvOdec68LJEWKGocaLUzrHfevVQJ4tYx
        |FjuEsBh9ikYG3D+YXud8+kB9nectgkpBD6VsFvkliQVA6pElE+CBy1CCHtr6hx4r
        |BLln+t3lN3yLPhX0oqduvP6jdnwFXnIJdKSqcsl6bVENXlSZg4CgbhIECKRNYfoe
        |Pq036vlThZETqm3b7rZpmrGJhMeO370eJmxn+RdRh00gs6p8G1zji7ju3pKtovGi
        |4X2ipaL5AitYUxU8ewbX7HsaZDJ9aY+b+FZrUfCSM3l4FpLSO8hejZMChg/kyKa2
        |wlOAA3+8cgNsncBmAJrxN8KYYNR8uw6nckYIQ2c9WJuU/w3KTDBJTOCUfFTK4Emi
        |ev8jaFR6avqZIZczMh1SnXa55AIAOoxKdw0zVZjC1mQvZ+8MC+E4E3QS32p3L/C/
        |5zJzhb/zFMxGQvLz4dmY9O8PmJOmgQ==
        |-----END CERTIFICATE-----
        |""".stripMargin

    // Parses one PEM-encoded certificate; PEM text is pure ASCII, so the charset is
    // pinned instead of relying on the platform default.
    def loadCert(cert: String): Certificate = {
      val pemStream = new ByteArrayInputStream(cert.getBytes(StandardCharsets.US_ASCII))
      CertificateFactory.getInstance("X.509").generateCertificate(pemStream)
    }

    certStore.setCertificateEntry("root ca", loadCert(rootCA))
    certStore.setCertificateEntry("easy ca", loadCert(easyCA))
    val certManagerFactory = TrustManagerFactory.getInstance("SunX509")
    certManagerFactory.init(certStore)
    val context = SSLContext.getInstance("TLS")
    context.init(null, certManagerFactory.getTrustManagers, new SecureRandom)
    ConnectionContext.https(context)
  }

  import scala.concurrent.ExecutionContext.Implicits.global

  implicit val actorSystem: akka.actor.ActorSystem = akka.actor.ActorSystem("MySystem")
  implicit val sttpBackend = AkkaHttpBackend.usingActorSystem(
    actorSystem = actorSystem,
    customHttpsContext = Some(customCertificateCheck())
  )

  // postal codes: https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/
  val postalCodeURI = uri"https://www.data.gouv.fr/fr/datasets/r/554590ab-ae62-40ac-8353-ee75162c05ee"
  // the previous URI redirects to the next one
  //val postalCodeURI = uri"https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true"

  /** One row of the postal-code CSV.
    *
    * @param townCode      first CSV column
    * @param townName      second CSV column
    * @param postalCode    third CSV column
    * @param deliveryLabel fourth CSV column
    * @param position      sixth CSV column (the fifth one is ignored by the parser)
    */
  case class PostalCode(
    townCode: String,
    townName: String,
    postalCode: String,
    deliveryLabel: String,
    position: String
  ) {
    /** Leading characters of `townCode`: three when it starts with "97", two otherwise. */
    val countyCode: String = townCode.take(if (townCode.startsWith("97")) 3 else 2)
  }

  object PostalCode {
    /** Builds a [[PostalCode]] from the six fields of a CSV row (the fifth is dropped).
      *
      * @param args the row already split on its separator
      * @throws scala.MatchError when `args` does not hold exactly six fields
      */
    def apply(args: Array[String]): PostalCode =
      args match {
        case Array(townCode, townName, postalCode, deliveryLabel, _, position) =>
          PostalCode(townCode, townName, postalCode, deliveryLabel, position)
      }
  }

  /** Turns the raw CSV payload into postal-code rows.
    *
    * Lines that do not start with a digit (blank lines, or a header line if the
    * payload carries one) are skipped; a data line with an unexpected number of
    * columns still fails with a `MatchError` so format changes are not hidden.
    */
  private def parsePostalCodes(csv: String): Array[PostalCode] =
    csv
      .split("\n")
      .map(_.stripSuffix("\r")) // tolerate CRLF line endings
      .filter(_.headOption.exists(_.isDigit))
      // limit -1 keeps trailing empty fields, which a plain split(";") would drop
      .map(line => PostalCode(line.split(";", -1)))

  /** Sends the GET request for [[postalCodeURI]] and yields the response with its body as text. */
  def postalCodesStream: Future[Response[String]] =
    sttp
      .get(postalCodeURI)
      .response(asString)
      .send()

  /** Downloads the CSV and parses it into postal-code rows.
    *
    * The future fails when the response has no usable body (`unsafeBody`) or when a
    * data line cannot be parsed.
    */
  def postalCodesFuture: Future[Array[PostalCode]] =
    postalCodesStream
      .map(_.unsafeBody)
      .map(parsePostalCodes)

  /** Postal-code rows grouped by county code; starts computing at object initialisation. */
  val result: Future[Map[String, Array[PostalCode]]] =
    for {
      postalCodes <- postalCodesFuture
    } yield {
      postalCodes.groupBy(_.countyCode)
      /* scala 2.13 collections
      val townByCounty = postalCodes.toList.groupMap(_.countyCode)(_.townName)
      val longestTownName = postalCodes.maxByOption {
        _.townName.count(_.isLetter)
      }.map(_.townName)
      val shortestTownName = postalCodes.minByOption {
        _.townName.count(_.isLetter)
      }.map(_.townName)
      val countyWithMostTowns = townByCounty.maxByOption { case (countyCode, towns) => towns.size }
      */
    }

  /** Blocks (up to 120 seconds) until [[result]] is available, prints one line per
    * county code ordered by decreasing number of rows, then shuts the actor system down.
    *
    * Printing happens on the calling thread, and a failed or timed-out download is
    * rethrown instead of being silently ignored.
    */
  def andWait: Unit = {
    try {
      val data = Await.result(result, 120.seconds)
      for {
        (code, postalCodes) <- data.toList.sortBy { case (_, l) => -l.size }
        townCount = postalCodes.size
      } {
        println(s"$code -> $townCount towns")
      }
    } finally {
      // Without this the actor system stays running after the script has finished.
      actorSystem.terminate()
    }
  }
}

TestThat.andWait
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment