Last active
September 22, 2024 09:53
-
-
Save dacr/ed1264861cb0f0a82afd1bd45c272a92 to your computer and use it in GitHub Desktop.
check neo4j spatial features / published by https://github.com/dacr/code-examples-manager #6a213101-b3ef-4a3e-abbe-cb5c0bcde878/30d92a03d0cfd48f5d6b0b344425534c586c22a2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// summary : check neo4j spatial features | |
// keywords : scala, neo4j, cypher, spatial, @testable | |
// publish : gist | |
// authors : David Crosson | |
// license : Apache NON-AI License Version 2.0 (https://raw.githubusercontent.com/non-ai-licenses/non-ai-licenses/main/NON-AI-APACHE2) | |
// id : 6a213101-b3ef-4a3e-abbe-cb5c0bcde878 | |
// created-on : 2024-06-20T14:18:48+02:00 | |
// managed-by : https://github.com/dacr/code-examples-manager | |
// run-with : scala-cli $file | |
// --------------------- | |
//> using scala 3.4.2 | |
//> using javaOpt -Xmx5g | |
//> using dep org.neo4j.test:neo4j-harness:5.22.0 | |
//> using dep org.neo4j.driver:neo4j-java-driver:5.22.0 | |
//> using dep org.wvlet.airframe::airframe-ulid:24.7.1 | |
//> using dep com.lihaoyi::requests:0.8.3 | |
//> using dep com.lihaoyi::os-lib:0.10.2 | |
// --------------------- | |
// ===================================================================================== | |
// see opendata-postalcodes.sc gist | |
//val openDataPostalCodesHome = "https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/" | |
//val openDataPostalCodesDataSourceURI = "https://www.data.gouv.fr/fr/datasets/r/3062548d-f510-4ded-ba38-a64126a5331b" | |
val postalCodeDataSourceURL = "https://datanova.laposte.fr/data-fair/api/v1/datasets/laposte-hexasmal/metadata-attachments/base-officielle-codes-postaux.csv" | |
val postalCodeFileCache = os.pwd / "base-officielle-codes-postaux.csv" | |
val departmentDataSourceURL = "https://www.data.gouv.fr/fr/datasets/r/70cef74f-70b1-495a-8500-c089229c0254" | |
val departmentFileCache = os.pwd / "departements-france.csv" | |
// --------------------------------------------------------------------------------------------------------------------- | |
case class Point( | |
latitude: Double, | |
longitude: Double | |
) | |
case class Country( | |
name: String | |
) { | |
val id = name.toLowerCase.replaceAll("[^a-z]", "") | |
} | |
case class Region( | |
code: String, | |
name: String, | |
countryId: String | |
) { | |
val id = s"reg$code" | |
} | |
case class Department( | |
code: String, | |
name: String, | |
region: Region | |
) { | |
val id = s"dep$code" | |
} | |
case class Places( | |
townCode: String, | |
townName: String, | |
postalCode: String, | |
secondaryTownName: Option[String], | |
deliveryLabel: Option[String], | |
gps: Option[Point] | |
) { | |
val departmentCode = townCode.take(if (townCode.startsWith("97") || townCode.startsWith("98") || townCode.startsWith("99")) 3 else 2) | |
val departmentId = s"dep$departmentCode" | |
val code = postalCode | |
val name = (List(townName) :++ secondaryTownName :++ deliveryLabel).distinct.mkString(" ") | |
val nameSlug = name.replaceAll("[^A-Z]", "_") | |
val id = s"PLACE_${nameSlug}_$code" // a unique town identifier | |
} | |
def stringToGPS(input: String): Option[Point] = { | |
input.split(",").map(_.trim) match { | |
case Array(latitude, longitude) => | |
for { | |
lat <- latitude.toDoubleOption | |
lon <- longitude.toDoubleOption | |
} yield Point(lat, lon) | |
case _ => None | |
} | |
} | |
def stringToPostalCode(input: String): Places = { | |
input.trim // with some basic hack parsing to support both format, opendata and laposte ones | |
.replaceAll("\",\"", ";") | |
.replaceAll("^\"(.*)\"$", "$1") | |
.replaceAll("\",,\"", ";;") | |
.replaceAll("\",,", ";;") | |
.replaceAll("\"", "") | |
.split(";") match { | |
case Array(townCode, townName, postalCode, "", deliveryLabel, position) => | |
Places(townCode, townName, postalCode, None, Some(deliveryLabel).filter(_.trim.nonEmpty), stringToGPS(position)) | |
case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel, position) => | |
Places(townCode, townName, postalCode, Some(secondaryTownName).filter(_.trim.nonEmpty), Some(deliveryLabel).filter(_.trim.nonEmpty), stringToGPS(position)) | |
case Array(townCode, townName, postalCode, secondaryTownName, deliveryLabel) => | |
Places(townCode, townName, postalCode, Some(secondaryTownName).filter(_.trim.nonEmpty), Some(deliveryLabel).filter(_.trim.nonEmpty), None) | |
case Array(townCode, townName, postalCode, secondaryTownName) => | |
Places(townCode, townName, postalCode, Some(secondaryTownName).filter(_.trim.nonEmpty), None, None) | |
} | |
} | |
def stringToDepartment(input: String): Department = { | |
input.trim | |
.split(",") match { | |
case Array(code, name, regionCode, regionName) => | |
Department(code, name, Region(regionCode, regionName, countryId = "france")) | |
} | |
} | |
def getData(dataSourceURL: String, dataSourceCacheFile: os.Path): Vector[String] = { | |
if (os.exists(dataSourceCacheFile)) os.read(dataSourceCacheFile).split("\n").toVector | |
else { | |
val data = requests.get(dataSourceURL) | |
os.write(dataSourceCacheFile, data) | |
data.lines() | |
} | |
} | |
val postalCodes = | |
getData(postalCodeDataSourceURL, postalCodeFileCache) | |
.drop(1) // first line == the CSV labels | |
.map(stringToPostalCode) | |
val departments = | |
getData(departmentDataSourceURL, departmentFileCache) | |
.drop(1) // first line == the CSV labels | |
.map(stringToDepartment) | |
val regions = | |
departments | |
.map(_.region) | |
.distinct | |
val countries = | |
Vector(Country("France")) | |
// ===================================================================================== | |
import org.neo4j.driver.{AuthTokens, GraphDatabase, Session} | |
import org.neo4j.harness.Neo4jBuilders | |
import wvlet.airframe.ulid.ULID | |
import scala.util.Using | |
import scala.util.Random.* | |
import scala.util.chaining.* | |
import scala.jdk.CollectionConverters.* | |
// create a file with all cypher statements for regions, departments, cities | |
def dataImportUsingFixture(): String = { | |
val fixtureCountries = | |
countries.map(c => s"""CREATE (${c.id}:Country {id:"${c.id}", name:"${c.name}"})""".stripMargin) | |
val fixtureRegions = | |
regions | |
.map(r => s"""CREATE (${r.id}:Region {id:"${r.id}", name: "${r.name}", code: "${r.code}"}) | |
|CREATE (${r.id}:Region)-[:InCountry]->(${r.countryId}:Country)""".stripMargin) | |
val fixtureDepartments = | |
departments | |
.map(d => s"""CREATE (${d.id}:Department {id:"${d.id}", name: "${d.name}", code: "${d.code}"}) | |
|CREATE (${d.id}:Department)-[:InRegion]->(${d.region.id}:Region)""".stripMargin) | |
val fixtureCities = | |
postalCodes | |
.filter(_.gps.isDefined) | |
.map(pc => s"""CREATE (${pc.id}:Place {id:"${pc.id}", name:"${pc.name}", point:point({latitude:${pc.gps.get.latitude}, longitude: ${pc.gps.get.longitude}}), code:"${pc.code}"}) | |
|CREATE (${pc.id}:Place)-[:InDepartment]->(${pc.departmentId}:Department)""".stripMargin) | |
val fixtures = (fixtureRegions ++ fixtureDepartments ++ fixtureCities).mkString("\n") | |
os.write.over(os.pwd / "neo4j-spatial-features.fixtures", fixtures) | |
fixtures | |
} | |
def createIndexes(session: Session): Unit = { | |
session.run("CREATE INDEX regions_index IF NOT EXISTS FOR (r:Region) ON (r.id)") | |
session.run("CREATE INDEX departments_index IF NOT EXISTS FOR (d:Department) ON (d.id)") | |
session.run("CREATE INDEX places_index IF NOT EXISTS FOR (c:Place) ON (c.id)") | |
session.run("CREATE POINT INDEX places_index_point IF NOT EXISTS FOR (c:Place) ON (c.point)") | |
} | |
def removeIndexes(session: Session): Unit = { | |
session.run("DROP INDEX regions_index IF EXISTS") | |
session.run("DROP INDEX departments_index IF EXISTS") | |
session.run("DROP INDEX places_index IF EXISTS") | |
session.run("DROP INDEX places_index_point IF EXISTS") | |
} | |
def dataImportUsingCode(session: Session): Unit = { | |
val countryQuery = """CREATE (:Country {id:$id, name:$name})""" | |
for (c <- countries) { | |
println("Importing $c") | |
session.run( | |
countryQuery, | |
Map("id" -> c.id, "name" -> c.name).asJava | |
) | |
} | |
val regionCreateQuery = """MATCH (to:Country {id:$countryId}) | |
|CREATE (:Region {id:$id, name: $name, code: $code})-[:InCountry]->(to)""".stripMargin | |
for (r <- regions) { | |
println(s"Importing $r") | |
session.run( | |
regionCreateQuery, | |
Map("id" -> r.id, "name" -> r.name, "code" -> r.code, "countryId" -> r.countryId).asJava | |
) | |
} | |
val departmentCreateQuery = """MATCH (to:Region {id:$regionId}) | |
|CREATE (:Department {id:$id, name: $name, code: $code})-[:InRegion]->(to)""".stripMargin | |
for (d <- departments) { | |
println(s"Importing $d") | |
session.run( | |
departmentCreateQuery, | |
Map("id" -> d.id, "name" -> d.name, "code" -> d.code, "regionId" -> d.region.id).asJava | |
) | |
} | |
val placeCreateQuery = """MATCH (to:Department {id:$depId}) | |
|CREATE (:Place {id:$id, name:$name, point:point({latitude:$lat, longitude: $lon}), code:$code})-[:InDepartment]->(to)""".stripMargin | |
for (c <- postalCodes.filter(_.gps.nonEmpty)) { | |
println(s"Importing $c") | |
session.run( | |
placeCreateQuery, | |
Map("id" -> c.id, "name" -> c.name, "code" -> c.code, "lat" -> c.gps.get.latitude, "lon" -> c.gps.get.longitude, "depId" -> c.departmentId).asJava | |
) | |
} | |
} | |
def dataAnalysisPlacesCount(session: Session): Unit = { | |
val query = """MATCH (n:Place) RETURN count(n) AS n""" | |
val response = session.run(query) | |
val placesCount = response.single().get("n").asInt() | |
println("----------------------------------------------------------") | |
println(s"places count $placesCount") | |
} | |
def dataAnalysisParisBrestDistance(session: Session): Unit = { | |
val query = | |
""" | |
|WITH | |
| point({longitude: 2.333333, latitude: 48.866667}) AS paris, | |
| point({longitude: -4.48333, latitude: 48.400002}) AS brest | |
|RETURN point.distance(paris, brest) as dist | |
|""".stripMargin | |
val dist = session.run(query).single().get("dist").asDouble() | |
println("----------------------------------------------------------") | |
println(s"distance paris-brest $dist meters") | |
} | |
def dataAnalysisPlacesNearFranceCenter(session: Session): Unit = { | |
val query = | |
""" | |
|MATCH (n:Place) | |
|WITH n.point AS point, point({longitude: 2.576461, latitude: 46.513493}) AS center | |
|WHERE point.distance(center, point) < 10000 | |
|RETURN count(point) AS count | |
|""".stripMargin | |
val count = session.run(query).single().get("count").asDouble() | |
println("----------------------------------------------------------") | |
println(s"They are $count places 10km around the center of France") | |
} | |
def dataAnalysisPlacesByRegion(session: Session): Unit = { | |
val query = | |
""" | |
|MATCH (r:Region) | |
|WITH r | |
|MATCH (r)<--(d:Department)<--(c:Place) | |
|RETURN r.name AS regionName, count(c) AS count | |
|ORDER BY count DESC | |
|""".stripMargin | |
val response = session.run(query) | |
val results = response.list().asScala | |
println("----------------------------------------------------------") | |
results.foreach { result => | |
val regionName = result.get("regionName").asString() | |
val placesCount = result.get("count").asInt() | |
println(s"$placesCount places in region $regionName") | |
} | |
} | |
def dataAnalysisPlacesWithinBoundingBox(session: Session): Unit = { | |
val query = | |
""" | |
|MATCH (n:Place) | |
|WITH | |
| n, | |
| point({longitude: 8.613267, latitude: 41.316747}) AS lowerLeft, | |
| point({longitude: 9.624277, latitude: 43.061031}) AS upperRight | |
|WHERE point.withinBBox(n.point, lowerLeft, upperRight) | |
|RETURN count(n) AS count | |
|""".stripMargin | |
val count = session.run(query).single().get("count").asDouble() | |
println("----------------------------------------------------------") | |
println(s"They are $count places in corsica") | |
} | |
def dataAnalysisDensityByZone(session: Session): Unit = { | |
val step = 0.2d | |
val istep = step - 0.0000001d | |
val longitudes = LazyList.iterate(-4.8d)(_ + step).takeWhile(_ < 8.3d) | |
val latitudes = LazyList.iterate(42.2d)(_ + step).takeWhile(_ < 51.2d) | |
val params = Map( | |
"step" -> istep, | |
"latitudes" -> latitudes.toArray, | |
"longitudes" -> longitudes.toArray | |
) | |
val query = | |
"""UNWIND $latitudes as lat | |
|UNWIND $longitudes as lon | |
|MATCH (p:Place) | |
|WHERE point.withinBBox( | |
| p.point, | |
| point({longitude: lon, latitude: lat}), | |
| point({longitude: (lon + $step), latitude: (lat + $step)}) | |
|) | |
|RETURN lat, lon, count(p) AS count | |
|ORDER BY -count | |
|LIMIT 5 | |
|""".stripMargin | |
val countByZone = | |
session | |
.run(query, params.asJava) | |
.list() | |
.asScala | |
.map(r => ((r.get("lat").asDouble(), r.get("lon").asDouble()), r.get("count").asInt())) | |
.toList | |
println("----------------------------------------------------------") | |
println(s"Zone with highest places density (TOP 5)") | |
countByZone.foreach { case ((lat, lon), count) => | |
println(f" $count%d for bbox ($lat%.3f,$lon%.3f)->(${lat + step}%.3f,${lon + step}%.3f)") | |
} | |
} | |
def dataAnalysis(session: Session): Unit = { | |
val started = System.currentTimeMillis() | |
dataAnalysisPlacesCount(session) | |
dataAnalysisParisBrestDistance(session) | |
dataAnalysisPlacesNearFranceCenter(session) | |
dataAnalysisPlacesByRegion(session) | |
dataAnalysisPlacesWithinBoundingBox(session) | |
dataAnalysisDensityByZone(session) | |
val duration = System.currentTimeMillis() - started | |
println(s"everything executed in ${duration}ms") | |
} | |
// ===================================================================================== | |
def withEmbeddedDatabaseAndFixture(): Unit = { | |
val builder = | |
Neo4jBuilders | |
.newInProcessBuilder() | |
.withFixture(dataImportUsingFixture()) // ~67minutes | |
Using(builder.build()) { embedded => | |
Using(GraphDatabase.driver(embedded.boltURI(), AuthTokens.none())) { driver => | |
Using(driver.session()) { session => | |
createIndexes(session) | |
dataAnalysis(session) | |
}.tap(r => println(r)) | |
}.tap(r => println(r)) | |
}.tap(r => println(r)) | |
} | |
// ===================================================================================== | |
def withEmbeddedDatabase(): Unit = { | |
val builder = | |
Neo4jBuilders | |
.newInProcessBuilder() | |
Using(builder.build()) { embedded => | |
Using(GraphDatabase.driver(embedded.boltURI(), AuthTokens.none())) { driver => | |
Using(driver.session()) { session => | |
createIndexes(session) | |
dataImportUsingCode(session) // ~48seconds (~54seconds with indexes on ids) | |
dataAnalysis(session) | |
}.tap(r => println(r)) | |
}.tap(r => println(r)) | |
}.tap(r => println(r)) | |
} | |
// ===================================================================================== | |
def forConsoleUsagesWithExistingNEO4J(): Session = { | |
// for live test, read to copy/paste instructions | |
val session = | |
GraphDatabase | |
.driver("neo4j://127.0.0.1:7687", AuthTokens.basic("neo4j", "neo4jNEO4J")) | |
.session() | |
session | |
} | |
def forConsoleUsages(): Unit = { | |
// for live test, read to copy/paste instructions | |
val builder = Neo4jBuilders.newInProcessBuilder() | |
val embedded = builder.build() | |
val driver = GraphDatabase.driver(embedded.boltURI(), AuthTokens.none()) | |
val session = driver.session() | |
} | |
def moreExamplesReadyToCopyPaste(session: Session): Unit = { | |
// --------------------------------- | |
// an example query, distance between two points WGS84 3D | |
val dist = | |
session | |
.run( | |
"""RETURN | |
|point.distance( | |
| point({longitude: 2.333333, latitude: 48.866667, height: 30.0}), | |
| point({longitude: 2.333333, latitude: 48.866667, height: 32.0}) | |
|) AS dist | |
|""".stripMargin | |
) | |
.single() | |
.get("dist") | |
.asDouble() | |
// --------------------------------- | |
// loop like behavior => in fact cartesian product between row and col | |
val coords = | |
session | |
.run( | |
"""UNWIND [1,2,3,4,5] as row | |
|UNWIND [1,2,3,4,5] as col | |
|RETURN row AS y, col AS x | |
|""".stripMargin | |
) | |
.list() | |
.asScala | |
.map(r => (r.get("x").asInt(), r.get("y").asInt())) | |
// --------------------------------- | |
// 3D Bounding box - WGS84 | |
val record = | |
session | |
.run( | |
"""RETURN | |
| point.withinBBox( | |
| point({longitude: 2.222222, latitude: 48.333333, height: 25.0}), | |
| point({longitude: 2.111111, latitude: 48.111111, height: 20.0}), | |
| point({longitude: 2.333333, latitude: 48.866667, height: 32.0}) | |
| ) AS isIn1, | |
| point.withinBBox( | |
| point({longitude: 2.222222, latitude: 48.333333, height: 19.0}), | |
| point({longitude: 2.111111, latitude: 48.111111, height: 20.0}), | |
| point({longitude: 2.333333, latitude: 48.866667, height: 32.0}) | |
| ) AS isIn2 | |
|""".stripMargin | |
) | |
.single() | |
val isIn1 = record.get("isIn1").asBoolean() | |
val isIn2 = record.get("isIn2").asBoolean() | |
// --------------------------------- | |
} | |
// ===================================================================================== | |
//withEmbeddedDatabaseAndFixture() | |
withEmbeddedDatabase() | |
/* | |
MATCH (n:Place) | |
RETURN count(n) AS n | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment