Last active
May 5, 2019 23:40
-
-
Save zouzias/7d16e63a0351f1e546e238ec0a55c229 to your computer and use it in GitHub Desktop.
Spark JDBC DataFrame Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Read the `wikipedia.page` table from a local MySQL instance via Spark's JDBC source.
// NOTE(review): credentials are hard-coded placeholders ("root"/"XXXX") — supply real
// ones through configuration, never in source.
val jdbc = spark.read
  .format("jdbc")
  .option("url", "jdbc:mysql://localhost:3306/wikipedia")
  .option("dbtable", "wikipedia.page")
  .option("user", "root")
  .option("password", "XXXX")
  .load() // side-effecting (opens a JDBC connection) — keep the parentheses
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Example: serialize each row of `nodes` (with its geometry rendered as GeoJSON) to JSON.
select row_to_json(row(t, ST_AsGeoJSON(geom))) from (select * from nodes) t limit 10;

-- Copy the full result set to a file inside the Postgres container.
copy (select row_to_json(row(t, ST_AsGeoJSON(geom))) from (select * from nodes) t) TO '/tmp/osm-swiss-nodes.json' ;

# Copy the export from the docker container (id 281153bf0b9a) to the host home dir.
docker cp 281153bf0b9a:/tmp/osm-swiss-nodes.json ~

-- WAYS: same export for the `ways` table, whose geometry column is `linestring`.
copy (select row_to_json(row(t, ST_AsGeoJSON(linestring))) from (select * from ways) t) TO '/tmp/osm-swiss-ways.json' ;
docker cp 281153bf0b9a:/tmp/osm-swiss-ways.json ~
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Export the OSM `nodes` table from PostgreSQL to Parquet with Spark.
//
// Start the shell with the Postgres JDBC driver on the classpath:
//   spark-shell --master local[*] --packages org.postgresql:postgresql:9.4.1207.jre7
//
// See http://stackoverflow.com/questions/34912763/spark-cannot-find-the-postgres-jdbc-driver
// for an explanation of the partitioning parameters.
val connectionUrl = "jdbc:postgresql://46.101.203.224:5432/pgsnapshot"
val dbTable = "nodes"
val driver = "org.postgresql.Driver"
val partitionColumn = "id" // numeric column Spark uses to split the read into parallel queries
val userName = "docker"
val password = "docker" // NOTE(review): credentials hard-coded — acceptable only for a throwaway docker instance

// partitionColumn / lowerBound / upperBound / numPartitions control parallelism:
// ids in [0, 3000] are divided across 300 concurrent JDBC partitions.
val options: Map[String, String] = Map(
  "driver"          -> driver,
  "url"             -> connectionUrl,
  "dbtable"         -> dbTable,
  "user"            -> userName,
  "password"        -> password,
  "partitionColumn" -> partitionColumn,
  "lowerBound"      -> "0",
  "upperBound"      -> "3000",
  "numPartitions"   -> "300"
)

// Side-effecting call (opens JDBC connections) — keep the parentheses on load().
val jdbc = spark.read.format("jdbc").options(options).load()
jdbc.write.parquet("swiss-nodes.parquet")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment