Last active
September 2, 2021 02:31
-
-
Save stuzero/073a714c6caac7757459372f781c84d2 to your computer and use it in GitHub Desktop.
US County Shapefile loaded in Apache Sedona (GeoSpark)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Loads the US Census TIGER/Line county shapefile into a Spark DataFrame using
// Apache Sedona. Intended to be pasted into a spark-shell started like this
// (the shell provides the `spark` session and `sc` context used below):
//
// ./spark-shell \
// --packages org.apache.sedona:sedona-core-3.0_2.12:1.0.1-incubating,org.apache.sedona:sedona-sql-3.0_2.12:1.0.1-incubating,org.locationtech.jts:jts-core:1.18.2,org.datasyslab:geotools-wrapper:geotools-24.1 \
// --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
// --conf spark.kryo.registrator=org.apache.sedona.core.serde.SedonaKryoRegistrator
import org.apache.sedona.core.formatMapper.shapefileParser.ShapefileReader
import org.apache.sedona.core.spatialRDD.SpatialRDD
import org.apache.sedona.sql.utils.{Adapter, SedonaSQLRegistrator}
import org.locationtech.jts.geom.{Coordinate, Geometry, GeometryFactory}

// Register Sedona's SQL types and ST_* functions on the session so they can be
// used from spark.sql below.
SedonaSQLRegistrator.registerAll(spark)

// Have Sedona decode shapefile attribute text as UTF-8 (county names contain
// non-ASCII characters). Must be set before the shapefile is read.
System.setProperty("sedona.global.charset", "utf8")

// Directory containing the shapefile components (.shp, .shx, .dbf, ...).
val shapefileInputLocation = "/home/ubuntu/geo/tl_2020_us_county"

// Read the shapefile into a spatial RDD, then expose it as a DataFrame and a
// temp view so it can be reshaped with SQL.
val countyRDD = ShapefileReader.readToGeometryRDD(sc, shapefileInputLocation)
val rawSpatialDf = Adapter.toDf(countyRDD, spark)
rawSpatialDf.createOrReplaceTempView("rawSpatialDf")

// Projection applied to the raw view:
//  - ALAND / AWATER are cast to BIGINT rather than Integer: these area fields
//    are up to 14 digits wide in the TIGER/Line layout, which does not fit in
//    32 bits, and Spark's default (non-ANSI) cast yields NULL on overflow.
//    They are also aliased so the columns keep their plain names.
//  - The internal point is built as ST_Point(longitude, latitude), i.e. x/y
//    order, even though the output column is named INTPTLATLONG.
val transformSQL =
  """SELECT
    |  geometry AS GEOM,
    |  STATEFP, COUNTYFP, COUNTYNS, GEOID, NAME, NAMELSAD, LSAD, CLASSFP, MTFCC,
    |  CSAFP, CBSAFP, METDIVFP, FUNCSTAT,
    |  CAST(ALAND AS BIGINT) AS ALAND,
    |  CAST(AWATER AS BIGINT) AS AWATER,
    |  ST_Point(CAST(INTPTLON AS decimal(24,20)), CAST(INTPTLAT AS decimal(24,20))) AS INTPTLATLONG
    |FROM rawSpatialDf""".stripMargin

val countyDf = spark.sql(transformSQL)
countyDf.printSchema()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment