Last active
November 23, 2020 13:29
-
-
Save jiayuasu/84029bc9877804b876d209936e5b757c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*---------------------------- GeoSpark 0.6 (or later) Scala API usage ----------------------------*/
/*
 * If you are writing a GeoSpark program in the Spark Scala Shell, there is no need to declare the Spark Context yourself.
 * If you are writing a self-contained GeoSpark Scala program, declare the Spark Context as follows and
 * stop it at the end of the entire program.
 */
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
/*
 * NOTE(review): the master URL below names one specific machine; replace it with the master URL of your own cluster.
 */
val conf = new SparkConf().setAppName("Simple Application").setMaster("spark://jias-mbp.mobile.asu.edu:7077")
val sc = new SparkContext(conf)
/*---------------------------- Start an example Spatial Range Query without Index ----------------------------*/
import org.datasyslab.geospark.spatialOperator.RangeQuery
import org.datasyslab.geospark.spatialRDD.PointRDD
import com.vividsolutions.jts.geom.Envelope
import org.datasyslab.geospark.enums.FileDataSplitter
val queryEnvelope = new Envelope(-113.79, -109.73, 32.99, 35.08)
/* Range query window format: minX, maxX, minY, maxY */
val objectRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV means the data format is CSV. GeoSpark supports CSV, TSV, WKT, GeoJSON and a self-defined format mapper.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * StorageLevel.MEMORY_ONLY is the Spark storage level handed to the constructor.
 */
val resultSize = RangeQuery.SpatialRangeQuery(objectRDD, queryEnvelope, false, false).count()
/*
 * The first false means don't consider objects that intersect but are not fully covered by the query window,
 * i.e. a point is considered only if it is fully covered by the query window.
 * The second false means don't use a spatial index.
 */
/*---------------------------- End an example Spatial Range Query without Index ----------------------------*/
/*---------------------------- Start an example Spatial Range Query with Index ----------------------------*/
import org.datasyslab.geospark.spatialOperator.RangeQuery
import org.datasyslab.geospark.spatialRDD.PointRDD
import com.vividsolutions.jts.geom.Envelope
import org.datasyslab.geospark.enums.FileDataSplitter
import org.datasyslab.geospark.enums.IndexType
val queryEnvelope = new Envelope(-113.79, -109.73, 32.99, 35.08)
/* Range query window format: minX, maxX, minY, maxY */
val objectRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV enum means the data format is CSV. GeoSpark supports CSV, TSV, WKT, GeoJSON and a self-defined format mapper.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * StorageLevel.MEMORY_ONLY is the Spark storage level handed to the constructor.
 */
objectRDD.buildIndex(IndexType.RTREE, false)
/*
 * IndexType.RTREE enum means the index type is R-Tree. GeoSpark supports R-Tree index and Quad-Tree index.
 * false means build the index on the original spatial RDD instead of the spatial partitioned RDD. ONLY set true when doing a Spatial Join Query.
 */
val resultSize = RangeQuery.SpatialRangeQuery(objectRDD, queryEnvelope, false, true).count()
/*
 * The false means don't consider objects that intersect but are not fully covered by the query window,
 * i.e. a point is considered only if it is fully covered by the query window.
 * The true means use the spatial index which has been built before.
 */
/*---------------------------- End an example Spatial Range Query with Index ----------------------------*/
/*---------------------------- Start an example Spatial KNN Query without Index ----------------------------*/
import org.datasyslab.geospark.spatialOperator.KNNQuery
import org.datasyslab.geospark.spatialRDD.PointRDD
import com.vividsolutions.jts.geom.GeometryFactory
import com.vividsolutions.jts.geom.Point
import com.vividsolutions.jts.geom.Coordinate
import org.datasyslab.geospark.enums.FileDataSplitter
val fact = new GeometryFactory()
val queryPoint = fact.createPoint(new Coordinate(-109.73, 35.08))
/* Query point format: X, Y */
val objectRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV means the data format is CSV. GeoSpark supports CSV, TSV, WKT, GeoJSON and a self-defined format mapper.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * StorageLevel.MEMORY_ONLY is the Spark storage level handed to the constructor.
 */
val resultSize = KNNQuery.SpatialKnnQuery(objectRDD, queryPoint, 5, false).size()
/*
 * The number 5 means 5 nearest neighbors.
 * The false means don't use a spatial index.
 */
/*---------------------------- End an example Spatial KNN Query without Index ----------------------------*/
/*---------------------------- Start an example Spatial KNN Query with Index ----------------------------*/
import org.datasyslab.geospark.spatialOperator.KNNQuery
import org.datasyslab.geospark.spatialRDD.PointRDD
import com.vividsolutions.jts.geom.GeometryFactory
import com.vividsolutions.jts.geom.Point
import com.vividsolutions.jts.geom.Coordinate
import org.datasyslab.geospark.enums.FileDataSplitter
import org.datasyslab.geospark.enums.IndexType
val fact = new GeometryFactory()
val queryPoint = fact.createPoint(new Coordinate(-109.73, 35.08))
/* Query point format: X, Y */
val objectRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV means the data format is CSV. GeoSpark supports CSV, TSV, WKT, GeoJSON and a self-defined format mapper.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * StorageLevel.MEMORY_ONLY is the Spark storage level handed to the constructor.
 */
objectRDD.buildIndex(IndexType.RTREE, false)
/*
 * IndexType.RTREE enum means the index type is R-Tree. GeoSpark supports R-Tree index and Quad-Tree index, but Quad-Tree doesn't support KNN.
 * false means build the index on the original spatial RDD instead of the spatial partitioned RDD. ONLY set true when doing a Spatial Join Query.
 */
val resultSize = KNNQuery.SpatialKnnQuery(objectRDD, queryPoint, 5, true).size()
/*
 * The number 5 means 5 nearest neighbors.
 * The true means use the spatial index.
 */
/*---------------------------- End an example Spatial KNN Query with Index ----------------------------*/
/*---------------------------- Start an example Spatial Join Query without Index ----------------------------*/
import org.datasyslab.geospark.spatialOperator.JoinQuery
import org.datasyslab.geospark.spatialRDD.PointRDD
import org.datasyslab.geospark.spatialRDD.RectangleRDD
import org.datasyslab.geospark.enums.FileDataSplitter
import org.datasyslab.geospark.enums.GridType
val objectRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV means the data format is CSV. GeoSpark supports CSV, TSV, WKT, GeoJSON and a self-defined format mapper.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * StorageLevel.MEMORY_ONLY is the Spark storage level handed to the constructor.
 */
val rectangleRDD = new RectangleRDD(sc, "/Users/jiayu/VM_Share/resources/zcta510.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * The arguments have the same meaning as for objectRDD above: starting column 0, CSV format,
 * don't carry the original input tuple, and the Spark storage level.
 */
objectRDD.spatialPartitioning(GridType.RTREE)
/*
 * GridType.RTREE means use the R-Tree spatial partitioning technique. It will take the leaf node boundaries as partition boundaries.
 * GeoSpark supports R-Tree partitioning and Voronoi diagram partitioning.
 */
rectangleRDD.spatialPartitioning(objectRDD.grids)
/*
 * Use the partition boundaries of objectRDD to repartition the query window RDD. This is mandatory.
 */
val resultSize = JoinQuery.SpatialJoinQuery(objectRDD, rectangleRDD, false, false).count()
/*
 * The first false means don't use a spatial index.
 * The second false means don't consider objects that intersect but are not fully covered by the rectangles.
 */
/*---------------------------- End an example Spatial Join Query without Index ----------------------------*/
/*---------------------------- Start an example Spatial Join Query with Index ----------------------------*/
import org.datasyslab.geospark.spatialOperator.JoinQuery
import org.datasyslab.geospark.spatialRDD.PointRDD
import org.datasyslab.geospark.spatialRDD.RectangleRDD
import org.datasyslab.geospark.enums.FileDataSplitter
import org.datasyslab.geospark.enums.GridType
import org.datasyslab.geospark.enums.IndexType
val objectRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV means the data format is CSV. GeoSpark supports CSV, TSV, WKT, GeoJSON and a self-defined format mapper.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * StorageLevel.MEMORY_ONLY is the Spark storage level handed to the constructor.
 */
val rectangleRDD = new RectangleRDD(sc, "/Users/jiayu/VM_Share/resources/zcta510.csv", 0, FileDataSplitter.CSV, false)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV means the data format is CSV.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * NOTE(review): unlike the other RDD constructors in this file, this call passes no StorageLevel - confirm that this is intentional.
 */
objectRDD.spatialPartitioning(GridType.RTREE)
/*
 * GridType.RTREE means use the R-Tree spatial partitioning technique. It will take the leaf node boundaries as partition boundaries.
 * GeoSpark supports R-Tree partitioning and Voronoi diagram partitioning.
 */
objectRDD.buildIndex(IndexType.RTREE, true)
/*
 * IndexType.RTREE enum means the index type is R-Tree. GeoSpark supports R-Tree index and Quad-Tree index, but Quad-Tree doesn't support KNN.
 * true means build the index on the spatial partitioned RDD. ONLY set true when doing a Spatial Join Query.
 */
rectangleRDD.spatialPartitioning(objectRDD.grids)
/*
 * Use the partition boundaries of objectRDD to repartition the query window RDD. This is mandatory.
 */
val resultSize = JoinQuery.SpatialJoinQuery(objectRDD, rectangleRDD, true, false).count()
/*
 * The true means use the spatial index.
 * The false means don't consider objects that intersect but are not fully covered by the rectangles.
 */
/*---------------------------- End an example Spatial Join Query with Index ----------------------------*/
/*---------------------------- Start an example Distance Join Query without Index ----------------------------*/
import org.datasyslab.geospark.spatialOperator.JoinQuery
import org.datasyslab.geospark.spatialRDD.PointRDD
import org.datasyslab.geospark.spatialRDD.CircleRDD
import org.datasyslab.geospark.enums.FileDataSplitter
import org.datasyslab.geospark.enums.GridType
val objectRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV means the data format is CSV. GeoSpark supports CSV, TSV, WKT, GeoJSON and a self-defined format mapper.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * StorageLevel.MEMORY_ONLY is the Spark storage level handed to the constructor.
 */
val centerGeometryRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * The arguments have the same meaning as for objectRDD above; this example reads the same input file for both RDDs.
 */
val queryRDD = new CircleRDD(centerGeometryRDD, 0.1)
/*
 * 0.1 is the join distance, i.e. the distance between two objects from the two Spatial RDDs.
 */
objectRDD.spatialPartitioning(GridType.RTREE)
/*
 * GridType.RTREE means use the R-Tree spatial partitioning technique. It will take the leaf node boundaries as partition boundaries.
 * GeoSpark supports R-Tree partitioning and Voronoi diagram partitioning.
 */
queryRDD.spatialPartitioning(objectRDD.grids)
/*
 * Use the partition boundaries of objectRDD to repartition the query window RDD. This is mandatory.
 */
val resultSize = JoinQuery.DistanceJoinQuery(objectRDD, queryRDD, false, false).count()
/*
 * The first false means don't use a spatial index.
 * The second false means don't consider objects that intersect but are not fully covered by the query windows (here, the circles of queryRDD).
 */
/*---------------------------- End an example Distance Join Query without Index ----------------------------*/
/*---------------------------- Start an example Distance Join Query with Index ----------------------------*/
import org.datasyslab.geospark.spatialOperator.JoinQuery
import org.datasyslab.geospark.spatialRDD.PointRDD
import org.datasyslab.geospark.spatialRDD.CircleRDD
import org.datasyslab.geospark.enums.FileDataSplitter
import org.datasyslab.geospark.enums.GridType
import org.datasyslab.geospark.enums.IndexType
val objectRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * 0 is the starting column of spatial data in the input file.
 * FileDataSplitter.CSV means the data format is CSV. GeoSpark supports CSV, TSV, WKT, GeoJSON and a self-defined format mapper.
 * false means each spatial object doesn't need to carry the original input tuple with it.
 * StorageLevel.MEMORY_ONLY is the Spark storage level handed to the constructor.
 */
val centerGeometryRDD = new PointRDD(sc, "/Users/jiayu/VM_Share/resources/arealm.csv", 0, FileDataSplitter.CSV, false, StorageLevel.MEMORY_ONLY)
/*
 * The arguments have the same meaning as for objectRDD above; this example reads the same input file for both RDDs.
 */
val queryRDD = new CircleRDD(centerGeometryRDD, 0.1)
/*
 * 0.1 is the join distance, i.e. the distance between two objects from the two Spatial RDDs.
 */
objectRDD.spatialPartitioning(GridType.RTREE)
/*
 * GridType.RTREE means use the R-Tree spatial partitioning technique. It will take the leaf node boundaries as partition boundaries.
 * GeoSpark supports R-Tree partitioning and Voronoi diagram partitioning.
 */
objectRDD.buildIndex(IndexType.RTREE, true)
/*
 * IndexType.RTREE enum means the index type is R-Tree. GeoSpark supports R-Tree index and Quad-Tree index, but Quad-Tree doesn't support KNN.
 * true means build the index on the spatial partitioned RDD. ONLY set true when doing a Spatial Join Query.
 */
queryRDD.spatialPartitioning(objectRDD.grids)
/*
 * Use the partition boundaries of objectRDD to repartition the query window RDD. This is mandatory.
 */
val resultSize = JoinQuery.DistanceJoinQuery(objectRDD, queryRDD, true, false).count()
/*
 * The true means use the spatial index.
 * The false means don't consider objects that intersect but are not fully covered by the query windows (here, the circles of queryRDD).
 */
/*---------------------------- End an example Distance Join Query with Index ----------------------------*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment