Created
October 1, 2016 15:14
-
-
Save zouzias/62d16bae30602a00d6275818f6ddcc8f to your computer and use it in GitHub Desktop.
Spark LuceneRDD full text world cities search notebook
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"paragraphs":[{"text":"%dep\nz.addRepo(\"Spark Packages Repo\").url(\"http://dl.bintray.com/spark-packages/maven\")\nz.load(\"org.zouzias:spark-lucenerdd_2.11:0.2.0\")","dateUpdated":"2016-10-01T14:57:49+0000","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true,"editorMode":"ace/mode/scala","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1475323002559_-245827101","id":"20161001-115642_482200633","result":{"code":"ERROR","type":"TEXT","msg":"Must be used before SparkInterpreter (%spark) initialized\nHint: put this paragraph before any Spark code and restart Zeppelin/Interpreter"},"dateCreated":"2016-10-01T11:56:42+0000","dateStarted":"2016-10-01T14:57:49+0000","dateFinished":"2016-10-01T14:57:49+0000","status":"ERROR","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:1352","title":"Load Spark LuceneRDD Jars"},{"text":"\nspark.version\n","dateUpdated":"2016-10-01T14:57:49+0000","config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true,"editorMode":"ace/mode/scala","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1475323129173_1743876383","id":"20161001-115849_1561773823","dateCreated":"2016-10-01T11:58:49+0000","status":"FINISHED","progressUpdateIntervalMs":500,"$$hashKey":"object:1353","dateFinished":"2016-10-01T14:57:51+0000","dateStarted":"2016-10-01T14:57:49+0000","title":"Verify Spark version","result":{"code":"SUCCESS","type":"TEXT","msg":"\nres24: String = 2.0.0\n"}},{"config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true,"editorMode":"ace/mode/scala","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1475323220159_1466026381","id":"20161001-120020_440303718","dateCreated":"2016-10-01T12:00:20+0000","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:1525","dateUpdated":"2016-10-01T14:57:49+0000","dateFinished":"2016-10-01T14:57:53+0000","dateStarted":"2016-10-01T14:57:50+0000","title":"Imports for spark-lucenerdd","result":{"code":"SUCCESS","type":"TEXT","msg":"\nimport scala.io.Source\n\nimport org.zouzias.spark.lucenerdd.partition.LuceneRDDPartition\n\nimport org.zouzias.spark.lucenerdd._\n\nimport org.zouzias.spark.lucenerdd.LuceneRDD\n"},"text":"import scala.io.Source\nimport org.zouzias.spark.lucenerdd.partition.LuceneRDDPartition\nimport org.zouzias.spark.lucenerdd._\nimport org.zouzias.spark.lucenerdd.LuceneRDD\n"},{"config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true,"title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1475323400393_-1781422379","id":"20161001-120320_2078137697","dateCreated":"2016-10-01T12:03:20+0000","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:1611","dateUpdated":"2016-10-01T14:57:50+0000","dateFinished":"2016-10-01T14:57:59+0000","dateStarted":"2016-10-01T14:57:51+0000","title":"Load world cities","result":{"code":"SUCCESS","type":"TEXT","msg":"\ncities: Seq[String] = Stream(AEaenekoski, ?)\n\nrdd: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[32] at parallelize at <console>:47\n\nluceneRDD: org.zouzias.spark.lucenerdd.LuceneRDD[String] = LuceneRDD[34] at RDD at LuceneRDD.scala:40\n\nres25: luceneRDD.type = LuceneRDD[34] at RDD at LuceneRDD.scala:40\n\nres26: Long = 38106\n"},"text":"val cities = Source.fromURL(\"https://raw.githubusercontent.com/zouzias/spark-lucenerdd/master/src/test/resources/cities.txt\").getLines().toSeq\nval rdd = sc.parallelize(cities)\nval luceneRDD = LuceneRDD(rdd)\nluceneRDD.cache\nluceneRDD.count\n"},{"config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true,"title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1475333792324_-17641549","id":"20161001-145632_657667869","dateCreated":"2016-10-01T14:56:32+0000","status":"FINISHED","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:1818","dateUpdated":"2016-10-01T15:11:58+0000","dateFinished":"2016-10-01T15:12:03+0000","dateStarted":"2016-10-01T15:11:58+0000","title":"Full-text search functionality","result":{"code":"SUCCESS","type":"TEXT","msg":"====================\nSparkScoreDoc(10.8549795,33837,0,Text fields:_1:[Toronto])\n====================\nSparkScoreDoc(1.0,1964,0,Text fields:_1:[Athol])\nSparkScoreDoc(1.0,1963,0,Text fields:_1:[Athni])\nSparkScoreDoc(1.0,1962,0,Text fields:_1:[Athlone])\nSparkScoreDoc(1.0,1961,0,Text fields:_1:[Athis-Mons])\nSparkScoreDoc(1.0,1960,0,Text fields:_1:[Atherton])\nSparkScoreDoc(1.0,1959,0,Text fields:_1:[Atherstone])\nSparkScoreDoc(1.0,1958,0,Text fields:_1:[Athens])\nSparkScoreDoc(1.0,1957,0,Text fields:_1:[Athelstone])\nSparkScoreDoc(1.0,1956,0,Text fields:_1:[Athagarh])\nSparkScoreDoc(1.0,1955,0,Text fields:_1:[Ath])\n====================\nSparkScoreDoc(2.1096375,3606,0,Text fields:_1:[Bern])\nSparkScoreDoc(1.5822282,4552,0,Text fields:_1:[Borne])\nSparkScoreDoc(1.5822282,4550,0,Text fields:_1:[Born])\nSparkScoreDoc(1.5822282,4305,0,Text fields:_1:[Boerne])\nSparkScoreDoc(1.5822282,3734,0,Text fields:_1:[Beynes])\nSparkScoreDoc(1.5822282,3568,0,Text fields:_1:[Berg])\nSparkScoreDoc(1.5822282,3542,0,Text fields:_1:[Bere])\nSparkScoreDoc(1.5822282,3536,0,Text fields:_1:[Berd])\nSparkScoreDoc(1.5822282,3518,0,Text fields:_1:[Berane])\nSparkScoreDoc(1.5822282,3516,0,Text fields:_1:[Bera])\n====================\nSparkScoreDoc(10.449514,29600,0,Text fields:_1:[Santes])\nSparkScoreDoc(6.5309467,29613,0,Text fields:_1:[Sants-Montjuic])\n"},"text":"println(\"=\" * 20)\nluceneRDD.termQuery(\"_1\", \"toronto\").take(10).foreach(println)\n\nprintln(\"=\" * 20)\nluceneRDD.prefixQuery(\"_1\", \"ath\").take(10).foreach(println)\n\nprintln(\"=\" * 20)\nluceneRDD.fuzzyQuery(\"_1\", \"bern\", 2).take(10).foreach(println)\n\nprintln(\"=\" * 20)\nluceneRDD.phraseQuery(\"_1\", \"sant\").take(10).foreach(println)"},{"config":{"colWidth":12,"graph":{"mode":"table","height":300,"optionOpen":false,"keys":[],"values":[],"groups":[],"scatter":{},"map":{"baseMapType":"Streets","isOnline":true,"pinCols":[]}},"enabled":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1475333870642_-1603530019","id":"20161001-145750_1024985566","dateCreated":"2016-10-01T14:57:50+0000","status":"READY","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:1950","dateUpdated":"2016-10-01T14:58:01+0000","text":""}],"name":"Spark LuceneRDD","id":"2BXC9TF8J","angularObjects":{"2BXTJKSEW:shared_process":[],"2BX1ANVHB:shared_process":[],"2BWH95CND:shared_process":[],"2BWZ9CBVH:shared_process":[],"2BYHYZ468:shared_process":[],"2BWM5AEUK:shared_process":[],"2BY21WDMU:shared_process":[],"2BWVWZKJ2:shared_process":[],"2BYFQZ1HM:shared_process":[]},"config":{"looknfeel":"default"},"info":{}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment