Last active
September 21, 2018 18:07
-
-
Save jwkidd3/b2d22072853b5cdd362c56a0ffbfa12d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| https://jkstoragedata.blob.core.windows.net/data/spark_materials.tar.gz | |
| https://jkstoragedata.blob.core.windows.net/data/products.json.zip | |
| spark-shell --driver-memory 10G --executor-memory 15G --executor-cores 8 | |
| -- Hive DDL for the `movie` table: three columns (id, name, year) stored as | |
| -- tab-delimited, newline-terminated text under the warehouse path given in LOCATION. | |
| -- The table comment records that it was imported by sqoop. | |
| CREATE TABLE `movie` ( | |
|     `id`   INT, | |
|     `name` STRING, | |
|     `year` INT | |
| ) | |
| COMMENT 'Imported by sqoop on 2018/09/21 06:58:26' | |
| ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' | |
| WITH SERDEPROPERTIES ( | |
|     'field.delim' = '\t', | |
|     'line.delim' = '\n', | |
|     'serialization.format' = '\t' | |
| ) | |
| STORED AS | |
|     INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' | |
|     OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' | |
| LOCATION 'hdfs://quickstart.cloudera:8020/user/hive/warehouse/movie' | |
| TBLPROPERTIES ( | |
|     'COLUMN_STATS_ACCURATE' = 'true', | |
|     'numFiles' = '4', | |
|     'totalSize' = '102052', | |
|     'transient_lastDdlTime' = '1537538311' | |
| ) | |
| ## Loading Hive table in Spark SQL | |
| // Query the Hive `movie` table through Spark SQL and print up to 20 rows. | |
| // Columns are listed explicitly (id, name, year) instead of `*` so the result | |
| // shape does not silently change if the table schema is altered. | |
| // NOTE: there is no ORDER BY, so which 20 rows come back is not guaranteed. | |
| val df = sqlContext.sql("SELECT `id`, `name`, `year` FROM movie LIMIT 20") | |
| df.show() | |
| ## Loading JSON with Spark SQL | |
| // Load the products JSON file into a DataFrame. | |
| val json = sqlContext.read.json("products.json") | |
| // Keep only the name and price columns and preview them. | |
| val df2 = json.select("name", "price") | |
| df2.show() | |
| // Write the projection out in Parquet format (output path uses the .parquet extension). | |
| df2.write.format("parquet").save("parquet_products.parquet") | |
| // Keep only the rows whose name contains the substring "Body" and preview them. | |
| val df3 = df2.filter(col("name").contains("Body")) | |
| df3.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment