|
ubuntu:~$ ~/spark-1.4.1-bin-hadoop2.6/bin/spark-shell --packages com.databricks:spark-csv_2.10:1.0.3 --driver-memory 52g --conf "spark.driver.extraJavaOptions=-XX:MaxPermSize=512m" --conf "spark.local.dir=/data/spark/tmp" |
|
Ivy Default Cache set to: /home/ubuntu/.ivy2/cache |
|
The jars for the packages stored in: /home/ubuntu/.ivy2/jars |
|
:: loading settings :: url = jar:file:/home/ubuntu/spark-1.4.1-bin-hadoop2.6/lib/spark-assembly-1.4.1-hadoop2.6.0.jar!/org/apache/ivy/core/settings/ivysettings.xml |
|
com.databricks#spark-csv_2.10 added as a dependency |
|
:: resolving dependencies :: org.apache.spark#spark-submit-parent;1.0 |
|
confs: [default] |
|
found com.databricks#spark-csv_2.10;1.0.3 in central |
|
found org.apache.commons#commons-csv;1.1 in central |
|
:: resolution report :: resolve 212ms :: artifacts dl 7ms |
|
:: modules in use: |
|
com.databricks#spark-csv_2.10;1.0.3 from central in [default] |
|
org.apache.commons#commons-csv;1.1 from central in [default] |
|
---------------------------------------------------------------------
|                  |            modules            ||   artifacts   |
|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
---------------------------------------------------------------------
|      default     |   2   |   0   |   0   |   0   ||   2   |   0   |
---------------------------------------------------------------------
|
:: retrieving :: org.apache.spark#spark-submit-parent |
|
confs: [default] |
|
0 artifacts copied, 2 already retrieved (0kB/6ms) |
|
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory). |
|
log4j:WARN Please initialize the log4j system properly. |
|
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. |
|
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties |
|
15/07/25 15:51:32 INFO SecurityManager: Changing view acls to: ubuntu |
|
15/07/25 15:51:32 INFO SecurityManager: Changing modify acls to: ubuntu |
|
15/07/25 15:51:32 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ubuntu); users with modify permissions: Set(ubuntu) |
|
15/07/25 15:51:33 INFO HttpServer: Starting HTTP Server |
|
15/07/25 15:51:33 INFO Utils: Successfully started service 'HTTP class server' on port 51235. |
|
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/ '_/
   /___/ .__/\_,_/_/ /_/\_\   version 1.4.1
      /_/
|
|
|
Using Scala version 2.10.4 (OpenJDK 64-Bit Server VM, Java 1.7.0_79) |
|
Type in expressions to have them evaluated. |
|
Type :help for more information. |
|
15/07/25 15:51:36 WARN Utils: Your hostname, ip-10-88-50-154 resolves to a loopback address: 127.0.0.1; using 10.88.50.154 instead (on interface eth0) |
|
15/07/25 15:51:36 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address |
|
15/07/25 15:51:36 INFO SparkContext: Running Spark version 1.4.1 |
|
15/07/25 15:51:36 WARN SparkConf: In Spark 1.0 and later spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone and LOCAL_DIRS in YARN). |
|
15/07/25 15:51:36 INFO SecurityManager: Changing view acls to: ubuntu |
|
15/07/25 15:51:36 INFO SecurityManager: Changing modify acls to: ubuntu |
|
15/07/25 15:51:36 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ubuntu); users with modify permissions: Set(ubuntu) |
|
15/07/25 15:51:36 INFO Slf4jLogger: Slf4jLogger started |
|
15/07/25 15:51:36 INFO Remoting: Starting remoting |
|
15/07/25 15:51:36 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://[email protected]:59609] |
|
15/07/25 15:51:36 INFO Utils: Successfully started service 'sparkDriver' on port 59609. |
|
15/07/25 15:51:36 INFO SparkEnv: Registering MapOutputTracker |
|
15/07/25 15:51:36 INFO SparkEnv: Registering BlockManagerMaster |
|
15/07/25 15:51:36 INFO DiskBlockManager: Created local directory at /data/spark/tmp/spark-bf5752a7-9e85-40dd-8911-6890f416397b/blockmgr-4ca95ec5-6553-4489-aa6a-0754d0ae56ef |
|
15/07/25 15:51:36 INFO MemoryStore: MemoryStore started with capacity 26.9 GB |
|
15/07/25 15:51:36 INFO HttpFileServer: HTTP File server directory is /data/spark/tmp/spark-bf5752a7-9e85-40dd-8911-6890f416397b/httpd-d6a4e8b0-4bae-4905-9884-2b800fb0288f |
|
15/07/25 15:51:36 INFO HttpServer: Starting HTTP Server |
|
15/07/25 15:51:36 INFO Utils: Successfully started service 'HTTP file server' on port 56500. |
|
15/07/25 15:51:36 INFO SparkEnv: Registering OutputCommitCoordinator |
|
15/07/25 15:51:36 INFO Utils: Successfully started service 'SparkUI' on port 4040. |
|
15/07/25 15:51:36 INFO SparkUI: Started SparkUI at http://10.88.50.154:4040 |
|
15/07/25 15:51:36 INFO SparkContext: Added JAR file:/home/ubuntu/.ivy2/jars/com.databricks_spark-csv_2.10-1.0.3.jar at http://10.88.50.154:56500/jars/com.databricks_spark-csv_2.10-1.0.3.jar with timestamp 1437839496834 |
|
15/07/25 15:51:36 INFO SparkContext: Added JAR file:/home/ubuntu/.ivy2/jars/org.apache.commons_commons-csv-1.1.jar at http://10.88.50.154:56500/jars/org.apache.commons_commons-csv-1.1.jar with timestamp 1437839496835 |
|
15/07/25 15:51:36 INFO Executor: Starting executor ID driver on host localhost |
|
15/07/25 15:51:36 INFO Executor: Using REPL class URI: http://10.88.50.154:51235 |
|
15/07/25 15:51:36 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 54164. |
|
15/07/25 15:51:36 INFO NettyBlockTransferService: Server created on 54164 |
|
15/07/25 15:51:36 INFO BlockManagerMaster: Trying to register BlockManager |
|
15/07/25 15:51:36 INFO BlockManagerMasterEndpoint: Registering block manager localhost:54164 with 26.9 GB RAM, BlockManagerId(driver, localhost, 54164) |
|
15/07/25 15:51:36 INFO BlockManagerMaster: Registered BlockManager |
|
15/07/25 15:51:37 INFO SparkILoop: Created spark context.. |
|
Spark context available as sc. |
|
15/07/25 15:51:37 INFO HiveContext: Initializing execution hive, version 0.13.1 |
|
15/07/25 15:51:37 INFO HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore |
|
15/07/25 15:51:37 INFO ObjectStore: ObjectStore, initialize called |
|
15/07/25 15:51:37 INFO Persistence: Property datanucleus.cache.level2 unknown - will be ignored |
|
15/07/25 15:51:37 INFO Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored |
|
15/07/25 15:51:37 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) |
|
15/07/25 15:51:38 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) |
|
15/07/25 15:51:39 INFO ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order" |
|
15/07/25 15:51:39 INFO MetaStoreDirectSql: MySQL check failed, assuming we are not on mysql: Lexical error at line 1, column 5. Encountered: "@" (64), after : "". |
|
15/07/25 15:51:39 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. |
|
15/07/25 15:51:39 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. |
|
15/07/25 15:51:41 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. |
|
15/07/25 15:51:41 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. |
|
15/07/25 15:51:41 INFO ObjectStore: Initialized ObjectStore |
|
15/07/25 15:51:41 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 0.13.1aa |
|
15/07/25 15:51:42 INFO HiveMetaStore: Added admin role in metastore |
|
15/07/25 15:51:42 INFO HiveMetaStore: Added public role in metastore |
|
15/07/25 15:51:42 INFO HiveMetaStore: No user is added in admin role, since config is empty |
|
15/07/25 15:51:42 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr. |
|
15/07/25 15:51:42 INFO SparkILoop: Created sql context (with Hive support).. |
|
SQL context available as sqlContext. |
|
|
|
scala> // This code is designed to be pasted in spark-shell in a *nix environment |
|
|
|
scala> // On Windows, replace sys.env("HOME") with a directory of your choice |
|
|
|
scala> |
|
|
|
scala> import java.io.File |
|
import java.io.File |
|
|
|
scala> import java.io.PrintWriter |
|
import java.io.PrintWriter |
|
|
|
scala> import org.apache.spark.sql.hive.HiveContext |
|
import org.apache.spark.sql.hive.HiveContext |
|
|
|
scala> |
|
|
|
scala> val ctx = sqlContext.asInstanceOf[HiveContext] |
|
ctx: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@73c6e165 |
|
|
|
scala> import ctx.implicits._ |
|
import ctx.implicits._ |
|
|
|
scala> |
|
|
|
scala> // Test data |
|
|
|
scala> val json = """{"category" : "A", "num" : 5}""" |
|
json: String = {"category" : "A", "num" : 5} |
|
|
|
scala> |
|
|
|
scala> // Load test data in a table called test |
|
|
|
scala> val path = sys.env("HOME") + "/test_data.jsonlines" |
|
path: String = /home/ubuntu/test_data.jsonlines |
|
|
|
scala> new PrintWriter(path) { write(json); close } |
|
res0: java.io.PrintWriter = $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anon$1@1d7e57f2 |
|
|
|
scala> ctx.read.json("file://" + path).saveAsTable("test") |
|
warning: there were 1 deprecation warning(s); re-run with -deprecation for details |
|
15/07/25 15:52:02 INFO MemoryStore: ensureFreeSpace(112568) called with curMem=0, maxMem=28894769971 |
|
15/07/25 15:52:02 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 109.9 KB, free 26.9 GB) |
|
15/07/25 15:52:02 INFO MemoryStore: ensureFreeSpace(19865) called with curMem=112568, maxMem=28894769971 |
|
15/07/25 15:52:02 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 19.4 KB, free 26.9 GB) |
|
15/07/25 15:52:02 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:54164 (size: 19.4 KB, free: 26.9 GB) |
|
15/07/25 15:52:02 INFO SparkContext: Created broadcast 0 from json at <console>:30 |
|
15/07/25 15:52:02 INFO FileInputFormat: Total input paths to process : 1 |
|
15/07/25 15:52:02 INFO SparkContext: Starting job: json at <console>:30 |
|
15/07/25 15:52:02 INFO DAGScheduler: Got job 0 (json at <console>:30) with 2 output partitions (allowLocal=false) |
|
15/07/25 15:52:02 INFO DAGScheduler: Final stage: ResultStage 0(json at <console>:30) |
|
15/07/25 15:52:02 INFO DAGScheduler: Parents of final stage: List() |
|
15/07/25 15:52:02 INFO DAGScheduler: Missing parents: List() |
|
15/07/25 15:52:02 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[3] at json at <console>:30), which has no missing parents |
|
15/07/25 15:52:02 INFO MemoryStore: ensureFreeSpace(4352) called with curMem=132433, maxMem=28894769971 |
|
15/07/25 15:52:02 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 4.3 KB, free 26.9 GB) |
|
15/07/25 15:52:02 INFO MemoryStore: ensureFreeSpace(2396) called with curMem=136785, maxMem=28894769971 |
|
15/07/25 15:52:02 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 2.3 KB, free 26.9 GB) |
|
15/07/25 15:52:02 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:54164 (size: 2.3 KB, free: 26.9 GB) |
|
15/07/25 15:52:02 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:874 |
|
15/07/25 15:52:02 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 0 (MapPartitionsRDD[3] at json at <console>:30) |
|
15/07/25 15:52:02 INFO TaskSchedulerImpl: Adding task set 0.0 with 2 tasks |
|
15/07/25 15:52:02 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 1569 bytes) |
|
15/07/25 15:52:02 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, localhost, PROCESS_LOCAL, 1569 bytes) |
|
15/07/25 15:52:02 INFO Executor: Running task 0.0 in stage 0.0 (TID 0) |
|
15/07/25 15:52:02 INFO Executor: Running task 1.0 in stage 0.0 (TID 1) |
|
15/07/25 15:52:02 INFO Executor: Fetching http://10.88.50.154:56500/jars/com.databricks_spark-csv_2.10-1.0.3.jar with timestamp 1437839496834 |
|
15/07/25 15:52:02 INFO Utils: Fetching http://10.88.50.154:56500/jars/com.databricks_spark-csv_2.10-1.0.3.jar to /data/spark/tmp/spark-bf5752a7-9e85-40dd-8911-6890f416397b/userFiles-30683ff9-d308-48cf-bc4d-951eacfff698/fetchFileTemp2853300902354407035.tmp |
|
15/07/25 15:52:02 INFO Executor: Adding file:/data/spark/tmp/spark-bf5752a7-9e85-40dd-8911-6890f416397b/userFiles-30683ff9-d308-48cf-bc4d-951eacfff698/com.databricks_spark-csv_2.10-1.0.3.jar to class loader |
|
15/07/25 15:52:02 INFO Executor: Fetching http://10.88.50.154:56500/jars/org.apache.commons_commons-csv-1.1.jar with timestamp 1437839496835 |
|
15/07/25 15:52:02 INFO Utils: Fetching http://10.88.50.154:56500/jars/org.apache.commons_commons-csv-1.1.jar to /data/spark/tmp/spark-bf5752a7-9e85-40dd-8911-6890f416397b/userFiles-30683ff9-d308-48cf-bc4d-951eacfff698/fetchFileTemp6237388167733059447.tmp |
|
15/07/25 15:52:02 INFO Executor: Adding file:/data/spark/tmp/spark-bf5752a7-9e85-40dd-8911-6890f416397b/userFiles-30683ff9-d308-48cf-bc4d-951eacfff698/org.apache.commons_commons-csv-1.1.jar to class loader |
|
15/07/25 15:52:02 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:0+14 |
|
15/07/25 15:52:02 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:14+15 |
|
15/07/25 15:52:02 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id |
|
15/07/25 15:52:02 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id |
|
15/07/25 15:52:02 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap |
|
15/07/25 15:52:02 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition |
|
15/07/25 15:52:02 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id |
|
15/07/25 15:52:02 INFO Executor: Finished task 1.0 in stage 0.0 (TID 1). 2137 bytes result sent to driver |
|
15/07/25 15:52:02 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 277 ms on localhost (1/2) |
|
15/07/25 15:52:02 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 2597 bytes result sent to driver |
|
15/07/25 15:52:02 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 299 ms on localhost (2/2) |
|
15/07/25 15:52:02 INFO DAGScheduler: ResultStage 0 (json at <console>:30) finished in 0.306 s |
|
15/07/25 15:52:02 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool |
|
15/07/25 15:52:02 INFO DAGScheduler: Job 0 finished: json at <console>:30, took 0.347843 s |
|
15/07/25 15:52:02 INFO HiveContext: Initializing HiveMetastoreConnection version 0.13.1 using Spark classes. |
|
15/07/25 15:52:03 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable |
|
15/07/25 15:52:03 INFO HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore |
|
15/07/25 15:52:03 INFO ObjectStore: ObjectStore, initialize called |
|
15/07/25 15:52:03 INFO Persistence: Property datanucleus.cache.level2 unknown - will be ignored |
|
15/07/25 15:52:03 INFO Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored |
|
15/07/25 15:52:03 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) |
|
15/07/25 15:52:03 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) |
|
15/07/25 15:52:04 INFO ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order" |
|
15/07/25 15:52:04 INFO MetaStoreDirectSql: MySQL check failed, assuming we are not on mysql: Lexical error at line 1, column 5. Encountered: "@" (64), after : "". |
|
15/07/25 15:52:04 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. |
|
15/07/25 15:52:04 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. |
|
15/07/25 15:52:04 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. |
|
15/07/25 15:52:04 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. |
|
15/07/25 15:52:05 INFO Query: Reading in results for query "org.datanucleus.store.rdbms.query.SQLQuery@0" since the connection used is closing |
|
15/07/25 15:52:05 INFO ObjectStore: Initialized ObjectStore |
|
15/07/25 15:52:05 INFO HiveMetaStore: Added admin role in metastore |
|
15/07/25 15:52:05 INFO HiveMetaStore: Added public role in metastore |
|
15/07/25 15:52:05 INFO HiveMetaStore: No user is added in admin role, since config is empty |
|
15/07/25 15:52:05 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr. |
|
15/07/25 15:52:05 INFO HiveMetaStore: 0: get_table : db=default tbl=test |
|
15/07/25 15:52:05 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test |
|
15/07/25 15:52:05 INFO HiveMetaStore: 0: get_database: default |
|
15/07/25 15:52:05 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_database: default |
|
15/07/25 15:52:05 INFO HiveMetaStore: 0: get_table : db=default tbl=test |
|
15/07/25 15:52:05 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test |
|
15/07/25 15:52:05 INFO MemoryStore: ensureFreeSpace(294808) called with curMem=139181, maxMem=28894769971 |
|
15/07/25 15:52:05 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 287.9 KB, free 26.9 GB) |
|
15/07/25 15:52:05 INFO MemoryStore: ensureFreeSpace(19865) called with curMem=433989, maxMem=28894769971 |
|
15/07/25 15:52:05 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 19.4 KB, free 26.9 GB) |
|
15/07/25 15:52:05 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:54164 (size: 19.4 KB, free: 26.9 GB) |
|
15/07/25 15:52:05 INFO SparkContext: Created broadcast 2 from saveAsTable at <console>:30 |
|
15/07/25 15:52:05 INFO ParquetRelation2: Using default output committer for Parquet: parquet.hadoop.ParquetOutputCommitter |
|
15/07/25 15:52:06 INFO DefaultWriterContainer: Using user defined output committer class parquet.hadoop.ParquetOutputCommitter |
|
15/07/25 15:52:06 ERROR FileOutputCommitter: Mkdirs failed to create file:/user/hive/warehouse/test/_temporary/0 |
|
15/07/25 15:52:06 INFO FileInputFormat: Total input paths to process : 1 |
|
15/07/25 15:52:06 INFO SparkContext: Starting job: saveAsTable at <console>:30 |
|
15/07/25 15:52:06 INFO DAGScheduler: Got job 1 (saveAsTable at <console>:30) with 2 output partitions (allowLocal=false) |
|
15/07/25 15:52:06 INFO DAGScheduler: Final stage: ResultStage 1(saveAsTable at <console>:30) |
|
15/07/25 15:52:06 INFO DAGScheduler: Parents of final stage: List() |
|
15/07/25 15:52:06 INFO DAGScheduler: Missing parents: List() |
|
15/07/25 15:52:06 INFO DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[6] at saveAsTable at <console>:30), which has no missing parents |
|
15/07/25 15:52:06 INFO MemoryStore: ensureFreeSpace(67792) called with curMem=453854, maxMem=28894769971 |
|
15/07/25 15:52:06 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 66.2 KB, free 26.9 GB) |
|
15/07/25 15:52:06 INFO MemoryStore: ensureFreeSpace(24065) called with curMem=521646, maxMem=28894769971 |
|
15/07/25 15:52:06 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 23.5 KB, free 26.9 GB) |
|
15/07/25 15:52:06 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:54164 (size: 23.5 KB, free: 26.9 GB) |
|
15/07/25 15:52:06 INFO SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:874 |
|
15/07/25 15:52:06 INFO DAGScheduler: Submitting 2 missing tasks from ResultStage 1 (MapPartitionsRDD[6] at saveAsTable at <console>:30) |
|
15/07/25 15:52:06 INFO TaskSchedulerImpl: Adding task set 1.0 with 2 tasks |
|
15/07/25 15:52:06 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 2, localhost, PROCESS_LOCAL, 1569 bytes) |
|
15/07/25 15:52:06 INFO TaskSetManager: Starting task 1.0 in stage 1.0 (TID 3, localhost, PROCESS_LOCAL, 1569 bytes) |
|
15/07/25 15:52:06 INFO Executor: Running task 0.0 in stage 1.0 (TID 2) |
|
15/07/25 15:52:06 INFO Executor: Running task 1.0 in stage 1.0 (TID 3) |
|
15/07/25 15:52:06 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:14+15 |
|
15/07/25 15:52:06 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:0+14 |
|
15/07/25 15:52:06 INFO DefaultWriterContainer: Using user defined output committer class parquet.hadoop.ParquetOutputCommitter |
|
15/07/25 15:52:06 INFO DefaultWriterContainer: Using user defined output committer class parquet.hadoop.ParquetOutputCommitter |
|
15/07/25 15:52:06 ERROR InsertIntoHadoopFsRelation: Aborting task. |
|
java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000001_0 (exists=false, cwd=file:/home/ubuntu) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786) |
|
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252) |
|
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83) |
|
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229) |
|
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470) |
|
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) |
|
at org.apache.spark.scheduler.Task.run(Task.scala:70) |
|
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) |
|
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) |
|
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) |
|
at java.lang.Thread.run(Thread.java:745) |
|
15/07/25 15:52:06 ERROR InsertIntoHadoopFsRelation: Aborting task. |
|
java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786) |
|
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252) |
|
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83) |
|
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229) |
|
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470) |
|
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) |
|
at org.apache.spark.scheduler.Task.run(Task.scala:70) |
|
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) |
|
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) |
|
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) |
|
at java.lang.Thread.run(Thread.java:745) |
|
15/07/25 15:52:06 WARN FileOutputCommitter: Could not delete file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000001_0 |
|
15/07/25 15:52:06 WARN FileOutputCommitter: Could not delete file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000000_0 |
|
15/07/25 15:52:06 ERROR DefaultWriterContainer: Task attempt attempt_201507251552_0001_m_000001_0 aborted. |
|
15/07/25 15:52:06 ERROR DefaultWriterContainer: Task attempt attempt_201507251552_0001_m_000000_0 aborted. |
|
15/07/25 15:52:06 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 3) |
|
org.apache.spark.SparkException: Task failed while writing rows. |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) |
|
at org.apache.spark.scheduler.Task.run(Task.scala:70) |
|
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) |
|
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) |
|
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) |
|
at java.lang.Thread.run(Thread.java:745) |
|
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000001_0 (exists=false, cwd=file:/home/ubuntu) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786) |
|
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252) |
|
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83) |
|
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229) |
|
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470) |
|
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172) |
|
... 8 more |
|
15/07/25 15:52:06 ERROR Executor: Exception in task 0.0 in stage 1.0 (TID 2) |
|
org.apache.spark.SparkException: Task failed while writing rows. |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) |
|
at org.apache.spark.scheduler.Task.run(Task.scala:70) |
|
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) |
|
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) |
|
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) |
|
at java.lang.Thread.run(Thread.java:745) |
|
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786) |
|
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252) |
|
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83) |
|
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229) |
|
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470) |
|
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172) |
|
... 8 more |
|
15/07/25 15:52:06 WARN TaskSetManager: Lost task 0.0 in stage 1.0 (TID 2, localhost): org.apache.spark.SparkException: Task failed while writing rows. |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) |
|
at org.apache.spark.scheduler.Task.run(Task.scala:70) |
|
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) |
|
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) |
|
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) |
|
at java.lang.Thread.run(Thread.java:745) |
|
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786) |
|
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252) |
|
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83) |
|
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229) |
|
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470) |
|
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172) |
|
... 8 more |
|
|
|
15/07/25 15:52:06 ERROR TaskSetManager: Task 0 in stage 1.0 failed 1 times; aborting job |
|
15/07/25 15:52:06 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool |
|
15/07/25 15:52:06 INFO TaskSetManager: Lost task 1.0 in stage 1.0 (TID 3) on executor localhost: org.apache.spark.SparkException (Task failed while writing rows.) [duplicate 1] |
|
15/07/25 15:52:06 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool |
|
15/07/25 15:52:06 INFO TaskSchedulerImpl: Cancelling stage 1 |
|
15/07/25 15:52:06 INFO DAGScheduler: ResultStage 1 (saveAsTable at <console>:30) failed in 0.100 s |
|
15/07/25 15:52:06 INFO DAGScheduler: Job 1 failed: saveAsTable at <console>:30, took 0.147694 s |
|
15/07/25 15:52:06 ERROR InsertIntoHadoopFsRelation: Aborting job. |
|
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 2, localhost): org.apache.spark.SparkException: Task failed while writing rows. |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) |
|
at org.apache.spark.scheduler.Task.run(Task.scala:70) |
|
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) |
|
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) |
|
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) |
|
at java.lang.Thread.run(Thread.java:745) |
|
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786) |
|
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252) |
|
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83) |
|
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229) |
|
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470) |
|
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172) |
|
... 8 more |
|
|
|
Driver stacktrace: |
|
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273) |
|
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264) |
|
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263) |
|
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) |
|
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) |
|
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263) |
|
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730) |
|
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730) |
|
at scala.Option.foreach(Option.scala:236) |
|
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730) |
|
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457) |
|
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418) |
|
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) |
|
15/07/25 15:52:06 ERROR DefaultWriterContainer: Job job_201507251552_0000 aborted. |
|
org.apache.spark.SparkException: Job aborted. |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.insert(commands.scala:166) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.run(commands.scala:139) |
|
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57) |
|
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57) |
|
at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68) |
|
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) |
|
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) |
|
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) |
|
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87) |
|
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:950) |
|
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:950) |
|
at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:336) |
|
at org.apache.spark.sql.hive.execution.CreateMetastoreDataSourceAsSelect.run(commands.scala:245) |
|
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57) |
|
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57) |
|
at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68) |
|
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) |
|
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) |
|
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) |
|
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87) |
|
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:950) |
|
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:950) |
|
at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:211) |
|
at org.apache.spark.sql.DataFrame.saveAsTable(DataFrame.scala:1531) |
|
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:30) |
|
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35) |
|
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37) |
|
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:39) |
|
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:41) |
|
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:43) |
|
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:45) |
|
at $iwC$$iwC$$iwC.<init>(<console>:47) |
|
at $iwC$$iwC.<init>(<console>:49) |
|
at $iwC.<init>(<console>:51) |
|
at <init>(<console>:53) |
|
at .<init>(<console>:57) |
|
at .<clinit>(<console>) |
|
at .<init>(<console>:7) |
|
at .<clinit>(<console>) |
|
at $print(<console>) |
|
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) |
|
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) |
|
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) |
|
at java.lang.reflect.Method.invoke(Method.java:606) |
|
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065) |
|
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338) |
|
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840) |
|
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871) |
|
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819) |
|
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) |
|
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) |
|
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) |
|
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) |
|
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) |
|
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670) |
|
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) |
|
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) |
|
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) |
|
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) |
|
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) |
|
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059) |
|
at org.apache.spark.repl.Main$.main(Main.scala:31) |
|
at org.apache.spark.repl.Main.main(Main.scala) |
|
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) |
|
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) |
|
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) |
|
at java.lang.reflect.Method.invoke(Method.java:606) |
|
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:665) |
|
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:170) |
|
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:193) |
|
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:112) |
|
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) |
|
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 2, localhost): org.apache.spark.SparkException: Task failed while writing rows. |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:191) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation$$anonfun$insert$1.apply(commands.scala:160) |
|
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) |
|
at org.apache.spark.scheduler.Task.run(Task.scala:70) |
|
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) |
|
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) |
|
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) |
|
at java.lang.Thread.run(Thread.java:745) |
|
Caused by: java.io.IOException: Mkdirs failed to create file:/user/hive/warehouse/test/_temporary/0/_temporary/attempt_201507251552_0001_m_000000_0 (exists=false, cwd=file:/home/ubuntu) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:442) |
|
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:889) |
|
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:786) |
|
at parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:154) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:279) |
|
at parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:252) |
|
at org.apache.spark.sql.parquet.ParquetOutputWriter.<init>(newParquet.scala:83) |
|
at org.apache.spark.sql.parquet.ParquetRelation2$$anon$4.newInstance(newParquet.scala:229) |
|
at org.apache.spark.sql.sources.DefaultWriterContainer.initWriters(commands.scala:470) |
|
at org.apache.spark.sql.sources.BaseWriterContainer.executorSideSetup(commands.scala:360) |
|
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.org$apache$spark$sql$sources$InsertIntoHadoopFsRelation$$writeRows$1(commands.scala:172) |
|
... 8 more |
|
|
|
Driver stacktrace: |
|
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273) |
|
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264) |
|
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263) |
|
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) |
|
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) |
|
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263) |
|
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730) |
|
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730) |
|
at scala.Option.foreach(Option.scala:236) |
|
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730) |
|
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457) |
|
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418) |
|
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) |
|
|
|
|
|
scala> |
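
Note: the saveAsTable call fails because, with no hive-site.xml on the classpath, the embedded Hive metastore places managed tables under the default warehouse location file:/user/hive/warehouse on the local filesystem, and the ubuntu user cannot create that directory (hence "Mkdirs failed ... cwd=file:/home/ubuntu"). A minimal workaround sketch, not part of the session above and assuming a directory writable by the shell user (the /data/spark/tables path below is hypothetical), is to write the data to an explicit location and register it as a temporary table:

// Sketch only -- adjust the output path to any directory the spark-shell user can write to.
val df = ctx.read.json("file://" + path)

// Write to an explicit, writable location instead of the default managed-table
// directory under file:/user/hive/warehouse.
df.write.mode("overwrite").parquet("file:///data/spark/tables/test")

// Make the data queryable without creating a managed Hive table.
ctx.read.parquet("file:///data/spark/tables/test").registerTempTable("test")
ctx.sql("SELECT category, num FROM test").show()

Alternatively, creating /user/hive/warehouse up front and making it writable by the ubuntu user (for example with sudo mkdir -p and chown) should let saveAsTable("test") succeed as written.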