SPARK-9342 Spark SQL problems dealing with views
// This code is designed to be pasted into spark-shell in a *nix environment.
// On Windows, replace sys.env("HOME") with a directory of your choice.
import java.io.File
import java.io.PrintWriter

import org.apache.spark.sql.hive.HiveContext

val ctx = sqlContext.asInstanceOf[HiveContext]
import ctx.implicits._

// Test data
val json = """{"category" : "A", "num" : 5}"""

// Load the test data into a table called test
val path = sys.env("HOME") + "/test_data.jsonlines"
new PrintWriter(path) { write(json); close }
ctx.jsonFile("file://" + path).saveAsTable("test")

// OK, proof that the data was loaded correctly
ctx.sql("select * from test").show

// OK, the view is created without error
ctx.sql("create view view1 as select * from test")

// FAILS: org.apache.spark.sql.AnalysisException: cannot resolve 'test.col' given input columns category, num; line 1 pos 7
ctx.table("view1").printSchema

// Cleanup
ctx.sql("drop view view1")
ctx.sql("drop table test")
new File(path).delete()
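The session log further down shows why the view breaks: when saveAsTable persists a data-source (Parquet) table, the Hive metastore entry records only a placeholder column (col array<string>) and keeps the real schema in the table properties, so Hive expands the view to select `test`.`col` from `default`.`test`, and the analyzer then cannot resolve test.col against the actual columns category and num. Until the underlying issue is fixed, one possible workaround is to sidestep Hive views entirely. The sketch below is my addition, not part of the original repro; the name view1_tmp is arbitrary, and it uses only the public Spark 1.3 DataFrame API:

// Hedged workaround sketch: register the table's DataFrame under another name as a
// temporary table instead of creating a Hive view. The temporary table never goes
// through the Hive metastore, so it keeps the real schema (category, num).
ctx.table("test").registerTempTable("view1_tmp")
ctx.sql("select * from view1_tmp").show  // resolves fine against category, num

The obvious downside is that a temporary table lives only for the current session, whereas the Hive view was presumably meant to be persistent. The full stack trace of the failing ctx.table("view1") call follows.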
org.apache.spark.sql.AnalysisException: cannot resolve 'test.col' given input columns category, num; line 1 pos 7
  at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$1.applyOrElse(CheckAnalysis.scala:54)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$1.applyOrElse(CheckAnalysis.scala:46)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:252)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:252)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:251)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$transformExpressionUp$1(QueryPlan.scala:108)
  at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2$$anonfun$apply$2.apply(QueryPlan.scala:123)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
  at scala.collection.AbstractTraversable.map(Traversable.scala:105)
  at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:122)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
  at scala.collection.Iterator$class.foreach(Iterator.scala:727)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
  at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
  at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
  at scala.collection.AbstractIterator.to(Iterator.scala:1157)
  at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
  at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
  at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
  at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:127)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:46)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:44)
  at org.apache.spark.sql.catalyst.trees.TreeNode.foreach(TreeNode.scala:79)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:88)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:88)
  at scala.collection.immutable.List.foreach(List.scala:318)
  at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:88)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:44)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:40)
  at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1080)
  at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
  at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
  at org.apache.spark.sql.SQLContext.table(SQLContext.scala:945)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:39)
  at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:41)
  at $iwC$$iwC$$iwC$$iwC.<init>(<console>:43)
  at $iwC$$iwC$$iwC.<init>(<console>:45)
  at $iwC$$iwC.<init>(<console>:47)
  at $iwC.<init>(<console>:49)
  at <init>(<console>:51)
  at .<init>(<console>:55)
  at .<clinit>(<console>)
  at .<init>(<console>:7)
  at .<clinit>(<console>)
  at $print(<console>)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:606)
  at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
  at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
  at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
  at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
  at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
  at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:856)
  at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:901)
  at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:813)
  at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:656)
  at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:664)
  at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:669)
  at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:996)
  at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944)
  at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944)
  at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
  at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:944)
  at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1058)
  at org.apache.spark.repl.Main$.main(Main.scala:31)
  at org.apache.spark.repl.Main.main(Main.scala)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:606)
  at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:569)
  at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:166)
  at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:189)
  at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:110)
  at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
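The frames above show the failure happening while SQLContext.table builds a DataFrame for view1, i.e. while Spark re-analyzes the view's expanded text (select `test`.`col` from `default`.`test`, visible in the create_table and ParseDriver entries in the log below) against the table's real columns. If a persistent view is required, another possible workaround is to copy the data into a Hive-native table whose columns the metastore actually knows about. The sketch below is mine and untested against 1.3.1; it assumes plain HiveQL CREATE TABLE and INSERT INTO are passed through by HiveContext, and the names test_src, test_hive and view2 are arbitrary:

// Hedged sketch, not verified against 1.3.1: persist into a Hive-managed table with an
// explicit schema so the metastore records the real columns, then define the view on it.
val df = ctx.jsonFile("file://" + path)
df.registerTempTable("test_src")
ctx.sql("create table test_hive (category string, num bigint)")
ctx.sql("insert into table test_hive select category, num from test_src")
ctx.sql("create view view2 as select * from test_hive")
ctx.table("view2").printSchema  // should resolve, since the view expands against real columns

The complete spark-shell session that produced the error follows.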
ubuntu:~$ ~/spark-1.3.1-bin-hadoop2.6/bin/spark-shell --packages com.databricks:spark-csv_2.10:1.0.3 --driver-memory 52g --conf "spark.driver.extraJavaOptions=-XX:MaxPermSize=512m" --conf "spark.local.dir=/data/spark/tmp" | |
Ivy Default Cache set to: /home/ubuntu/.ivy2/cache | |
The jars for the packages stored in: /home/ubuntu/.ivy2/jars | |
:: loading settings :: url = jar:file:/home/ubuntu/spark-1.3.1-bin-hadoop2.6/lib/spark-assembly-1.3.1-hadoop2.6.0.jar!/org/apache/ivy/core/settings/ivysettings.xml | |
com.databricks#spark-csv_2.10 added as a dependency | |
:: resolving dependencies :: org.apache.spark#spark-submit-parent;1.0 | |
confs: [default] | |
found com.databricks#spark-csv_2.10;1.0.3 in central | |
found org.apache.commons#commons-csv;1.1 in central | |
:: resolution report :: resolve 212ms :: artifacts dl 6ms | |
:: modules in use: | |
com.databricks#spark-csv_2.10;1.0.3 from central in [default] | |
org.apache.commons#commons-csv;1.1 from central in [default] | |
--------------------------------------------------------------------- | |
| | modules || artifacts | | |
| conf | number| search|dwnlded|evicted|| number|dwnlded| | |
--------------------------------------------------------------------- | |
| default | 2 | 0 | 0 | 0 || 2 | 0 | | |
--------------------------------------------------------------------- | |
:: retrieving :: org.apache.spark#spark-submit-parent | |
confs: [default] | |
0 artifacts copied, 2 already retrieved (0kB/5ms) | |
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory). | |
log4j:WARN Please initialize the log4j system properly. | |
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. | |
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties | |
15/07/25 14:58:09 INFO SecurityManager: Changing view acls to: ubuntu | |
15/07/25 14:58:09 INFO SecurityManager: Changing modify acls to: ubuntu | |
15/07/25 14:58:09 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ubuntu); users with modify permissions: Set(ubuntu) | |
15/07/25 14:58:09 INFO HttpServer: Starting HTTP Server | |
15/07/25 14:58:09 INFO Server: jetty-8.y.z-SNAPSHOT | |
15/07/25 14:58:09 INFO AbstractConnector: Started [email protected]:54111 | |
15/07/25 14:58:09 INFO Utils: Successfully started service 'HTTP class server' on port 54111. | |
Welcome to | |
____ __ | |
/ __/__ ___ _____/ /__ | |
_\ \/ _ \/ _ `/ __/ '_/ | |
/___/ .__/\_,_/_/ /_/\_\ version 1.3.1 | |
/_/ | |
Using Scala version 2.10.4 (OpenJDK 64-Bit Server VM, Java 1.7.0_79) | |
Type in expressions to have them evaluated. | |
Type :help for more information. | |
15/07/25 14:58:12 WARN Utils: Your hostname, ip-10-88-50-154 resolves to a loopback address: 127.0.0.1; using 10.88.50.154 instead (on interface eth0) | |
15/07/25 14:58:12 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address | |
15/07/25 14:58:12 INFO SparkContext: Running Spark version 1.3.1 | |
15/07/25 14:58:12 WARN SparkConf: In Spark 1.0 and later spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone and LOCAL_DIRS in YARN). | |
15/07/25 14:58:12 INFO SecurityManager: Changing view acls to: ubuntu | |
15/07/25 14:58:12 INFO SecurityManager: Changing modify acls to: ubuntu | |
15/07/25 14:58:12 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ubuntu); users with modify permissions: Set(ubuntu) | |
15/07/25 14:58:12 INFO Slf4jLogger: Slf4jLogger started | |
15/07/25 14:58:12 INFO Remoting: Starting remoting | |
15/07/25 14:58:12 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://[email protected]:54685] | |
15/07/25 14:58:12 INFO Utils: Successfully started service 'sparkDriver' on port 54685. | |
15/07/25 14:58:12 INFO SparkEnv: Registering MapOutputTracker | |
15/07/25 14:58:12 INFO SparkEnv: Registering BlockManagerMaster | |
15/07/25 14:58:12 INFO DiskBlockManager: Created local directory at /data/spark/tmp/spark-3b83545a-9f1e-4121-bb73-d02aa3ffa248/blockmgr-49462d99-53fc-4c24-8e42-034035bbcdf9 | |
15/07/25 14:58:12 INFO MemoryStore: MemoryStore started with capacity 26.9 GB | |
15/07/25 14:58:12 INFO HttpFileServer: HTTP File server directory is /data/spark/tmp/spark-c0d9732b-13db-4c64-87fc-306e0d82407f/httpd-5c9c57e9-2618-43ad-bc8c-ab07dbc4270a | |
15/07/25 14:58:12 INFO HttpServer: Starting HTTP Server | |
15/07/25 14:58:12 INFO Server: jetty-8.y.z-SNAPSHOT | |
15/07/25 14:58:12 INFO AbstractConnector: Started [email protected]:38885 | |
15/07/25 14:58:12 INFO Utils: Successfully started service 'HTTP file server' on port 38885. | |
15/07/25 14:58:12 INFO SparkEnv: Registering OutputCommitCoordinator | |
15/07/25 14:58:12 INFO Server: jetty-8.y.z-SNAPSHOT | |
15/07/25 14:58:12 INFO AbstractConnector: Started [email protected]:4040 | |
15/07/25 14:58:12 INFO Utils: Successfully started service 'SparkUI' on port 4040. | |
15/07/25 14:58:12 INFO SparkUI: Started SparkUI at http://10.88.50.154:4040 | |
15/07/25 14:58:12 INFO SparkContext: Added JAR file:/home/ubuntu/.ivy2/jars/spark-csv_2.10.jar at http://10.88.50.154:38885/jars/spark-csv_2.10.jar with timestamp 1437836292916 | |
15/07/25 14:58:12 INFO SparkContext: Added JAR file:/home/ubuntu/.ivy2/jars/commons-csv.jar at http://10.88.50.154:38885/jars/commons-csv.jar with timestamp 1437836292916 | |
15/07/25 14:58:12 INFO Executor: Starting executor ID <driver> on host localhost | |
15/07/25 14:58:12 INFO Executor: Using REPL class URI: http://10.88.50.154:54111 | |
15/07/25 14:58:12 INFO AkkaUtils: Connecting to HeartbeatReceiver: akka.tcp://[email protected]:54685/user/HeartbeatReceiver | |
15/07/25 14:58:13 INFO NettyBlockTransferService: Server created on 61000 | |
15/07/25 14:58:13 INFO BlockManagerMaster: Trying to register BlockManager | |
15/07/25 14:58:13 INFO BlockManagerMasterActor: Registering block manager localhost:61000 with 26.9 GB RAM, BlockManagerId(<driver>, localhost, 61000) | |
15/07/25 14:58:13 INFO BlockManagerMaster: Registered BlockManager | |
15/07/25 14:58:13 INFO SparkILoop: Created spark context.. | |
Spark context available as sc. | |
15/07/25 14:58:13 INFO SparkILoop: Created sql context (with Hive support).. | |
SQL context available as sqlContext. | |
scala> import java.io.File | |
import java.io.File | |
scala> import java.io.PrintWriter | |
import java.io.PrintWriter | |
scala> import org.apache.spark.sql.hive.HiveContext | |
import org.apache.spark.sql.hive.HiveContext | |
scala> | |
scala> val ctx = sqlContext.asInstanceOf[HiveContext] | |
ctx: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@30a563e9 | |
scala> import ctx.implicits._ | |
import ctx.implicits._ | |
scala> | |
scala> val json = """{"category" : "A", "num" : 5}""" | |
json: String = {"category" : "A", "num" : 5} | |
scala> val path = sys.env("HOME") + "/test_data.jsonlines" | |
path: String = /home/ubuntu/test_data.jsonlines | |
scala> new PrintWriter(path) { write(json); close } | |
res0: java.io.PrintWriter = $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anon$1@6a16ca09 | |
scala> ctx.jsonFile("file://" + path).saveAsTable("test") | |
15/07/25 14:58:51 INFO MemoryStore: ensureFreeSpace(229888) called with curMem=0, maxMem=28894769971 | |
15/07/25 14:58:51 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 224.5 KB, free 26.9 GB) | |
15/07/25 14:58:51 INFO MemoryStore: ensureFreeSpace(25473) called with curMem=229888, maxMem=28894769971 | |
15/07/25 14:58:51 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 24.9 KB, free 26.9 GB) | |
15/07/25 14:58:51 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:61000 (size: 24.9 KB, free: 26.9 GB) | |
15/07/25 14:58:51 INFO BlockManagerMaster: Updated info of block broadcast_0_piece0 | |
15/07/25 14:58:51 INFO SparkContext: Created broadcast 0 from textFile at JSONRelation.scala:114 | |
15/07/25 14:58:51 INFO FileInputFormat: Total input paths to process : 1 | |
15/07/25 14:58:51 INFO SparkContext: Starting job: isEmpty at JsonRDD.scala:51 | |
15/07/25 14:58:51 INFO DAGScheduler: Got job 0 (isEmpty at JsonRDD.scala:51) with 1 output partitions (allowLocal=true) | |
15/07/25 14:58:51 INFO DAGScheduler: Final stage: Stage 0(isEmpty at JsonRDD.scala:51) | |
15/07/25 14:58:51 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:51 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:51 INFO DAGScheduler: Submitting Stage 0 (file:///home/ubuntu/test_data.jsonlines MapPartitionsRDD[1] at textFile at JSONRelation.scala:114), which has no missing parents | |
15/07/25 14:58:51 INFO MemoryStore: ensureFreeSpace(2704) called with curMem=255361, maxMem=28894769971 | |
15/07/25 14:58:51 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 2.6 KB, free 26.9 GB) | |
15/07/25 14:58:51 INFO MemoryStore: ensureFreeSpace(1976) called with curMem=258065, maxMem=28894769971 | |
15/07/25 14:58:51 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 1976.0 B, free 26.9 GB) | |
15/07/25 14:58:51 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:61000 (size: 1976.0 B, free: 26.9 GB) | |
15/07/25 14:58:51 INFO BlockManagerMaster: Updated info of block broadcast_1_piece0 | |
15/07/25 14:58:51 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:51 INFO DAGScheduler: Submitting 1 missing tasks from Stage 0 (file:///home/ubuntu/test_data.jsonlines MapPartitionsRDD[1] at textFile at JSONRelation.scala:114) | |
15/07/25 14:58:51 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks | |
15/07/25 14:58:51 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:51 INFO Executor: Running task 0.0 in stage 0.0 (TID 0) | |
15/07/25 14:58:51 INFO Executor: Fetching http://10.88.50.154:38885/jars/commons-csv.jar with timestamp 1437836292916 | |
15/07/25 14:58:52 INFO Utils: Fetching http://10.88.50.154:38885/jars/commons-csv.jar to /data/spark/tmp/spark-608755ae-97a2-4b05-bae9-e15190a7db32/userFiles-4611af44-b06d-4b92-9406-5d23d023494e/fetchFileTemp5233914983551982311.tmp | |
15/07/25 14:58:52 INFO Executor: Adding file:/data/spark/tmp/spark-608755ae-97a2-4b05-bae9-e15190a7db32/userFiles-4611af44-b06d-4b92-9406-5d23d023494e/commons-csv.jar to class loader | |
15/07/25 14:58:52 INFO Executor: Fetching http://10.88.50.154:38885/jars/spark-csv_2.10.jar with timestamp 1437836292916 | |
15/07/25 14:58:52 INFO Utils: Fetching http://10.88.50.154:38885/jars/spark-csv_2.10.jar to /data/spark/tmp/spark-608755ae-97a2-4b05-bae9-e15190a7db32/userFiles-4611af44-b06d-4b92-9406-5d23d023494e/fetchFileTemp4239195187520339438.tmp | |
15/07/25 14:58:52 INFO Executor: Adding file:/data/spark/tmp/spark-608755ae-97a2-4b05-bae9-e15190a7db32/userFiles-4611af44-b06d-4b92-9406-5d23d023494e/spark-csv_2.10.jar to class loader | |
15/07/25 14:58:52 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:0+14 | |
15/07/25 14:58:52 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id | |
15/07/25 14:58:52 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id | |
15/07/25 14:58:52 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap | |
15/07/25 14:58:52 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition | |
15/07/25 14:58:52 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id | |
15/07/25 14:58:52 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1824 bytes result sent to driver | |
15/07/25 14:58:52 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 228 ms on localhost (1/1) | |
15/07/25 14:58:52 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool | |
15/07/25 14:58:52 INFO DAGScheduler: Stage 0 (isEmpty at JsonRDD.scala:51) finished in 0.238 s | |
15/07/25 14:58:52 INFO DAGScheduler: Job 0 finished: isEmpty at JsonRDD.scala:51, took 0.272322 s | |
15/07/25 14:58:52 INFO SparkContext: Starting job: reduce at JsonRDD.scala:54 | |
15/07/25 14:58:52 INFO DAGScheduler: Got job 1 (reduce at JsonRDD.scala:54) with 2 output partitions (allowLocal=false) | |
15/07/25 14:58:52 INFO DAGScheduler: Final stage: Stage 1(reduce at JsonRDD.scala:54) | |
15/07/25 14:58:52 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:52 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:52 INFO DAGScheduler: Submitting Stage 1 (MapPartitionsRDD[3] at map at JsonRDD.scala:54), which has no missing parents | |
15/07/25 14:58:52 INFO MemoryStore: ensureFreeSpace(3216) called with curMem=260041, maxMem=28894769971 | |
15/07/25 14:58:52 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 3.1 KB, free 26.9 GB) | |
15/07/25 14:58:52 INFO MemoryStore: ensureFreeSpace(2278) called with curMem=263257, maxMem=28894769971 | |
15/07/25 14:58:52 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 2.2 KB, free 26.9 GB) | |
15/07/25 14:58:52 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:61000 (size: 2.2 KB, free: 26.9 GB) | |
15/07/25 14:58:52 INFO BlockManagerMaster: Updated info of block broadcast_2_piece0 | |
15/07/25 14:58:52 INFO SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:52 INFO DAGScheduler: Submitting 2 missing tasks from Stage 1 (MapPartitionsRDD[3] at map at JsonRDD.scala:54) | |
15/07/25 14:58:52 INFO TaskSchedulerImpl: Adding task set 1.0 with 2 tasks | |
15/07/25 14:58:52 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:52 INFO TaskSetManager: Starting task 1.0 in stage 1.0 (TID 2, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:52 INFO Executor: Running task 0.0 in stage 1.0 (TID 1) | |
15/07/25 14:58:52 INFO Executor: Running task 1.0 in stage 1.0 (TID 2) | |
15/07/25 14:58:52 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:0+14 | |
15/07/25 14:58:52 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:14+15 | |
15/07/25 14:58:52 INFO Executor: Finished task 1.0 in stage 1.0 (TID 2). 1807 bytes result sent to driver | |
15/07/25 14:58:52 INFO TaskSetManager: Finished task 1.0 in stage 1.0 (TID 2) in 12 ms on localhost (1/2) | |
15/07/25 14:58:52 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 2096 bytes result sent to driver | |
15/07/25 14:58:52 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 99 ms on localhost (2/2) | |
15/07/25 14:58:52 INFO DAGScheduler: Stage 1 (reduce at JsonRDD.scala:54) finished in 0.101 s | |
15/07/25 14:58:52 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool | |
15/07/25 14:58:52 INFO DAGScheduler: Job 1 finished: reduce at JsonRDD.scala:54, took 0.108922 s | |
15/07/25 14:58:52 INFO HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore | |
15/07/25 14:58:52 INFO ObjectStore: ObjectStore, initialize called | |
15/07/25 14:58:52 INFO Persistence: Property datanucleus.cache.level2 unknown - will be ignored | |
15/07/25 14:58:52 INFO Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored | |
15/07/25 14:58:52 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) | |
15/07/25 14:58:52 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) | |
15/07/25 14:58:53 INFO ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order" | |
15/07/25 14:58:53 INFO MetaStoreDirectSql: MySQL check failed, assuming we are not on mysql: Lexical error at line 1, column 5. Encountered: "@" (64), after : "". | |
15/07/25 14:58:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:54 INFO Query: Reading in results for query "org.datanucleus.store.rdbms.query.SQLQuery@0" since the connection used is closing | |
15/07/25 14:58:54 INFO ObjectStore: Initialized ObjectStore | |
15/07/25 14:58:54 INFO HiveMetaStore: Added admin role in metastore | |
15/07/25 14:58:54 INFO HiveMetaStore: Added public role in metastore | |
15/07/25 14:58:54 INFO HiveMetaStore: No user is added in admin role, since config is empty | |
15/07/25 14:58:54 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr. | |
15/07/25 14:58:54 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr. | |
15/07/25 14:58:55 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:55 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:55 INFO HiveMetaStore: 0: get_database: default | |
15/07/25 14:58:55 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_database: default | |
15/07/25 14:58:55 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:55 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:55 INFO MemoryStore: ensureFreeSpace(321846) called with curMem=265535, maxMem=28894769971 | |
15/07/25 14:58:55 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 314.3 KB, free 26.9 GB) | |
15/07/25 14:58:55 INFO MemoryStore: ensureFreeSpace(36241) called with curMem=587381, maxMem=28894769971 | |
15/07/25 14:58:55 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 35.4 KB, free 26.9 GB) | |
15/07/25 14:58:55 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:61000 (size: 35.4 KB, free: 26.9 GB) | |
15/07/25 14:58:55 INFO BlockManagerMaster: Updated info of block broadcast_3_piece0 | |
15/07/25 14:58:55 INFO SparkContext: Created broadcast 3 from textFile at JSONRelation.scala:114 | |
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder". | |
SLF4J: Defaulting to no-operation (NOP) logger implementation | |
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details. | |
15/07/25 14:58:55 INFO FileInputFormat: Total input paths to process : 1 | |
15/07/25 14:58:55 INFO SparkContext: Starting job: runJob at newParquet.scala:689 | |
15/07/25 14:58:55 INFO DAGScheduler: Got job 2 (runJob at newParquet.scala:689) with 2 output partitions (allowLocal=false) | |
15/07/25 14:58:55 INFO DAGScheduler: Final stage: Stage 2(runJob at newParquet.scala:689) | |
15/07/25 14:58:55 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:55 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:55 INFO DAGScheduler: Submitting Stage 2 (MapPartitionsRDD[7] at map at JsonRDD.scala:41), which has no missing parents | |
15/07/25 14:58:55 INFO MemoryStore: ensureFreeSpace(66216) called with curMem=623622, maxMem=28894769971 | |
15/07/25 14:58:55 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 64.7 KB, free 26.9 GB) | |
15/07/25 14:58:55 INFO MemoryStore: ensureFreeSpace(40069) called with curMem=689838, maxMem=28894769971 | |
15/07/25 14:58:55 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 39.1 KB, free 26.9 GB) | |
15/07/25 14:58:55 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on localhost:61000 (size: 39.1 KB, free: 26.9 GB) | |
15/07/25 14:58:55 INFO BlockManagerMaster: Updated info of block broadcast_4_piece0 | |
15/07/25 14:58:55 INFO SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:55 INFO DAGScheduler: Submitting 2 missing tasks from Stage 2 (MapPartitionsRDD[7] at map at JsonRDD.scala:41) | |
15/07/25 14:58:55 INFO TaskSchedulerImpl: Adding task set 2.0 with 2 tasks | |
15/07/25 14:58:55 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 3, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:55 INFO TaskSetManager: Starting task 1.0 in stage 2.0 (TID 4, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:55 INFO Executor: Running task 0.0 in stage 2.0 (TID 3) | |
15/07/25 14:58:55 INFO Executor: Running task 1.0 in stage 2.0 (TID 4) | |
15/07/25 14:58:55 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:0+14 | |
15/07/25 14:58:55 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:14+15 | |
15/07/25 14:58:55 INFO CodecConfig: Compression: GZIP | |
15/07/25 14:58:55 INFO CodecConfig: Compression: GZIP | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet block size to 134217728 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet block size to 134217728 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet page size to 1048576 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet page size to 1048576 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet dictionary page size to 1048576 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet dictionary page size to 1048576 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Dictionary is on | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Dictionary is on | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Validation is off | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Validation is off | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Writer version is: PARQUET_1_0 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Writer version is: PARQUET_1_0 | |
15/07/25 14:58:55 INFO CodecPool: Got brand-new compressor [.gz] | |
15/07/25 14:58:55 INFO CodecPool: Got brand-new compressor [.gz] | |
15/07/25 14:58:55 INFO InternalParquetRecordWriter: Flushing mem columnStore to file. allocated memory: 31,457,276 | |
15/07/25 14:58:55 INFO InternalParquetRecordWriter: Flushing mem columnStore to file. allocated memory: 31,457,297 | |
15/07/25 14:58:55 INFO FileOutputCommitter: Saved output of task 'attempt_201507251458_0008_r_000001_0' to hdfs://localhost:54310/user/hive/warehouse/test/_temporary/0/task_201507251458_0008_r_000001 | |
15/07/25 14:58:55 INFO SparkHadoopMapRedUtil: attempt_201507251458_0008_r_000001_0: Committed | |
15/07/25 14:58:55 INFO Executor: Finished task 1.0 in stage 2.0 (TID 4). 1792 bytes result sent to driver | |
15/07/25 14:58:55 INFO TaskSetManager: Finished task 1.0 in stage 2.0 (TID 4) in 152 ms on localhost (1/2) | |
15/07/25 14:58:55 INFO ColumnChunkPageWriteStore: written 56B for [category] BINARY: 1 values, 11B raw, 29B comp, 1 pages, encodings: [BIT_PACKED, RLE, PLAIN] | |
15/07/25 14:58:55 INFO ColumnChunkPageWriteStore: written 70B for [num] INT64: 1 values, 14B raw, 29B comp, 1 pages, encodings: [BIT_PACKED, RLE, PLAIN] | |
15/07/25 14:58:55 INFO FileOutputCommitter: Saved output of task 'attempt_201507251458_0008_r_000000_0' to hdfs://localhost:54310/user/hive/warehouse/test/_temporary/0/task_201507251458_0008_r_000000 | |
15/07/25 14:58:55 INFO SparkHadoopMapRedUtil: attempt_201507251458_0008_r_000000_0: Committed | |
15/07/25 14:58:55 INFO Executor: Finished task 0.0 in stage 2.0 (TID 3). 1792 bytes result sent to driver | |
15/07/25 14:58:55 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 3) in 182 ms on localhost (2/2) | |
15/07/25 14:58:55 INFO DAGScheduler: Stage 2 (runJob at newParquet.scala:689) finished in 0.183 s | |
15/07/25 14:58:55 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool | |
15/07/25 14:58:55 INFO DAGScheduler: Job 2 finished: runJob at newParquet.scala:689, took 0.230330 s | |
15/07/25 14:58:55 INFO ParquetFileReader: Initiating action with parallelism: 5 | |
15/07/25 14:58:56 INFO HiveMetaStore: 0: create_table: Table(tableName:test, dbName:default, owner:ubuntu, createTime:1437836336, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:col, type:array<string>, comment:from deserializer)], location:null, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe, parameters:{serialization.format=1, path=hdfs://localhost:54310/user/hive/warehouse/test}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[], parameters:{spark.sql.sources.schema.part.0={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}, EXTERNAL=FALSE, spark.sql.sources.schema.numParts=1, spark.sql.sources.provider=org.apache.spark.sql.parquet}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE) | |
15/07/25 14:58:56 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=create_table: Table(tableName:test, dbName:default, owner:ubuntu, createTime:1437836336, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:col, type:array<string>, comment:from deserializer)], location:null, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe, parameters:{serialization.format=1, path=hdfs://localhost:54310/user/hive/warehouse/test}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[], parameters:{spark.sql.sources.schema.part.0={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}, EXTERNAL=FALSE, spark.sql.sources.schema.numParts=1, spark.sql.sources.provider=org.apache.spark.sql.parquet}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE) | |
15/07/25 14:58:56 INFO log: Updating table stats fast for test | |
15/07/25 14:58:56 INFO log: Updated size of table test to 1530 | |
scala> ctx.sql("select * from test").show | |
15/07/25 14:58:56 INFO ParseDriver: Parsing command: select * from test | |
15/07/25 14:58:56 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:56 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:56 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:56 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:56 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:56 INFO MemoryStore: ensureFreeSpace(328206) called with curMem=729907, maxMem=28894769971 | |
15/07/25 14:58:56 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 320.5 KB, free 26.9 GB) | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(37143) called with curMem=1058113, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 36.3 KB, free 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on localhost:61000 (size: 36.3 KB, free: 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerMaster: Updated info of block broadcast_5_piece0 | |
15/07/25 14:58:57 INFO SparkContext: Created broadcast 5 from NewHadoopRDD at newParquet.scala:478 | |
15/07/25 14:58:57 INFO deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize | |
15/07/25 14:58:57 INFO deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize | |
15/07/25 14:58:57 INFO ParquetRelation2$$anon$1$$anon$2: Using Task Side Metadata Split Strategy | |
15/07/25 14:58:57 INFO SparkContext: Starting job: runJob at SparkPlan.scala:122 | |
15/07/25 14:58:57 INFO DAGScheduler: Got job 3 (runJob at SparkPlan.scala:122) with 1 output partitions (allowLocal=false) | |
15/07/25 14:58:57 INFO DAGScheduler: Final stage: Stage 3(runJob at SparkPlan.scala:122) | |
15/07/25 14:58:57 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:57 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:57 INFO DAGScheduler: Submitting Stage 3 (MapPartitionsRDD[12] at map at SparkPlan.scala:97), which has no missing parents | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(2680) called with curMem=1095256, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_6 stored as values in memory (estimated size 2.6 KB, free 26.9 GB) | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(1846) called with curMem=1097936, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 1846.0 B, free 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerInfo: Added broadcast_6_piece0 in memory on localhost:61000 (size: 1846.0 B, free: 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerMaster: Updated info of block broadcast_6_piece0 | |
15/07/25 14:58:57 INFO SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:57 INFO DAGScheduler: Submitting 1 missing tasks from Stage 3 (MapPartitionsRDD[12] at map at SparkPlan.scala:97) | |
15/07/25 14:58:57 INFO TaskSchedulerImpl: Adding task set 3.0 with 1 tasks | |
15/07/25 14:58:57 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 5, localhost, PROCESS_LOCAL, 1642 bytes) | |
15/07/25 14:58:57 INFO Executor: Running task 0.0 in stage 3.0 (TID 5) | |
15/07/25 14:58:57 INFO ParquetRelation2$$anon$1: Input split: ParquetInputSplit{part: hdfs://localhost:54310/user/hive/warehouse/test/part-r-00001.parquet start: 0 end: 518 length: 518 hosts: [] requestedSchema: message root { | |
optional binary category (UTF8); | |
optional int64 num; | |
} | |
readSupportMetadata: {org.apache.spark.sql.parquet.row.metadata={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}, org.apache.spark.sql.parquet.row.requested_schema={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}}} | |
15/07/25 14:58:57 WARN ParquetRecordReader: Can not initialize counter due to context is not a instance of TaskInputOutputContext, but is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl | |
15/07/25 14:58:57 INFO InternalParquetRecordReader: RecordReader initialized will read a total of 1 records. | |
15/07/25 14:58:57 INFO InternalParquetRecordReader: at row 0. reading next block | |
15/07/25 14:58:57 INFO CodecPool: Got brand-new decompressor [.gz] | |
15/07/25 14:58:57 INFO InternalParquetRecordReader: block read in memory in 5 ms. row count = 1 | |
15/07/25 14:58:57 INFO Executor: Finished task 0.0 in stage 3.0 (TID 5). 2025 bytes result sent to driver | |
15/07/25 14:58:57 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 5) in 39 ms on localhost (1/1) | |
15/07/25 14:58:57 INFO DAGScheduler: Stage 3 (runJob at SparkPlan.scala:122) finished in 0.040 s | |
15/07/25 14:58:57 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool | |
15/07/25 14:58:57 INFO DAGScheduler: Job 3 finished: runJob at SparkPlan.scala:122, took 0.049380 s | |
15/07/25 14:58:57 INFO SparkContext: Starting job: runJob at SparkPlan.scala:122 | |
15/07/25 14:58:57 INFO DAGScheduler: Got job 4 (runJob at SparkPlan.scala:122) with 1 output partitions (allowLocal=false) | |
15/07/25 14:58:57 INFO DAGScheduler: Final stage: Stage 4(runJob at SparkPlan.scala:122) | |
15/07/25 14:58:57 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:57 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:57 INFO DAGScheduler: Submitting Stage 4 (MapPartitionsRDD[12] at map at SparkPlan.scala:97), which has no missing parents | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(2680) called with curMem=1099782, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_7 stored as values in memory (estimated size 2.6 KB, free 26.9 GB) | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(1846) called with curMem=1102462, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 1846.0 B, free 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on localhost:61000 (size: 1846.0 B, free: 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerMaster: Updated info of block broadcast_7_piece0 | |
15/07/25 14:58:57 INFO SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:57 INFO DAGScheduler: Submitting 1 missing tasks from Stage 4 (MapPartitionsRDD[12] at map at SparkPlan.scala:97) | |
15/07/25 14:58:57 INFO TaskSchedulerImpl: Adding task set 4.0 with 1 tasks | |
15/07/25 14:58:57 INFO TaskSetManager: Starting task 0.0 in stage 4.0 (TID 6, localhost, PROCESS_LOCAL, 1641 bytes) | |
15/07/25 14:58:57 INFO Executor: Running task 0.0 in stage 4.0 (TID 6) | |
15/07/25 14:58:57 INFO ParquetRelation2$$anon$1: Input split: ParquetInputSplit{part: hdfs://localhost:54310/user/hive/warehouse/test/part-r-00002.parquet start: 0 end: 288 length: 288 hosts: [] requestedSchema: message root { | |
optional binary category (UTF8); | |
optional int64 num; | |
} | |
readSupportMetadata: {org.apache.spark.sql.parquet.row.metadata={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}, org.apache.spark.sql.parquet.row.requested_schema={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}}} | |
15/07/25 14:58:57 WARN ParquetRecordReader: Can not initialize counter due to context is not a instance of TaskInputOutputContext, but is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl | |
15/07/25 14:58:57 INFO InternalParquetRecordReader: RecordReader initialized will read a total of 0 records. | |
15/07/25 14:58:57 INFO Executor: Finished task 0.0 in stage 4.0 (TID 6). 1800 bytes result sent to driver | |
15/07/25 14:58:57 INFO TaskSetManager: Finished task 0.0 in stage 4.0 (TID 6) in 11 ms on localhost (1/1) | |
15/07/25 14:58:57 INFO DAGScheduler: Stage 4 (runJob at SparkPlan.scala:122) finished in 0.011 s | |
15/07/25 14:58:57 INFO TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool | |
15/07/25 14:58:57 INFO DAGScheduler: Job 4 finished: runJob at SparkPlan.scala:122, took 0.017107 s | |
category num | |
A 5 | |
scala> ctx.sql("create view view1 as select * from test") | |
15/07/25 14:58:57 INFO ParseDriver: Parsing command: create view view1 as select * from test | |
15/07/25 14:58:57 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=Driver.run from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=TimeToSubmit from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO Driver: Concurrency mode is disabled, not creating a lock manager | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=compile from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=parse from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO ParseDriver: Parsing command: create view view1 as select * from test | |
15/07/25 14:58:57 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=parse start=1437836337356 end=1437836337357 duration=1 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=semanticAnalyze from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Starting Semantic Analysis | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Creating view view1 position=12 | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Completed phase 1 of Semantic Analysis | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Get metadata for source tables | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Get metadata for subqueries | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Get metadata for destination tables | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Completed getting MetaData in Semantic Analysis | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO Driver: Semantic Analysis Completed | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=semanticAnalyze start=1437836337357 end=1437836337455 duration=98 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO Driver: Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:col, type:string, comment:null)], properties:null) | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=compile start=1437836337335 end=1437836337461 duration=126 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=Driver.execute from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO Driver: Starting command: create view view1 as select * from test | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=TimeToSubmit start=1437836337334 end=1437836337463 duration=129 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=runTasks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=task.DDL.Stage-0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: create_table: Table(tableName:view1, dbName:default, owner:ubuntu, createTime:1437836337, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:col, type:string, comment:null)], location:null, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:null, parameters:{}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[], parameters:{}, viewOriginalText:select * from test, viewExpandedText:select `test`.`col` from `default`.`test`, tableType:VIRTUAL_VIEW) | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=create_table: Table(tableName:view1, dbName:default, owner:ubuntu, createTime:1437836337, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:col, type:string, comment:null)], location:null, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:null, parameters:{}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[], parameters:{}, viewOriginalText:select * from test, viewExpandedText:select `test`.`col` from `default`.`test`, tableType:VIRTUAL_VIEW) | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=runTasks start=1437836337463 end=1437836337481 duration=18 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=Driver.execute start=1437836337461 end=1437836337481 duration=20 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO Driver: OK | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=releaseLocks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=releaseLocks start=1437836337482 end=1437836337482 duration=0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=Driver.run start=1437836337333 end=1437836337482 duration=149 from=org.apache.hadoop.hive.ql.Driver> | |
res3: org.apache.spark.sql.DataFrame = [result: string] | |
scala> ctx.table("view1").printSchema | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO ParseDriver: Parsing command: select `test`.`col` from `default`.`test` | |
15/07/25 14:58:57 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
org.apache.spark.sql.AnalysisException: cannot resolve 'test.col' given input columns category, num; line 1 pos 7 | |
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$1.applyOrElse(CheckAnalysis.scala:54) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$1.applyOrElse(CheckAnalysis.scala:46) | |
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:252) | |
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:252) | |
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51) | |
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:251) | |
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$transformExpressionUp$1(QueryPlan.scala:108) | |
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2$$anonfun$apply$2.apply(QueryPlan.scala:123) | |
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) | |
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) | |
at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) | |
at scala.collection.AbstractTraversable.map(Traversable.scala:105) | |
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:122) | |
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:727) | |
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) | |
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) | |
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) | |
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) | |
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) | |
at scala.collection.AbstractIterator.to(Iterator.scala:1157) | |
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) | |
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) | |
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) | |
at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) | |
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:127) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:46) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:44) | |
at org.apache.spark.sql.catalyst.trees.TreeNode.foreach(TreeNode.scala:79) | |
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:88) | |
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:88) | |
at scala.collection.immutable.List.foreach(List.scala:318) | |
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:88) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:44) | |
at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:40) | |
at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1080) | |
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133) | |
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51) | |
at org.apache.spark.sql.SQLContext.table(SQLContext.scala:945) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:39) | |
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:41) | |
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:43) | |
at $iwC$$iwC$$iwC.<init>(<console>:45) | |
at $iwC$$iwC.<init>(<console>:47) | |
at $iwC.<init>(<console>:49) | |
at <init>(<console>:51) | |
at .<init>(<console>:55) | |
at .<clinit>(<console>) | |
at .<init>(<console>:7) | |
at .<clinit>(<console>) | |
at $print(<console>) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:606) | |
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065) | |
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338) | |
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840) | |
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871) | |
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819) | |
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:856) | |
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:901) | |
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:813) | |
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:656) | |
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:664) | |
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:669) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:996) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944) | |
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) | |
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:944) | |
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1058) | |
at org.apache.spark.repl.Main$.main(Main.scala:31) | |
at org.apache.spark.repl.Main.main(Main.scala) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:606) | |
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:569) | |
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:166) | |
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:189) | |
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:110) | |
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) | |
scala> ctx.sql("drop view view1") | |
15/07/25 14:58:58 INFO ParseDriver: Parsing command: drop view view1 | |
15/07/25 14:58:58 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=Driver.run from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=TimeToSubmit from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Concurrency mode is disabled, not creating a lock manager | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=compile from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=parse from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO ParseDriver: Parsing command: drop view view1 | |
15/07/25 14:58:58 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=parse start=1437836338046 end=1437836338046 duration=0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=semanticAnalyze from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO Driver: Semantic Analysis Completed | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=semanticAnalyze start=1437836338046 end=1437836338065 duration=19 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Returning Hive schema: Schema(fieldSchemas:null, properties:null) | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=compile start=1437836338046 end=1437836338065 duration=19 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=Driver.execute from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Starting command: drop view view1 | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=TimeToSubmit start=1437836338046 end=1437836338066 duration=20 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=runTasks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=task.DDL.Stage-0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: drop_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=drop_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=runTasks start=1437836338066 end=1437836338645 duration=579 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=Driver.execute start=1437836338065 end=1437836338645 duration=580 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: OK | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=releaseLocks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=releaseLocks start=1437836338645 end=1437836338645 duration=0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=Driver.run start=1437836338045 end=1437836338645 duration=600 from=org.apache.hadoop.hive.ql.Driver> | |
res5: org.apache.spark.sql.DataFrame = [result: string] | |
scala> ctx.sql("drop table test") | |
15/07/25 14:58:58 INFO ParseDriver: Parsing command: drop table test | |
15/07/25 14:58:58 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=Driver.run from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=TimeToSubmit from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Concurrency mode is disabled, not creating a lock manager | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=compile from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=parse from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO ParseDriver: Parsing command: DROP TABLE test | |
15/07/25 14:58:58 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=parse start=1437836338847 end=1437836338848 duration=1 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=semanticAnalyze from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO Driver: Semantic Analysis Completed | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=semanticAnalyze start=1437836338848 end=1437836338860 duration=12 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Returning Hive schema: Schema(fieldSchemas:null, properties:null) | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=compile start=1437836338847 end=1437836338861 duration=14 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=Driver.execute from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Starting command: DROP TABLE test | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=TimeToSubmit start=1437836338847 end=1437836338861 duration=14 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=runTasks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=task.DDL.Stage-0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: drop_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=drop_table : db=default tbl=test | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO hivemetastoressimpl: deleting hdfs://localhost:54310/user/hive/warehouse/test | |
15/07/25 14:58:58 INFO TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes. | |
15/07/25 14:58:58 INFO hivemetastoressimpl: Deleted the diretory hdfs://localhost:54310/user/hive/warehouse/test | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=runTasks start=1437836338861 end=1437836338928 duration=67 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=Driver.execute start=1437836338861 end=1437836338928 duration=67 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: OK | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=releaseLocks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=releaseLocks start=1437836338928 end=1437836338928 duration=0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=Driver.run start=1437836338847 end=1437836338928 duration=81 from=org.apache.hadoop.hive.ql.Driver> | |
res6: org.apache.spark.sql.DataFrame = [] | |
scala> new File(path).delete() | |
res7: Boolean = true | |
scala> |
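The analyzer error above complains about test.col, a column the original table never had; since the reported input columns are still category and num, this suggests that Hive expanded "select *" at view-creation time against the schema it keeps in the metastore rather than the schema Spark SQL inferred from the JSON. A minimal sketch of a sidestep, assuming a persistent Hive view is not strictly required and that the ctx (HiveContext) and path values from the script above are still in scope in the same spark-shell session:

// Sketch only, not part of the captured session: keep name resolution entirely
// inside Spark SQL by registering a temporary table, so Hive's view expansion
// never gets involved.
val df = ctx.read.json("file://" + path)      // DataFrameReader is available in Spark 1.4+
df.registerTempTable("test_tmp")              // temporary: scoped to this SQLContext only
ctx.sql("select category, num from test_tmp").show()

The trade-off is that a temporary table lives only for the current SQLContext, so it does not persist across sessions the way the Hive view in the repro was intended to.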