SPARK-9342 Spark SQL problems dealing with views
// This code is designed to be pasted into spark-shell in a *nix environment.
// On Windows, replace sys.env("HOME") with a directory of your choice.
import java.io.File
import java.io.PrintWriter

import org.apache.spark.sql.hive.HiveContext

val ctx = sqlContext.asInstanceOf[HiveContext]
import ctx.implicits._

// Test data
val json = """{"category" : "A", "num" : 5}"""

// Load the test data into a table called test
val path = sys.env("HOME") + "/test_data.jsonlines"
new PrintWriter(path) { write(json); close }
ctx.jsonFile("file://" + path).saveAsTable("test")

// OK, proof that the data was loaded correctly
ctx.sql("select * from test").show

// OK, the view is created without error
ctx.sql("create view view1 as select * from test")

// FAILS: org.apache.spark.sql.AnalysisException: cannot resolve 'test.col' given input columns category, num; line 1 pos 7
ctx.table("view1").printSchema

// Cleanup
ctx.sql("drop view view1")
ctx.sql("drop table test")
new File(path).delete()
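The session log further down shows why the view breaks: when saveAsTable persists a data-source (Parquet) table, the Hive metastore entry records only a placeholder column (col array<string>) and keeps the real schema in the table properties, so Hive expands the view to select `test`.`col` from `default`.`test`, and the analyzer then cannot resolve test.col against the actual columns category and num. Until the underlying issue is fixed, one possible workaround is to sidestep Hive views entirely. The sketch below is my addition, not part of the original repro; the name view1_tmp is arbitrary, and it uses only the public Spark 1.3 DataFrame API:

// Hedged workaround sketch: register the table's DataFrame under another name as a
// temporary table instead of creating a Hive view. The temporary table never goes
// through the Hive metastore, so it keeps the real schema (category, num).
ctx.table("test").registerTempTable("view1_tmp")
ctx.sql("select * from view1_tmp").show  // resolves fine against category, num

The obvious downside is that a temporary table lives only for the current session, whereas the Hive view was presumably meant to be persistent. The full stack trace of the failing ctx.table("view1") call follows.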
org.apache.spark.sql.AnalysisException: cannot resolve 'test.col' given input columns category, num; line 1 pos 7
  at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$1.applyOrElse(CheckAnalysis.scala:54)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$1.applyOrElse(CheckAnalysis.scala:46)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:252)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:252)
  at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
  at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:251)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$transformExpressionUp$1(QueryPlan.scala:108)
  at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2$$anonfun$apply$2.apply(QueryPlan.scala:123)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
  at scala.collection.AbstractTraversable.map(Traversable.scala:105)
  at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:122)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
  at scala.collection.Iterator$class.foreach(Iterator.scala:727)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
  at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
  at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
  at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
  at scala.collection.AbstractIterator.to(Iterator.scala:1157)
  at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
  at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
  at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
  at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
  at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:127)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:46)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:44)
  at org.apache.spark.sql.catalyst.trees.TreeNode.foreach(TreeNode.scala:79)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:88)
  at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:88)
  at scala.collection.immutable.List.foreach(List.scala:318)
  at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:88)
  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:44)
  at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:40)
  at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1080)
  at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
  at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
  at org.apache.spark.sql.SQLContext.table(SQLContext.scala:945)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
  at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:39)
  at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:41)
  at $iwC$$iwC$$iwC$$iwC.<init>(<console>:43)
  at $iwC$$iwC$$iwC.<init>(<console>:45)
  at $iwC$$iwC.<init>(<console>:47)
  at $iwC.<init>(<console>:49)
  at <init>(<console>:51)
  at .<init>(<console>:55)
  at .<clinit>(<console>)
  at .<init>(<console>:7)
  at .<clinit>(<console>)
  at $print(<console>)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:606)
  at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
  at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
  at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
  at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
  at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
  at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:856)
  at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:901)
  at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:813)
  at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:656)
  at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:664)
  at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:669)
  at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:996)
  at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944)
  at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944)
  at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
  at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:944)
  at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1058)
  at org.apache.spark.repl.Main$.main(Main.scala:31)
  at org.apache.spark.repl.Main.main(Main.scala)
  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
  at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  at java.lang.reflect.Method.invoke(Method.java:606)
  at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:569)
  at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:166)
  at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:189)
  at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:110)
  at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
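The frames above show the failure happening while SQLContext.table builds a DataFrame for view1, i.e. while Spark re-analyzes the view's expanded text (select `test`.`col` from `default`.`test`, visible in the create_table and ParseDriver entries in the log below) against the table's real columns. If a persistent view is required, another possible workaround is to copy the data into a Hive-native table whose columns the metastore actually knows about. The sketch below is mine and untested against 1.3.1; it assumes plain HiveQL CREATE TABLE and INSERT INTO are passed through by HiveContext, and the names test_src, test_hive and view2 are arbitrary:

// Hedged sketch, not verified against 1.3.1: persist into a Hive-managed table with an
// explicit schema so the metastore records the real columns, then define the view on it.
val df = ctx.jsonFile("file://" + path)
df.registerTempTable("test_src")
ctx.sql("create table test_hive (category string, num bigint)")
ctx.sql("insert into table test_hive select category, num from test_src")
ctx.sql("create view view2 as select * from test_hive")
ctx.table("view2").printSchema  // should resolve, since the view expands against real columns

The complete spark-shell session that produced the error follows.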
ubuntu:~$ ~/spark-1.3.1-bin-hadoop2.6/bin/spark-shell --packages com.databricks:spark-csv_2.10:1.0.3 --driver-memory 52g --conf "spark.driver.extraJavaOptions=-XX:MaxPermSize=512m" --conf "spark.local.dir=/data/spark/tmp" | |
Ivy Default Cache set to: /home/ubuntu/.ivy2/cache | |
The jars for the packages stored in: /home/ubuntu/.ivy2/jars | |
:: loading settings :: url = jar:file:/home/ubuntu/spark-1.3.1-bin-hadoop2.6/lib/spark-assembly-1.3.1-hadoop2.6.0.jar!/org/apache/ivy/core/settings/ivysettings.xml | |
com.databricks#spark-csv_2.10 added as a dependency | |
:: resolving dependencies :: org.apache.spark#spark-submit-parent;1.0 | |
confs: [default] | |
found com.databricks#spark-csv_2.10;1.0.3 in central | |
found org.apache.commons#commons-csv;1.1 in central | |
:: resolution report :: resolve 212ms :: artifacts dl 6ms | |
:: modules in use: | |
com.databricks#spark-csv_2.10;1.0.3 from central in [default] | |
org.apache.commons#commons-csv;1.1 from central in [default] | |
--------------------------------------------------------------------- | |
| | modules || artifacts | | |
| conf | number| search|dwnlded|evicted|| number|dwnlded| | |
--------------------------------------------------------------------- | |
| default | 2 | 0 | 0 | 0 || 2 | 0 | | |
--------------------------------------------------------------------- | |
:: retrieving :: org.apache.spark#spark-submit-parent | |
confs: [default] | |
0 artifacts copied, 2 already retrieved (0kB/5ms) | |
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory). | |
log4j:WARN Please initialize the log4j system properly. | |
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. | |
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties | |
15/07/25 14:58:09 INFO SecurityManager: Changing view acls to: ubuntu | |
15/07/25 14:58:09 INFO SecurityManager: Changing modify acls to: ubuntu | |
15/07/25 14:58:09 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ubuntu); users with modify permissions: Set(ubuntu) | |
15/07/25 14:58:09 INFO HttpServer: Starting HTTP Server | |
15/07/25 14:58:09 INFO Server: jetty-8.y.z-SNAPSHOT | |
15/07/25 14:58:09 INFO AbstractConnector: Started [email protected]:54111 | |
15/07/25 14:58:09 INFO Utils: Successfully started service 'HTTP class server' on port 54111. | |
Welcome to | |
____ __ | |
/ __/__ ___ _____/ /__ | |
_\ \/ _ \/ _ `/ __/ '_/ | |
/___/ .__/\_,_/_/ /_/\_\ version 1.3.1 | |
/_/ | |
Using Scala version 2.10.4 (OpenJDK 64-Bit Server VM, Java 1.7.0_79) | |
Type in expressions to have them evaluated. | |
Type :help for more information. | |
15/07/25 14:58:12 WARN Utils: Your hostname, ip-10-88-50-154 resolves to a loopback address: 127.0.0.1; using 10.88.50.154 instead (on interface eth0) | |
15/07/25 14:58:12 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address | |
15/07/25 14:58:12 INFO SparkContext: Running Spark version 1.3.1 | |
15/07/25 14:58:12 WARN SparkConf: In Spark 1.0 and later spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone and LOCAL_DIRS in YARN). | |
15/07/25 14:58:12 INFO SecurityManager: Changing view acls to: ubuntu | |
15/07/25 14:58:12 INFO SecurityManager: Changing modify acls to: ubuntu | |
15/07/25 14:58:12 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(ubuntu); users with modify permissions: Set(ubuntu) | |
15/07/25 14:58:12 INFO Slf4jLogger: Slf4jLogger started | |
15/07/25 14:58:12 INFO Remoting: Starting remoting | |
15/07/25 14:58:12 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://[email protected]:54685] | |
15/07/25 14:58:12 INFO Utils: Successfully started service 'sparkDriver' on port 54685. | |
15/07/25 14:58:12 INFO SparkEnv: Registering MapOutputTracker | |
15/07/25 14:58:12 INFO SparkEnv: Registering BlockManagerMaster | |
15/07/25 14:58:12 INFO DiskBlockManager: Created local directory at /data/spark/tmp/spark-3b83545a-9f1e-4121-bb73-d02aa3ffa248/blockmgr-49462d99-53fc-4c24-8e42-034035bbcdf9 | |
15/07/25 14:58:12 INFO MemoryStore: MemoryStore started with capacity 26.9 GB | |
15/07/25 14:58:12 INFO HttpFileServer: HTTP File server directory is /data/spark/tmp/spark-c0d9732b-13db-4c64-87fc-306e0d82407f/httpd-5c9c57e9-2618-43ad-bc8c-ab07dbc4270a | |
15/07/25 14:58:12 INFO HttpServer: Starting HTTP Server | |
15/07/25 14:58:12 INFO Server: jetty-8.y.z-SNAPSHOT | |
15/07/25 14:58:12 INFO AbstractConnector: Started [email protected]:38885 | |
15/07/25 14:58:12 INFO Utils: Successfully started service 'HTTP file server' on port 38885. | |
15/07/25 14:58:12 INFO SparkEnv: Registering OutputCommitCoordinator | |
15/07/25 14:58:12 INFO Server: jetty-8.y.z-SNAPSHOT | |
15/07/25 14:58:12 INFO AbstractConnector: Started [email protected]:4040 | |
15/07/25 14:58:12 INFO Utils: Successfully started service 'SparkUI' on port 4040. | |
15/07/25 14:58:12 INFO SparkUI: Started SparkUI at http://10.88.50.154:4040 | |
15/07/25 14:58:12 INFO SparkContext: Added JAR file:/home/ubuntu/.ivy2/jars/spark-csv_2.10.jar at http://10.88.50.154:38885/jars/spark-csv_2.10.jar with timestamp 1437836292916 | |
15/07/25 14:58:12 INFO SparkContext: Added JAR file:/home/ubuntu/.ivy2/jars/commons-csv.jar at http://10.88.50.154:38885/jars/commons-csv.jar with timestamp 1437836292916 | |
15/07/25 14:58:12 INFO Executor: Starting executor ID <driver> on host localhost | |
15/07/25 14:58:12 INFO Executor: Using REPL class URI: http://10.88.50.154:54111 | |
15/07/25 14:58:12 INFO AkkaUtils: Connecting to HeartbeatReceiver: akka.tcp://[email protected]:54685/user/HeartbeatReceiver | |
15/07/25 14:58:13 INFO NettyBlockTransferService: Server created on 61000 | |
15/07/25 14:58:13 INFO BlockManagerMaster: Trying to register BlockManager | |
15/07/25 14:58:13 INFO BlockManagerMasterActor: Registering block manager localhost:61000 with 26.9 GB RAM, BlockManagerId(<driver>, localhost, 61000) | |
15/07/25 14:58:13 INFO BlockManagerMaster: Registered BlockManager | |
15/07/25 14:58:13 INFO SparkILoop: Created spark context.. | |
Spark context available as sc. | |
15/07/25 14:58:13 INFO SparkILoop: Created sql context (with Hive support).. | |
SQL context available as sqlContext. | |
scala> import java.io.File | |
import java.io.File | |
scala> import java.io.PrintWriter | |
import java.io.PrintWriter | |
scala> import org.apache.spark.sql.hive.HiveContext | |
import org.apache.spark.sql.hive.HiveContext | |
scala> | |
scala> val ctx = sqlContext.asInstanceOf[HiveContext] | |
ctx: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@30a563e9 | |
scala> import ctx.implicits._ | |
import ctx.implicits._ | |
scala> | |
scala> val json = """{"category" : "A", "num" : 5}""" | |
json: String = {"category" : "A", "num" : 5} | |
scala> val path = sys.env("HOME") + "/test_data.jsonlines" | |
path: String = /home/ubuntu/test_data.jsonlines | |
scala> new PrintWriter(path) { write(json); close } | |
res0: java.io.PrintWriter = $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anon$1@6a16ca09 | |
scala> ctx.jsonFile("file://" + path).saveAsTable("test") | |
15/07/25 14:58:51 INFO MemoryStore: ensureFreeSpace(229888) called with curMem=0, maxMem=28894769971 | |
15/07/25 14:58:51 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 224.5 KB, free 26.9 GB) | |
15/07/25 14:58:51 INFO MemoryStore: ensureFreeSpace(25473) called with curMem=229888, maxMem=28894769971 | |
15/07/25 14:58:51 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 24.9 KB, free 26.9 GB) | |
15/07/25 14:58:51 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:61000 (size: 24.9 KB, free: 26.9 GB) | |
15/07/25 14:58:51 INFO BlockManagerMaster: Updated info of block broadcast_0_piece0 | |
15/07/25 14:58:51 INFO SparkContext: Created broadcast 0 from textFile at JSONRelation.scala:114 | |
15/07/25 14:58:51 INFO FileInputFormat: Total input paths to process : 1 | |
15/07/25 14:58:51 INFO SparkContext: Starting job: isEmpty at JsonRDD.scala:51 | |
15/07/25 14:58:51 INFO DAGScheduler: Got job 0 (isEmpty at JsonRDD.scala:51) with 1 output partitions (allowLocal=true) | |
15/07/25 14:58:51 INFO DAGScheduler: Final stage: Stage 0(isEmpty at JsonRDD.scala:51) | |
15/07/25 14:58:51 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:51 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:51 INFO DAGScheduler: Submitting Stage 0 (file:///home/ubuntu/test_data.jsonlines MapPartitionsRDD[1] at textFile at JSONRelation.scala:114), which has no missing parents | |
15/07/25 14:58:51 INFO MemoryStore: ensureFreeSpace(2704) called with curMem=255361, maxMem=28894769971 | |
15/07/25 14:58:51 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 2.6 KB, free 26.9 GB) | |
15/07/25 14:58:51 INFO MemoryStore: ensureFreeSpace(1976) called with curMem=258065, maxMem=28894769971 | |
15/07/25 14:58:51 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 1976.0 B, free 26.9 GB) | |
15/07/25 14:58:51 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:61000 (size: 1976.0 B, free: 26.9 GB) | |
15/07/25 14:58:51 INFO BlockManagerMaster: Updated info of block broadcast_1_piece0 | |
15/07/25 14:58:51 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:51 INFO DAGScheduler: Submitting 1 missing tasks from Stage 0 (file:///home/ubuntu/test_data.jsonlines MapPartitionsRDD[1] at textFile at JSONRelation.scala:114) | |
15/07/25 14:58:51 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks | |
15/07/25 14:58:51 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:51 INFO Executor: Running task 0.0 in stage 0.0 (TID 0) | |
15/07/25 14:58:51 INFO Executor: Fetching http://10.88.50.154:38885/jars/commons-csv.jar with timestamp 1437836292916 | |
15/07/25 14:58:52 INFO Utils: Fetching http://10.88.50.154:38885/jars/commons-csv.jar to /data/spark/tmp/spark-608755ae-97a2-4b05-bae9-e15190a7db32/userFiles-4611af44-b06d-4b92-9406-5d23d023494e/fetchFileTemp5233914983551982311.tmp | |
15/07/25 14:58:52 INFO Executor: Adding file:/data/spark/tmp/spark-608755ae-97a2-4b05-bae9-e15190a7db32/userFiles-4611af44-b06d-4b92-9406-5d23d023494e/commons-csv.jar to class loader | |
15/07/25 14:58:52 INFO Executor: Fetching http://10.88.50.154:38885/jars/spark-csv_2.10.jar with timestamp 1437836292916 | |
15/07/25 14:58:52 INFO Utils: Fetching http://10.88.50.154:38885/jars/spark-csv_2.10.jar to /data/spark/tmp/spark-608755ae-97a2-4b05-bae9-e15190a7db32/userFiles-4611af44-b06d-4b92-9406-5d23d023494e/fetchFileTemp4239195187520339438.tmp | |
15/07/25 14:58:52 INFO Executor: Adding file:/data/spark/tmp/spark-608755ae-97a2-4b05-bae9-e15190a7db32/userFiles-4611af44-b06d-4b92-9406-5d23d023494e/spark-csv_2.10.jar to class loader | |
15/07/25 14:58:52 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:0+14 | |
15/07/25 14:58:52 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id | |
15/07/25 14:58:52 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id | |
15/07/25 14:58:52 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap | |
15/07/25 14:58:52 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition | |
15/07/25 14:58:52 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id | |
15/07/25 14:58:52 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1824 bytes result sent to driver | |
15/07/25 14:58:52 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 228 ms on localhost (1/1) | |
15/07/25 14:58:52 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool | |
15/07/25 14:58:52 INFO DAGScheduler: Stage 0 (isEmpty at JsonRDD.scala:51) finished in 0.238 s | |
15/07/25 14:58:52 INFO DAGScheduler: Job 0 finished: isEmpty at JsonRDD.scala:51, took 0.272322 s | |
15/07/25 14:58:52 INFO SparkContext: Starting job: reduce at JsonRDD.scala:54 | |
15/07/25 14:58:52 INFO DAGScheduler: Got job 1 (reduce at JsonRDD.scala:54) with 2 output partitions (allowLocal=false) | |
15/07/25 14:58:52 INFO DAGScheduler: Final stage: Stage 1(reduce at JsonRDD.scala:54) | |
15/07/25 14:58:52 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:52 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:52 INFO DAGScheduler: Submitting Stage 1 (MapPartitionsRDD[3] at map at JsonRDD.scala:54), which has no missing parents | |
15/07/25 14:58:52 INFO MemoryStore: ensureFreeSpace(3216) called with curMem=260041, maxMem=28894769971 | |
15/07/25 14:58:52 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 3.1 KB, free 26.9 GB) | |
15/07/25 14:58:52 INFO MemoryStore: ensureFreeSpace(2278) called with curMem=263257, maxMem=28894769971 | |
15/07/25 14:58:52 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 2.2 KB, free 26.9 GB) | |
15/07/25 14:58:52 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:61000 (size: 2.2 KB, free: 26.9 GB) | |
15/07/25 14:58:52 INFO BlockManagerMaster: Updated info of block broadcast_2_piece0 | |
15/07/25 14:58:52 INFO SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:52 INFO DAGScheduler: Submitting 2 missing tasks from Stage 1 (MapPartitionsRDD[3] at map at JsonRDD.scala:54) | |
15/07/25 14:58:52 INFO TaskSchedulerImpl: Adding task set 1.0 with 2 tasks | |
15/07/25 14:58:52 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:52 INFO TaskSetManager: Starting task 1.0 in stage 1.0 (TID 2, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:52 INFO Executor: Running task 0.0 in stage 1.0 (TID 1) | |
15/07/25 14:58:52 INFO Executor: Running task 1.0 in stage 1.0 (TID 2) | |
15/07/25 14:58:52 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:0+14 | |
15/07/25 14:58:52 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:14+15 | |
15/07/25 14:58:52 INFO Executor: Finished task 1.0 in stage 1.0 (TID 2). 1807 bytes result sent to driver | |
15/07/25 14:58:52 INFO TaskSetManager: Finished task 1.0 in stage 1.0 (TID 2) in 12 ms on localhost (1/2) | |
15/07/25 14:58:52 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 2096 bytes result sent to driver | |
15/07/25 14:58:52 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 99 ms on localhost (2/2) | |
15/07/25 14:58:52 INFO DAGScheduler: Stage 1 (reduce at JsonRDD.scala:54) finished in 0.101 s | |
15/07/25 14:58:52 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool | |
15/07/25 14:58:52 INFO DAGScheduler: Job 1 finished: reduce at JsonRDD.scala:54, took 0.108922 s | |
15/07/25 14:58:52 INFO HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore | |
15/07/25 14:58:52 INFO ObjectStore: ObjectStore, initialize called | |
15/07/25 14:58:52 INFO Persistence: Property datanucleus.cache.level2 unknown - will be ignored | |
15/07/25 14:58:52 INFO Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored | |
15/07/25 14:58:52 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) | |
15/07/25 14:58:52 WARN Connection: BoneCP specified but not present in CLASSPATH (or one of dependencies) | |
15/07/25 14:58:53 INFO ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order" | |
15/07/25 14:58:53 INFO MetaStoreDirectSql: MySQL check failed, assuming we are not on mysql: Lexical error at line 1, column 5. Encountered: "@" (64), after : "". | |
15/07/25 14:58:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:54 INFO Query: Reading in results for query "org.datanucleus.store.rdbms.query.SQLQuery@0" since the connection used is closing | |
15/07/25 14:58:54 INFO ObjectStore: Initialized ObjectStore | |
15/07/25 14:58:54 INFO HiveMetaStore: Added admin role in metastore | |
15/07/25 14:58:54 INFO HiveMetaStore: Added public role in metastore | |
15/07/25 14:58:54 INFO HiveMetaStore: No user is added in admin role, since config is empty | |
15/07/25 14:58:54 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr. | |
15/07/25 14:58:54 INFO SessionState: No Tez session required at this point. hive.execution.engine=mr. | |
15/07/25 14:58:55 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:55 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:55 INFO HiveMetaStore: 0: get_database: default | |
15/07/25 14:58:55 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_database: default | |
15/07/25 14:58:55 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:55 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:55 INFO MemoryStore: ensureFreeSpace(321846) called with curMem=265535, maxMem=28894769971 | |
15/07/25 14:58:55 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 314.3 KB, free 26.9 GB) | |
15/07/25 14:58:55 INFO MemoryStore: ensureFreeSpace(36241) called with curMem=587381, maxMem=28894769971 | |
15/07/25 14:58:55 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 35.4 KB, free 26.9 GB) | |
15/07/25 14:58:55 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:61000 (size: 35.4 KB, free: 26.9 GB) | |
15/07/25 14:58:55 INFO BlockManagerMaster: Updated info of block broadcast_3_piece0 | |
15/07/25 14:58:55 INFO SparkContext: Created broadcast 3 from textFile at JSONRelation.scala:114 | |
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder". | |
SLF4J: Defaulting to no-operation (NOP) logger implementation | |
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details. | |
15/07/25 14:58:55 INFO FileInputFormat: Total input paths to process : 1 | |
15/07/25 14:58:55 INFO SparkContext: Starting job: runJob at newParquet.scala:689 | |
15/07/25 14:58:55 INFO DAGScheduler: Got job 2 (runJob at newParquet.scala:689) with 2 output partitions (allowLocal=false) | |
15/07/25 14:58:55 INFO DAGScheduler: Final stage: Stage 2(runJob at newParquet.scala:689) | |
15/07/25 14:58:55 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:55 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:55 INFO DAGScheduler: Submitting Stage 2 (MapPartitionsRDD[7] at map at JsonRDD.scala:41), which has no missing parents | |
15/07/25 14:58:55 INFO MemoryStore: ensureFreeSpace(66216) called with curMem=623622, maxMem=28894769971 | |
15/07/25 14:58:55 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 64.7 KB, free 26.9 GB) | |
15/07/25 14:58:55 INFO MemoryStore: ensureFreeSpace(40069) called with curMem=689838, maxMem=28894769971 | |
15/07/25 14:58:55 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 39.1 KB, free 26.9 GB) | |
15/07/25 14:58:55 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on localhost:61000 (size: 39.1 KB, free: 26.9 GB) | |
15/07/25 14:58:55 INFO BlockManagerMaster: Updated info of block broadcast_4_piece0 | |
15/07/25 14:58:55 INFO SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:55 INFO DAGScheduler: Submitting 2 missing tasks from Stage 2 (MapPartitionsRDD[7] at map at JsonRDD.scala:41) | |
15/07/25 14:58:55 INFO TaskSchedulerImpl: Adding task set 2.0 with 2 tasks | |
15/07/25 14:58:55 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 3, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:55 INFO TaskSetManager: Starting task 1.0 in stage 2.0 (TID 4, localhost, PROCESS_LOCAL, 1416 bytes) | |
15/07/25 14:58:55 INFO Executor: Running task 0.0 in stage 2.0 (TID 3) | |
15/07/25 14:58:55 INFO Executor: Running task 1.0 in stage 2.0 (TID 4) | |
15/07/25 14:58:55 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:0+14 | |
15/07/25 14:58:55 INFO HadoopRDD: Input split: file:/home/ubuntu/test_data.jsonlines:14+15 | |
15/07/25 14:58:55 INFO CodecConfig: Compression: GZIP | |
15/07/25 14:58:55 INFO CodecConfig: Compression: GZIP | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet block size to 134217728 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet block size to 134217728 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet page size to 1048576 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet page size to 1048576 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet dictionary page size to 1048576 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Parquet dictionary page size to 1048576 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Dictionary is on | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Dictionary is on | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Validation is off | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Validation is off | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Writer version is: PARQUET_1_0 | |
15/07/25 14:58:55 INFO ParquetOutputFormat: Writer version is: PARQUET_1_0 | |
15/07/25 14:58:55 INFO CodecPool: Got brand-new compressor [.gz] | |
15/07/25 14:58:55 INFO CodecPool: Got brand-new compressor [.gz] | |
15/07/25 14:58:55 INFO InternalParquetRecordWriter: Flushing mem columnStore to file. allocated memory: 31,457,276 | |
15/07/25 14:58:55 INFO InternalParquetRecordWriter: Flushing mem columnStore to file. allocated memory: 31,457,297 | |
15/07/25 14:58:55 INFO FileOutputCommitter: Saved output of task 'attempt_201507251458_0008_r_000001_0' to hdfs://localhost:54310/user/hive/warehouse/test/_temporary/0/task_201507251458_0008_r_000001 | |
15/07/25 14:58:55 INFO SparkHadoopMapRedUtil: attempt_201507251458_0008_r_000001_0: Committed | |
15/07/25 14:58:55 INFO Executor: Finished task 1.0 in stage 2.0 (TID 4). 1792 bytes result sent to driver | |
15/07/25 14:58:55 INFO TaskSetManager: Finished task 1.0 in stage 2.0 (TID 4) in 152 ms on localhost (1/2) | |
15/07/25 14:58:55 INFO ColumnChunkPageWriteStore: written 56B for [category] BINARY: 1 values, 11B raw, 29B comp, 1 pages, encodings: [BIT_PACKED, RLE, PLAIN] | |
15/07/25 14:58:55 INFO ColumnChunkPageWriteStore: written 70B for [num] INT64: 1 values, 14B raw, 29B comp, 1 pages, encodings: [BIT_PACKED, RLE, PLAIN] | |
15/07/25 14:58:55 INFO FileOutputCommitter: Saved output of task 'attempt_201507251458_0008_r_000000_0' to hdfs://localhost:54310/user/hive/warehouse/test/_temporary/0/task_201507251458_0008_r_000000 | |
15/07/25 14:58:55 INFO SparkHadoopMapRedUtil: attempt_201507251458_0008_r_000000_0: Committed | |
15/07/25 14:58:55 INFO Executor: Finished task 0.0 in stage 2.0 (TID 3). 1792 bytes result sent to driver | |
15/07/25 14:58:55 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 3) in 182 ms on localhost (2/2) | |
15/07/25 14:58:55 INFO DAGScheduler: Stage 2 (runJob at newParquet.scala:689) finished in 0.183 s | |
15/07/25 14:58:55 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool | |
15/07/25 14:58:55 INFO DAGScheduler: Job 2 finished: runJob at newParquet.scala:689, took 0.230330 s | |
15/07/25 14:58:55 INFO ParquetFileReader: Initiating action with parallelism: 5 | |
15/07/25 14:58:56 INFO HiveMetaStore: 0: create_table: Table(tableName:test, dbName:default, owner:ubuntu, createTime:1437836336, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:col, type:array<string>, comment:from deserializer)], location:null, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe, parameters:{serialization.format=1, path=hdfs://localhost:54310/user/hive/warehouse/test}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[], parameters:{spark.sql.sources.schema.part.0={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}, EXTERNAL=FALSE, spark.sql.sources.schema.numParts=1, spark.sql.sources.provider=org.apache.spark.sql.parquet}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE) | |
15/07/25 14:58:56 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=create_table: Table(tableName:test, dbName:default, owner:ubuntu, createTime:1437836336, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:col, type:array<string>, comment:from deserializer)], location:null, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe, parameters:{serialization.format=1, path=hdfs://localhost:54310/user/hive/warehouse/test}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[], parameters:{spark.sql.sources.schema.part.0={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}, EXTERNAL=FALSE, spark.sql.sources.schema.numParts=1, spark.sql.sources.provider=org.apache.spark.sql.parquet}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE) | |
15/07/25 14:58:56 INFO log: Updating table stats fast for test | |
15/07/25 14:58:56 INFO log: Updated size of table test to 1530 | |
scala> ctx.sql("select * from test").show | |
15/07/25 14:58:56 INFO ParseDriver: Parsing command: select * from test | |
15/07/25 14:58:56 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:56 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:56 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:56 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:56 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:56 INFO MemoryStore: ensureFreeSpace(328206) called with curMem=729907, maxMem=28894769971 | |
15/07/25 14:58:56 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 320.5 KB, free 26.9 GB) | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(37143) called with curMem=1058113, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 36.3 KB, free 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on localhost:61000 (size: 36.3 KB, free: 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerMaster: Updated info of block broadcast_5_piece0 | |
15/07/25 14:58:57 INFO SparkContext: Created broadcast 5 from NewHadoopRDD at newParquet.scala:478 | |
15/07/25 14:58:57 INFO deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize | |
15/07/25 14:58:57 INFO deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize | |
15/07/25 14:58:57 INFO ParquetRelation2$$anon$1$$anon$2: Using Task Side Metadata Split Strategy | |
15/07/25 14:58:57 INFO SparkContext: Starting job: runJob at SparkPlan.scala:122 | |
15/07/25 14:58:57 INFO DAGScheduler: Got job 3 (runJob at SparkPlan.scala:122) with 1 output partitions (allowLocal=false) | |
15/07/25 14:58:57 INFO DAGScheduler: Final stage: Stage 3(runJob at SparkPlan.scala:122) | |
15/07/25 14:58:57 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:57 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:57 INFO DAGScheduler: Submitting Stage 3 (MapPartitionsRDD[12] at map at SparkPlan.scala:97), which has no missing parents | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(2680) called with curMem=1095256, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_6 stored as values in memory (estimated size 2.6 KB, free 26.9 GB) | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(1846) called with curMem=1097936, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 1846.0 B, free 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerInfo: Added broadcast_6_piece0 in memory on localhost:61000 (size: 1846.0 B, free: 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerMaster: Updated info of block broadcast_6_piece0 | |
15/07/25 14:58:57 INFO SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:57 INFO DAGScheduler: Submitting 1 missing tasks from Stage 3 (MapPartitionsRDD[12] at map at SparkPlan.scala:97) | |
15/07/25 14:58:57 INFO TaskSchedulerImpl: Adding task set 3.0 with 1 tasks | |
15/07/25 14:58:57 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 5, localhost, PROCESS_LOCAL, 1642 bytes) | |
15/07/25 14:58:57 INFO Executor: Running task 0.0 in stage 3.0 (TID 5) | |
15/07/25 14:58:57 INFO ParquetRelation2$$anon$1: Input split: ParquetInputSplit{part: hdfs://localhost:54310/user/hive/warehouse/test/part-r-00001.parquet start: 0 end: 518 length: 518 hosts: [] requestedSchema: message root { | |
optional binary category (UTF8); | |
optional int64 num; | |
} | |
readSupportMetadata: {org.apache.spark.sql.parquet.row.metadata={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}, org.apache.spark.sql.parquet.row.requested_schema={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}}} | |
15/07/25 14:58:57 WARN ParquetRecordReader: Can not initialize counter due to context is not a instance of TaskInputOutputContext, but is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl | |
15/07/25 14:58:57 INFO InternalParquetRecordReader: RecordReader initialized will read a total of 1 records. | |
15/07/25 14:58:57 INFO InternalParquetRecordReader: at row 0. reading next block | |
15/07/25 14:58:57 INFO CodecPool: Got brand-new decompressor [.gz] | |
15/07/25 14:58:57 INFO InternalParquetRecordReader: block read in memory in 5 ms. row count = 1 | |
15/07/25 14:58:57 INFO Executor: Finished task 0.0 in stage 3.0 (TID 5). 2025 bytes result sent to driver | |
15/07/25 14:58:57 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 5) in 39 ms on localhost (1/1) | |
15/07/25 14:58:57 INFO DAGScheduler: Stage 3 (runJob at SparkPlan.scala:122) finished in 0.040 s | |
15/07/25 14:58:57 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool | |
15/07/25 14:58:57 INFO DAGScheduler: Job 3 finished: runJob at SparkPlan.scala:122, took 0.049380 s | |
15/07/25 14:58:57 INFO SparkContext: Starting job: runJob at SparkPlan.scala:122 | |
15/07/25 14:58:57 INFO DAGScheduler: Got job 4 (runJob at SparkPlan.scala:122) with 1 output partitions (allowLocal=false) | |
15/07/25 14:58:57 INFO DAGScheduler: Final stage: Stage 4(runJob at SparkPlan.scala:122) | |
15/07/25 14:58:57 INFO DAGScheduler: Parents of final stage: List() | |
15/07/25 14:58:57 INFO DAGScheduler: Missing parents: List() | |
15/07/25 14:58:57 INFO DAGScheduler: Submitting Stage 4 (MapPartitionsRDD[12] at map at SparkPlan.scala:97), which has no missing parents | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(2680) called with curMem=1099782, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_7 stored as values in memory (estimated size 2.6 KB, free 26.9 GB) | |
15/07/25 14:58:57 INFO MemoryStore: ensureFreeSpace(1846) called with curMem=1102462, maxMem=28894769971 | |
15/07/25 14:58:57 INFO MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 1846.0 B, free 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on localhost:61000 (size: 1846.0 B, free: 26.9 GB) | |
15/07/25 14:58:57 INFO BlockManagerMaster: Updated info of block broadcast_7_piece0 | |
15/07/25 14:58:57 INFO SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:839 | |
15/07/25 14:58:57 INFO DAGScheduler: Submitting 1 missing tasks from Stage 4 (MapPartitionsRDD[12] at map at SparkPlan.scala:97) | |
15/07/25 14:58:57 INFO TaskSchedulerImpl: Adding task set 4.0 with 1 tasks | |
15/07/25 14:58:57 INFO TaskSetManager: Starting task 0.0 in stage 4.0 (TID 6, localhost, PROCESS_LOCAL, 1641 bytes) | |
15/07/25 14:58:57 INFO Executor: Running task 0.0 in stage 4.0 (TID 6) | |
15/07/25 14:58:57 INFO ParquetRelation2$$anon$1: Input split: ParquetInputSplit{part: hdfs://localhost:54310/user/hive/warehouse/test/part-r-00002.parquet start: 0 end: 288 length: 288 hosts: [] requestedSchema: message root { | |
optional binary category (UTF8); | |
optional int64 num; | |
} | |
readSupportMetadata: {org.apache.spark.sql.parquet.row.metadata={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}, org.apache.spark.sql.parquet.row.requested_schema={"type":"struct","fields":[{"name":"category","type":"string","nullable":true,"metadata":{}},{"name":"num","type":"long","nullable":true,"metadata":{}}]}}} | |
15/07/25 14:58:57 WARN ParquetRecordReader: Can not initialize counter due to context is not a instance of TaskInputOutputContext, but is org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl | |
15/07/25 14:58:57 INFO InternalParquetRecordReader: RecordReader initialized will read a total of 0 records. | |
15/07/25 14:58:57 INFO Executor: Finished task 0.0 in stage 4.0 (TID 6). 1800 bytes result sent to driver | |
15/07/25 14:58:57 INFO TaskSetManager: Finished task 0.0 in stage 4.0 (TID 6) in 11 ms on localhost (1/1) | |
15/07/25 14:58:57 INFO DAGScheduler: Stage 4 (runJob at SparkPlan.scala:122) finished in 0.011 s | |
15/07/25 14:58:57 INFO TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool | |
15/07/25 14:58:57 INFO DAGScheduler: Job 4 finished: runJob at SparkPlan.scala:122, took 0.017107 s | |
category num | |
A 5 | |
scala> ctx.sql("create view view1 as select * from test") | |
15/07/25 14:58:57 INFO ParseDriver: Parsing command: create view view1 as select * from test | |
15/07/25 14:58:57 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=Driver.run from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=TimeToSubmit from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO Driver: Concurrency mode is disabled, not creating a lock manager | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=compile from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=parse from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO ParseDriver: Parsing command: create view view1 as select * from test | |
15/07/25 14:58:57 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=parse start=1437836337356 end=1437836337357 duration=1 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=semanticAnalyze from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Starting Semantic Analysis | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Creating view view1 position=12 | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Completed phase 1 of Semantic Analysis | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Get metadata for source tables | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Get metadata for subqueries | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Get metadata for destination tables | |
15/07/25 14:58:57 INFO SemanticAnalyzer: Completed getting MetaData in Semantic Analysis | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO Driver: Semantic Analysis Completed | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=semanticAnalyze start=1437836337357 end=1437836337455 duration=98 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO Driver: Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:col, type:string, comment:null)], properties:null) | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=compile start=1437836337335 end=1437836337461 duration=126 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=Driver.execute from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO Driver: Starting command: create view view1 as select * from test | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=TimeToSubmit start=1437836337334 end=1437836337463 duration=129 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=runTasks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=task.DDL.Stage-0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: create_table: Table(tableName:view1, dbName:default, owner:ubuntu, createTime:1437836337, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:col, type:string, comment:null)], location:null, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:null, parameters:{}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[], parameters:{}, viewOriginalText:select * from test, viewExpandedText:select `test`.`col` from `default`.`test`, tableType:VIRTUAL_VIEW) | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=create_table: Table(tableName:view1, dbName:default, owner:ubuntu, createTime:1437836337, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:col, type:string, comment:null)], location:null, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:null, parameters:{}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[], parameters:{}, viewOriginalText:select * from test, viewExpandedText:select `test`.`col` from `default`.`test`, tableType:VIRTUAL_VIEW) | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=runTasks start=1437836337463 end=1437836337481 duration=18 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=Driver.execute start=1437836337461 end=1437836337481 duration=20 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO Driver: OK | |
15/07/25 14:58:57 INFO PerfLogger: <PERFLOG method=releaseLocks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=releaseLocks start=1437836337482 end=1437836337482 duration=0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:57 INFO PerfLogger: </PERFLOG method=Driver.run start=1437836337333 end=1437836337482 duration=149 from=org.apache.hadoop.hive.ql.Driver> | |
res3: org.apache.spark.sql.DataFrame = [result: string] | |
scala> ctx.table("view1").printSchema | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:57 INFO ParseDriver: Parsing command: select `test`.`col` from `default`.`test` | |
15/07/25 14:58:57 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:57 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:57 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
org.apache.spark.sql.AnalysisException: cannot resolve 'test.col' given input columns category, num; line 1 pos 7 | |
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$1.applyOrElse(CheckAnalysis.scala:54) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$1.applyOrElse(CheckAnalysis.scala:46) | |
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:252) | |
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:252) | |
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51) | |
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:251) | |
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$transformExpressionUp$1(QueryPlan.scala:108) | |
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2$$anonfun$apply$2.apply(QueryPlan.scala:123) | |
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) | |
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) | |
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) | |
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) | |
at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) | |
at scala.collection.AbstractTraversable.map(Traversable.scala:105) | |
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:122) | |
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) | |
at scala.collection.Iterator$class.foreach(Iterator.scala:727) | |
at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) | |
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) | |
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) | |
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) | |
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) | |
at scala.collection.AbstractIterator.to(Iterator.scala:1157) | |
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) | |
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) | |
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) | |
at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) | |
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:127) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:46) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:44) | |
at org.apache.spark.sql.catalyst.trees.TreeNode.foreach(TreeNode.scala:79) | |
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:88) | |
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:88) | |
at scala.collection.immutable.List.foreach(List.scala:318) | |
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:88) | |
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:44) | |
at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:40) | |
at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1080) | |
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133) | |
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51) | |
at org.apache.spark.sql.SQLContext.table(SQLContext.scala:945) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:35) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37) | |
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:39) | |
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:41) | |
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:43) | |
at $iwC$$iwC$$iwC.<init>(<console>:45) | |
at $iwC$$iwC.<init>(<console>:47) | |
at $iwC.<init>(<console>:49) | |
at <init>(<console>:51) | |
at .<init>(<console>:55) | |
at .<clinit>(<console>) | |
at .<init>(<console>:7) | |
at .<clinit>(<console>) | |
at $print(<console>) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:606) | |
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065) | |
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338) | |
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840) | |
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871) | |
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819) | |
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:856) | |
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:901) | |
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:813) | |
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:656) | |
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:664) | |
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:669) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:996) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944) | |
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944) | |
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) | |
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:944) | |
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1058) | |
at org.apache.spark.repl.Main$.main(Main.scala:31) | |
at org.apache.spark.repl.Main.main(Main.scala) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) | |
at java.lang.reflect.Method.invoke(Method.java:606) | |
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:569) | |
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:166) | |
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:189) | |
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:110) | |
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) | |
scala> ctx.sql("drop view view1") | |
15/07/25 14:58:58 INFO ParseDriver: Parsing command: drop view view1 | |
15/07/25 14:58:58 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=Driver.run from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=TimeToSubmit from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Concurrency mode is disabled, not creating a lock manager | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=compile from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=parse from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO ParseDriver: Parsing command: drop view view1 | |
15/07/25 14:58:58 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=parse start=1437836338046 end=1437836338046 duration=0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=semanticAnalyze from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO Driver: Semantic Analysis Completed | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=semanticAnalyze start=1437836338046 end=1437836338065 duration=19 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Returning Hive schema: Schema(fieldSchemas:null, properties:null) | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=compile start=1437836338046 end=1437836338065 duration=19 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=Driver.execute from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Starting command: drop view view1 | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=TimeToSubmit start=1437836338046 end=1437836338066 duration=20 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=runTasks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=task.DDL.Stage-0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: drop_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=drop_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=view1 | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=runTasks start=1437836338066 end=1437836338645 duration=579 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=Driver.execute start=1437836338065 end=1437836338645 duration=580 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: OK | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=releaseLocks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=releaseLocks start=1437836338645 end=1437836338645 duration=0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=Driver.run start=1437836338045 end=1437836338645 duration=600 from=org.apache.hadoop.hive.ql.Driver> | |
res5: org.apache.spark.sql.DataFrame = [result: string] | |
scala> ctx.sql("drop table test") | |
15/07/25 14:58:58 INFO ParseDriver: Parsing command: drop table test | |
15/07/25 14:58:58 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=Driver.run from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=TimeToSubmit from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Concurrency mode is disabled, not creating a lock manager | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=compile from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=parse from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO ParseDriver: Parsing command: DROP TABLE test | |
15/07/25 14:58:58 INFO ParseDriver: Parse Completed | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=parse start=1437836338847 end=1437836338848 duration=1 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=semanticAnalyze from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO Driver: Semantic Analysis Completed | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=semanticAnalyze start=1437836338848 end=1437836338860 duration=12 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Returning Hive schema: Schema(fieldSchemas:null, properties:null) | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=compile start=1437836338847 end=1437836338861 duration=14 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=Driver.execute from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: Starting command: DROP TABLE test | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=TimeToSubmit start=1437836338847 end=1437836338861 duration=14 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=runTasks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=task.DDL.Stage-0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: drop_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=drop_table : db=default tbl=test | |
15/07/25 14:58:58 INFO HiveMetaStore: 0: get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO audit: ugi=ubuntu ip=unknown-ip-addr cmd=get_table : db=default tbl=test | |
15/07/25 14:58:58 INFO hivemetastoressimpl: deleting hdfs://localhost:54310/user/hive/warehouse/test | |
15/07/25 14:58:58 INFO TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes. | |
15/07/25 14:58:58 INFO hivemetastoressimpl: Deleted the diretory hdfs://localhost:54310/user/hive/warehouse/test | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=runTasks start=1437836338861 end=1437836338928 duration=67 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=Driver.execute start=1437836338861 end=1437836338928 duration=67 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO Driver: OK | |
15/07/25 14:58:58 INFO PerfLogger: <PERFLOG method=releaseLocks from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=releaseLocks start=1437836338928 end=1437836338928 duration=0 from=org.apache.hadoop.hive.ql.Driver> | |
15/07/25 14:58:58 INFO PerfLogger: </PERFLOG method=Driver.run start=1437836338847 end=1437836338928 duration=81 from=org.apache.hadoop.hive.ql.Driver> | |
res6: org.apache.spark.sql.DataFrame = [] | |
scala> new File(path).delete() | |
res7: Boolean = true | |
scala> |
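The analyzer error above complains about test.col, a column the original table never had; since the reported input columns are still category and num, this suggests that Hive expanded "select *" at view-creation time against the schema it keeps in the metastore rather than the schema Spark SQL inferred from the JSON. A minimal sketch of a sidestep, assuming a persistent Hive view is not strictly required and that the ctx (HiveContext) and path values from the script above are still in scope in the same spark-shell session:

// Sketch only, not part of the captured session: keep name resolution entirely
// inside Spark SQL by registering a temporary table, so Hive's view expansion
// never gets involved.
val df = ctx.read.json("file://" + path)      // DataFrameReader is available in Spark 1.4+
df.registerTempTable("test_tmp")              // temporary: scoped to this SQLContext only
ctx.sql("select category, num from test_tmp").show()

The trade-off is that a temporary table lives only for the current SQLContext, so it does not persist across sessions the way the Hive view in the repro was intended to.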