Created
January 25, 2020 19:26
-
-
Save skp33/65121299b3508ab0135b36146fe14853 to your computer and use it in GitHub Desktop.
Generate a Hive CREATE EXTERNAL TABLE statement from a Spark Dataset's schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.lang.reflect.Method | |
import java.net.URI | |
import org.apache.spark.sql.SparkSession | |
import org.apache.spark.sql.catalyst.TableIdentifier | |
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} | |
import org.apache.spark.sql.execution.command.ShowCreateTableCommand | |
import org.apache.spark.sql.types.StructType | |
/**
 * Builds a Hive `CREATE TABLE` DDL statement for an EXTERNAL, Parquet-serialized
 * table from a Spark [[StructType]] schema, without the table having to exist.
 *
 * Internally constructs a [[CatalogTable]] and reflectively invokes the private
 * `showCreateHiveTable` method of [[ShowCreateTableCommand]], which renders the DDL.
 *
 * @param spark           active session, used only to parse `tableName`
 * @param schema          column schema of the dataset to expose as a table
 * @param tableName       table identifier, optionally database-qualified ("db.tbl")
 * @param path            location of the external table's data (becomes LOCATION)
 * @param inputFormat     Hadoop InputFormat class name, if any
 * @param partitionColumn names of the partitioning columns (must exist in `schema`)
 * @param properties      extra storage properties (SERDEPROPERTIES)
 * @return the `CREATE EXTERNAL TABLE ...` statement as a string
 */
def showCreateTableCommand(
    spark: SparkSession,
    schema: StructType,
    tableName: String,
    path: String,
    inputFormat: Option[String],
    partitionColumn: Seq[String],
    properties: Map[String, String] = Map.empty[String, String]): String = {
  val identifier: TableIdentifier = spark.sessionState.sqlParser.parseTableIdentifier(tableName)
  // Storage descriptor: external data at `path`, read/written as Parquet via Hive SerDe.
  val location: Option[URI] = Some(new URI(path))
  val outputFormat: Option[String] =
    Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")
  val hiveSerDe: Option[String] =
    Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")
  val storageFormat =
    CatalogStorageFormat(location, inputFormat, outputFormat, hiveSerDe,
      compressed = false, properties)
  val catalogTable = CatalogTable(identifier, CatalogTableType.EXTERNAL, storageFormat,
    schema, partitionColumnNames = partitionColumn)
  // `showCreateHiveTable` is private in ShowCreateTableCommand, so we have to go
  // through reflection; NOTE(review): this is version-fragile — the method's
  // signature has changed across Spark releases, verify against the Spark in use.
  val command = ShowCreateTableCommand(identifier)
  val showCreateHiveTable: Method =
    command.getClass.getDeclaredMethod("showCreateHiveTable", classOf[CatalogTable])
  showCreateHiveTable.setAccessible(true)
  showCreateHiveTable.invoke(command, catalogTable).asInstanceOf[String]
}
// Example: render the CREATE TABLE DDL for an external table over `df`'s data.
val tcstmt = showCreateTableCommand(
  spark,
  df.schema,
  "<table name>",
  "<location of data for external table>",
  Some("org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat"),
  Seq("<partition column names>"))
println(tcstmt)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment