A PySpark script that takes a Parquet file path and generates the external-table DDL for Splice Machine.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Map Spark SQL type names to Splice Machine column types.
data_type_map = {'StringType': 'varchar(32672)',
                 'LongType': 'bigint',
                 'IntegerType': 'int',
                 'BooleanType': 'boolean',
                 'DateType': 'date',
                 'DoubleType': 'double',
                 'TimestampType': 'timestamp'}
# Read the schema from the Parquet file's metadata.
parquet_file_path = 'lineitem.parquet'
schema = spark.read.parquet(parquet_file_path).schema
# Build one column definition per field in the schema.
col_defs_list = []
for field_name in schema.fieldNames():
    col = schema[field_name]
    data_type = col.dataType
    nullable = col.nullable
    name = col.name
    if str(data_type) in data_type_map:
        splicemachine_datatype = data_type_map[str(data_type)]
    else:
        # Fall back to the Spark type name with the 'Type' suffix stripped,
        # e.g. DecimalType(10,2) becomes Decimal(10,2).
        splicemachine_datatype = str(data_type).replace('Type', '')
    if nullable:
        col_def = name + " " + splicemachine_datatype
    else:
        col_def = name + " " + splicemachine_datatype + " not null"
    col_defs_list.append(col_def)
# Join the column definitions and print the final DDL statement.
col_defs_str = ',\n'.join(col_defs_list)
print('create external table test (')
print(col_defs_str)
print(')')
print('stored as parquet')
print('location ' + "'" + parquet_file_path + "'")
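
For illustration, given a hypothetical lineitem.parquet whose schema contains a non-nullable LongType column l_orderkey and a nullable StringType column l_comment (these column names are assumptions for the example, not read from any real file), the script would print DDL along these lines:

create external table test (
l_orderkey bigint not null,
l_comment varchar(32672)
)
stored as parquet
location 'lineitem.parquet'

Note that the table name is hardcoded as test; to generate DDL for a differently named table, edit the first print call or pass the name in from the command line (for example via sys.argv).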