Rendson.Fernandes Rendson-Fernandes

mauliksoneji / bq_spark_reader.py

Created October 28, 2019 06:33

BigQuery client to read data from BigQuery into a Spark DataFrame

	class BigQueryClient(object):

	# Constructor: store the given project id on the instance as
	# `self.project_id` so the other methods of the client can read it.
	def __init__(self, project_id):
	self.project_id = project_id

	def _get_conf(self, bucket, dataset_id, table_id):
	return {
	"fs.gs.project.id": self.project_id,
	"mapred.bq.project.id": self.project_id, # default project
	"mapred.bq.gcs.bucket": bucket, # GCS bucket holding the temporary path

stefanthoss / export-pyspark-schema-to-json.py

Created June 19, 2019 22:16

Export/import a PySpark schema to/from a JSON file

	import json
	from pyspark.sql.types import *

	# Build the example schema: two nullable columns, a string "name"
	# and an integer "age".
	schema = StructType(
	    [
	        StructField("name", StringType(), nullable=True),
	        StructField("age", IntegerType(), nullable=True),
	    ]
	)

	# Write the schema
	with open("schema.json", "w") as f: