Skip to content

Instantly share code, notes, and snippets.

@j-thepac
Last active April 16, 2023 04:08
Show Gist options
  • Save j-thepac/d4a2ba6ae985d1f3fb7ee5227c6fcc43 to your computer and use it in GitHub Desktop.
Save j-thepac/d4a2ba6ae985d1f3fb7ee5227c6fcc43 to your computer and use it in GitHub Desktop.
"""
Convert JSON data to rows and columns. Example input:
[{"data":[["2","2xg","2Q"],["1","3xg","3Q"]],"schema":[{"columnName":"CASE_UID","ordinal":0,"dataTypeName":"varchar"},{"columnName":"QUOTE_ID","ordinal":1,"dataTypeName":"varchar"},{"columnName":"OPP_NO","ordinal":2,"dataTypeName":"varchar"}]}]
"""
from pyspark.sql.functions import *
from pyspark.sql.types import *
# Load the JSON document. Per the sample in the module docstring, each record
# carries a "data" array (one inner array per logical row) and a "schema"
# array of column descriptors (columnName / ordinal / dataTypeName).
# `spark` and `path` are expected to be provided by the surrounding session.
df = spark.read.json(path)

# Produce one DataFrame row per inner array of "data".
df = df.withColumn("c", explode(df.data))

# Pull the positional values out of the exploded array into named columns.
# The positions follow the "ordinal" values shown in the sample schema.
df = (
    df.withColumn("CASE_UID", df.c[0].cast("string"))
    .withColumn("QUOTE_ID", df.c[1].cast("string"))
    .withColumn("OPP_NO", df.c[2].cast("string"))
)
# Alternative (positional, unnamed columns):
#   df.select(*(df.c[i] for i in range(3)))

# Show the flattened rows.
df.select("CASE_UID", "QUOTE_ID", "OPP_NO").show()

# Show the declared data types and column names from the embedded schema.
# "schema" is an array of structs, so the field path is "schema.<field>".
df.select("schema.dataTypeName").show(1, False)
df.select("schema.columnName").show(1, False)
"""
+--------+--------+------+
|CASE_UID|QUOTE_ID|OPP_NO|
+--------+--------+------+
|       2|     2xg|    2Q|
|       1|     3xg|    3Q|
+--------+--------+------+
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment