Skip to content

Instantly share code, notes, and snippets.

@tecmaverick
Created November 24, 2022 07:28
Show Gist options
  • Save tecmaverick/38dbcb78ae4e3c907ca7a06f6d5d19f8 to your computer and use it in GitHub Desktop.
Save tecmaverick/38dbcb78ae4e3c907ca7a06f6d5d19f8 to your computer and use it in GitHub Desktop.
Spark JSON Schema Scala
Data 1.json
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"}
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"}
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"}
Supported Schema
// Flat schema for the Data 1.json sample records: three string columns,
// with the fields supplied as a List.
val schema = StructType(
  List(
    StructField(name = "fullname", dataType = StringType),
    StructField(name = "sex", dataType = StringType),
    StructField(name = "address", dataType = StringType)
  )
)
// Flat schema for the Data 1.json sample records: three string columns,
// with the fields supplied as an Array instead of a List.
val schema = StructType(
  Array(
    StructField(name = "fullname", dataType = StringType),
    StructField(name = "sex", dataType = StringType),
    StructField(name = "address", dataType = StringType)
  )
)
***********************************************************************************************************************
Data 2.json
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":{"age":"1","married":"yes"}}
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":{"age":"1","married":"yes"}}
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":{"age":"1","married":"yes"}}
Supported Schema
// Schema for the Data 2.json sample records, with fields supplied as Lists.
// "personal" is a nested JSON object in the data, so it is modeled as a nested struct.
val schema = {
  // Struct describing the {"age": ..., "married": ...} object.
  val personal = StructType(List(
    StructField("age", StringType),
    StructField("married", StringType)
  ))

  StructType(List(
    StructField("fullname", StringType),
    StructField("sex", StringType),
    StructField("address", StringType),
    StructField("personal", personal)
  ))
}
// Schema for the Data 2.json sample records, with fields supplied as Arrays.
// "personal" is a nested JSON object in the data, so it is modeled as a nested struct.
val schema = {
  // Struct describing the {"age": ..., "married": ...} object.
  val personal = StructType(Array(
    StructField("age", StringType),
    StructField("married", StringType)
  ))

  StructType(Array(
    StructField("fullname", StringType),
    StructField("sex", StringType),
    StructField("address", StringType),
    StructField("personal", personal)
  ))
}
***********************************************************************************************************************
Data 3.json
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":[{"age":"1","married":"yes"}, {"age":"2","married":"no"}]}
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":[{"age":"1","married":"yes"}, {"age":"2","married":"no"}]}
{"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":[{"age":"1","married":"yes"}, {"age":"2","married":"no"}]}
Supported Schema
// Schema for the Data 3.json sample records, with fields supplied as Lists.
// "personal" is a JSON array of objects in the data, so the element struct
// is wrapped in ArrayType.
val schema = {
  // Struct describing one {"age": ..., "married": ...} element of the array.
  val personalEntry = StructType(List(
    StructField("age", StringType),
    StructField("married", StringType)
  ))

  StructType(List(
    StructField("fullname", StringType),
    StructField("sex", StringType),
    StructField("address", StringType),
    StructField("personal", ArrayType(personalEntry))
  ))
}
// Schema for the Data 3.json sample records, with fields supplied as Arrays.
// "personal" holds a JSON array of {"age", "married"} objects in the sample data,
// so its type must be ArrayType wrapping the element struct; declaring it as a
// bare StructType would describe a single nested object and not match the records.
val schema = StructType(Array(
  StructField("fullname", StringType),
  StructField("sex", StringType),
  StructField("address", StringType),
  StructField("personal", ArrayType(StructType(Array(
    StructField("age", StringType),
    StructField("married", StringType)
  ))))
))
***********************************************************************************************************************
Data 4.json
{"name":"Michael", "age":10}
{"name":"Andy", "age":30}
{"name":"Justin"}
// Column names are listed once, space-separated, and expanded into fields below.
// Note that "gender" does not occur in the Data 4.json sample records and "age"
// is numeric there; every column is nevertheless declared as a nullable string.
val schemaString = "name age gender"
val schema = StructType(
  for (column <- schemaString.split(" "))
    yield StructField(column, StringType, nullable = true)
)
***********************************************************************************************************************
Data 5.json
{"filename":"details","attributes":{"name":"Michael", "age":10, "gender":"M"}}
// Names of the keys inside the nested "attributes" object of Data 5.json,
// listed once as a space-separated string.
val schemaString = "name age gender"
val schema = {
  // One nullable string field per name in schemaString.
  val attributeFields = schemaString
    .split(" ")
    .map(column => StructField(column, StringType, nullable = true))

  // Top level: the "filename" string plus the nested "attributes" struct.
  StructType(List(
    StructField("filename", StringType, nullable = true),
    StructField("attributes", StructType(attributeFields))
  ))
}
***********************************************************************************************************************
Data 6.json
{"id":"1", "source":{ "dev1":{"name":"Abe","level":"Principal"} , "dev2":{"name":"Anj","level":"Staff"}}}
// Schema for the Data 6.json sample record, assembled with the StructType builder.
// "source" is an object whose keys vary ("dev1", "dev2", ...), so it is modeled as
// a map from string keys to a struct rather than as a struct with fixed field names.
val schema1 = {
  // Struct describing each map value: {"name": ..., "level": ...}.
  val developer = new StructType()
    .add("name", StringType)
    .add("level", StringType)

  new StructType()
    .add("id", StringType)
    .add("source", MapType(StringType, developer))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment