Created
November 24, 2022 07:28
-
-
Save tecmaverick/38dbcb78ae4e3c907ca7a06f6d5d19f8 to your computer and use it in GitHub Desktop.
Spark JSON Schema Scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Data 1.json | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"} | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"} | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786"} | |
| Supported Schema | |
| val schema = StructType(List( | |
| StructField("fullname", StringType), | |
| StructField("sex", StringType), | |
| StructField("address", StringType) | |
| )) | |
| val schema = StructType(Array( | |
| StructField("fullname", StringType), | |
| StructField("sex", StringType), | |
| StructField("address", StringType) | |
| )) | |
| *********************************************************************************************************************** | |
| Data 2.json | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":{"age":"1","married":"yes"}} | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":{"age":"1","married":"yes"}} | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":{"age":"1","married":"yes"}} | |
| Supported Schema | |
| val schema = StructType(List( | |
| StructField("fullname", StringType), | |
| StructField("sex", StringType), | |
| StructField("address", StringType), | |
| StructField("personal", StructType(List( | |
| StructField("age", StringType), | |
| StructField("married", StringType) | |
| ))), | |
| )) | |
| val schema = StructType(Array( | |
| StructField("fullname", StringType), | |
| StructField("sex", StringType), | |
| StructField("address", StringType), | |
| StructField("personal", StructType(Array( | |
| StructField("age", StringType), | |
| StructField("married", StringType) | |
| ))), | |
| )) | |
| *********************************************************************************************************************** | |
| Data 3.json | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":[{"age":"1","married":"yes"}, {"age":"2","married":"no"}]} | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":[{"age":"1","married":"yes"}, {"age":"2","married":"no"}]} | |
| {"fullname": "Paisley Hoover", "sex": "female", "address": "Third Court Dr. Windermere, FL34786", "personal":[{"age":"1","married":"yes"}, {"age":"2","married":"no"}]} | |
| Supported Schema | |
| val schema = StructType(List( | |
| StructField("fullname", StringType), | |
| StructField("sex", StringType), | |
| StructField("address", StringType), | |
| StructField("personal", ArrayType(StructType(List( | |
| StructField("age", StringType), | |
| StructField("married", StringType) | |
| )))), | |
| )) | |
| // Array-based variant of the Data 3.json schema. "personal" is a JSON array of objects there, so its element struct is wrapped in ArrayType (same shape as the List-based variant). | |
| val schema = StructType(Array( | |
| StructField("fullname", StringType), | |
| StructField("sex", StringType), | |
| StructField("address", StringType), | |
| StructField("personal", ArrayType(StructType(Array( | |
| StructField("age", StringType), | |
| StructField("married", StringType) | |
| )))), | |
| )) | |
| *********************************************************************************************************************** | |
| Data 4.json | |
| {"name":"Michael", "age":10} | |
| {"name":"Andy", "age":30} | |
| {"name":"Justin"} | |
| val schemaString = "name age gender" | |
| val schema = StructType(schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, true))) | |
| *********************************************************************************************************************** | |
| Data 5.json | |
| {"filename":"details","attributes":{"name":"Michael", "age":10, "gender":"M"}} | |
| val schemaString = "name age gender" | |
| val schema = StructType( | |
| StructField("filename", StringType, true) :: | |
| StructField( | |
| "attributes", | |
| StructType(schemaString.split(" ").map(fieldName => | |
| StructField(fieldName, StringType, true) | |
| )) | |
| ) :: Nil | |
| ) | |
| *********************************************************************************************************************** | |
| Data 6.json | |
| {"id":"1", "source":{ "dev1":{"name":"Abe","level":"Principal"} , "dev2":{"name":"Anj","level":"Staff"}}} | |
| val schema1 = new StructType() | |
| .add("id", StringType) | |
| .add("source", MapType(StringType, | |
| new StructType() | |
| .add("name", StringType) | |
| .add("level", StringType) | |
| ) | |
| ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment