Last active
March 2, 2023 16:05
-
-
Save hakanilter/b1e9db86b19f8341f5ce774670a3faa0 to your computer and use it in GitHub Desktop.
PySpark schema save/load example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from pyspark.sql.types import * | |
def save_schema_as_json(df, schema_file):
    """
    Saves dataframe schema as json

    The schema is obtained as a JSON string from ``df.schema.json()``,
    re-serialised with a 4-space indent so the file is easy to read and
    diff, and written to ``schema_file`` (overwriting any existing file).

    :param df: object exposing ``schema.json()`` that returns a JSON string
        (a PySpark DataFrame in the intended use).
    :param schema_file: path of the file to write.
    :raises OSError: if ``schema_file`` cannot be opened for writing.
    """
    # Parse and re-dump purely to apply indentation; the content is unchanged.
    pretty_schema = json.dumps(json.loads(df.schema.json()), indent=4)
    # Explicit encoding keeps the file independent of the platform's locale.
    with open(schema_file, "w", encoding="utf-8") as f:
        f.write(pretty_schema)
def load_schema_as_json(schema_file, path, session=None):
    """
    Loads json data using schema file

    Reads the JSON schema stored in ``schema_file``, rebuilds a
    ``StructType`` from it and reads the JSON data at ``path`` with that
    schema applied.

    :param schema_file: path of a schema file containing the JSON form of a
        ``StructType``.
    :param path: location of the JSON data, passed unchanged to
        ``session.read.schema(...).json(path)``.
    :param session: optional SparkSession to read with. When omitted, a
        global named ``spark`` is used if one exists (as notebooks and the
        PySpark shell provide); otherwise a session is obtained with
        ``SparkSession.builder.getOrCreate()``.
    :return: the result of ``session.read.schema(schema).json(path)``.
    :raises OSError: if ``schema_file`` cannot be opened.
    :raises json.JSONDecodeError: if ``schema_file`` is not valid JSON.
    """
    with open(schema_file, encoding="utf-8") as f:
        schema = StructType.fromJson(json.load(f))

    if session is None:
        try:
            # Keep the original behaviour where an ambient ``spark`` exists.
            session = spark
        except NameError:
            # Plain scripts have no injected ``spark``; get or create one.
            from pyspark.sql import SparkSession

            session = SparkSession.builder.getOrCreate()

    return session.read.schema(schema).json(path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment