Created
January 12, 2022 17:16
-
-
Save cmpadden/845d1d3a749f617891ec4febc82997b3 to your computer and use it in GitHub Desktop.
PySpark: Union Multiple Dataframes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""PySpark example: union several DataFrames that share the same columns."""
from functools import reduce

from pyspark.sql import DataFrame, SparkSession

# Reuse an active session if one exists, otherwise start one for this demo.
spark = SparkSession.builder.appName('Union DFs').getOrCreate()

# Three small DataFrames with identical column names in identical order.
df1 = spark.createDataFrame([(1, "The"), (2, "Quick")], ['id', 'text'])
df2 = spark.createDataFrame([(3, "Brown"), (4, "Fox")], ['id', 'text'])
df3 = spark.createDataFrame([(5, "Jumped"), (6, "Over")], ['id', 'text'])

# DataFrame.union combines exactly two DataFrames, so reduce() folds it over
# the list from left to right: (df1 union df2) union df3.
# NOTE: per the PySpark docs, union() matches columns by position rather than
# by name and does not drop duplicate rows; that is fine here because all
# inputs share one column order.
reduce(DataFrame.union, [df1, df2, df3]).show()

# Expected output:
# +---+------+
# | id|  text|
# +---+------+
# |  1|   The|
# |  2| Quick|
# |  3| Brown|
# |  4|   Fox|
# |  5|Jumped|
# |  6|  Over|
# +---+------+
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment