Skip to content

Instantly share code, notes, and snippets.

@NeerajBhadani
NeerajBhadani / arr_import_lib.scala
Last active May 25, 2020 09:39
Import Required Libraries
import org.apache.spark.sql.functions._
@NeerajBhadani
NeerajBhadani / arr_create_dataframe.scala
Last active May 25, 2020 09:40
Create Spark DataFrame
// Sample source data: three plain columns that later snippets aggregate into
// array columns (array_col1 / array_col2) on `df` and `full_df`.
// NOTE(review): the original gist was truncated at this point — the closing of
// the Seq literal and the .toDF call are reconstructed; confirm the column
// names ("col1", "col2", "col3") against the original gist.
val initial_df = Seq(
  ("x", 4, 1),
  ("x", 6, 2),
  ("z", 7, 3),
  ("a", 3, 4),
  ("z", 5, 2),
  ("x", 7, 3),
  ("x", 9, 7),
  ("z", 1, 8),
  ("z", 4, 9)
).toDF("col1", "col2", "col3")
// Inspect the schema of full_df — presumably derived from initial_df with
// array-typed columns (array_col1, array_col2); its construction is not
// visible in this excerpt, so confirm against the full gist.
full_df.printSchema()
// array_contains: per-row boolean — does array_col2 hold the literal value 3?
val containsThree = array_contains($"array_col2", 3)
val arr_contains_df = df.withColumn("result", containsThree)
arr_contains_df.show()
// array_distinct: remove duplicate elements within each row's array_col2.
val dedupedElements = array_distinct($"array_col2")
val arr_distinct_df = df.withColumn("result", dedupedElements)
arr_distinct_df.show()
// array_except: elements of array_col1 that are absent from array_col2.
val exceptElements = array_except($"array_col1", $"array_col2")
val arr_except_df = full_df.withColumn("result", exceptElements)
arr_except_df.show()
// array_intersect: elements present in both array_col1 and array_col2.
val commonElements = array_intersect($"array_col1", $"array_col2")
val arr_intersect_df = full_df.withColumn("result", commonElements)
arr_intersect_df.show()
// array_join: collapse array_col2 into one comma-separated string per row.
val joinedAsString = array_join($"array_col2", ",")
val arr_join_df = df.withColumn("result", joinedAsString)
arr_join_df.show()
// array_max: the largest element of array_col2 for each row.
val maxElement = array_max($"array_col2")
val arr_max_df = df.withColumn("result", maxElement)
arr_max_df.show()
// array_min: the smallest element of array_col2 for each row.
val minElement = array_min($"array_col2")
val arr_min_df = df.withColumn("result", minElement)
arr_min_df.show()