/* Part 0: Initialise */
// Produces a fresh pseudo-random Int on every reference; the bound passed to
// nextInt is exclusive, so values fall in the range 0 to 9999.
def randomInt: Int = {
  val exclusiveUpperBound = 10000
  scala.util.Random.nextInt(exclusiveUpperBound)
}
// Left-hand input of the joins: 100000 rows, each a triple of randomInt values,
// exposed as columns cID / c2 / c3.
// NOTE(review): `sc` and `toDF` are presumably supplied by the spark-shell session — confirm.
val dataframe = {
  val rowCount = 100000
  val rows = Seq.fill(rowCount)((randomInt, randomInt, randomInt))
  sc.parallelize(rows).toDF("cID", "c2", "c3")
}
// Right-hand input of the joins: 100000 rows, each a triple of randomInt values,
// exposed as columns c1 / customerID / contactID.
// NOTE(review): `sc` and `toDF` are presumably supplied by the spark-shell session — confirm.
val anotherDataframe = {
  val rowCount = 100000
  val rows = Seq.fill(rowCount)((randomInt, randomInt, randomInt))
  sc.parallelize(rows).toDF("c1", "customerID", "contactID")
}
/* Part 1: Using an Or in a Join clause */
// One left join whose condition is the disjunction of two equality tests:
// a row of `dataframe` pairs with a row of `anotherDataframe` when its cID
// equals that row's customerID or its contactID.
val resultDF = {
  val matchesCustomer = $"cID" === $"customerID"
  val matchesContact = $"cID" === $"contactID"
  dataframe.join(anotherDataframe, matchesCustomer || matchesContact, "left")
}
// Print the plan chosen for the OR-based join so it can be compared with Part 2.
resultDF.explain()
/* Part 2: Using a Union instead */
// Rebuilds the Part 1 result (left join on `cID = customerID OR cID = contactID`)
// out of joins that each carry a plain equality predicate.
//
// Simply unioning two left joins is NOT equivalent to the OR join:
//   - `union` keeps duplicates, so a pair satisfying both equalities shows up twice;
//   - each left join emits its own null-padded row for a left row it cannot match,
//     even when the other join did find a partner for that row.
// The result is therefore assembled from three disjoint pieces instead.
//
// The value is named `unionResultDF` rather than `resultDF` so that it does not
// redefine the Part 1 value when the script is compiled or pasted as one unit.
import org.apache.spark.sql.functions.lit

// Every (left, right) pair whose cID equals customerID.
val matchedOnCustomer =
  dataframe.join(anotherDataframe, $"cID" === $"customerID", "inner")

// Pairs whose cID equals contactID but NOT customerID. The null-safe comparison
// excludes exactly the pairs already present in `matchedOnCustomer`, so the union
// below never emits the same pair twice.
val matchedOnContactOnly = dataframe.join(
  anotherDataframe,
  $"cID" === $"contactID" && !($"cID" <=> $"customerID"),
  "inner"
)

// Left rows without a partner under either equality, padded with nulls in the
// right-hand columns exactly as a left join would emit them. Column order must
// match the joined frames because `union` resolves columns by position.
val unmatchedLeft = dataframe
  .join(anotherDataframe, $"cID" === $"customerID", "left_anti")
  .join(anotherDataframe, $"cID" === $"contactID", "left_anti")
  .select(
    $"cID",
    $"c2",
    $"c3",
    lit(null).cast("int").as("c1"),
    lit(null).cast("int").as("customerID"),
    lit(null).cast("int").as("contactID")
  )

val unionResultDF = matchedOnCustomer.union(matchedOnContactOnly).union(unmatchedLeft)
// Print the plan for the union-based variant; compare it with the Part 1 output.
unionResultDF.explain()
/*
 * Created October 11, 2019 16:58
 * Save sujithjay/097b80f7799d5d8389d1df650df377eb to your computer and use it in GitHub Desktop.
 * Union over Or
 * Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
 */