Skip to content

Instantly share code, notes, and snippets.

@thanoojgithub
Created January 27, 2021 07:29
Show Gist options
  • Save thanoojgithub/6d9d9b94df3b3afa0f1458edd1dcd582 to your computer and use it in GitHub Desktop.
Save thanoojgithub/6d9d9b94df3b3afa0f1458edd1dcd582 to your computer and use it in GitHub Desktop.
SparkByExamples - Spark By Examples
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
val data = sc.parallelize(Seq((101,"ram","12-01-2021",10001,120.00),(102,"sam","12-01-2021",10002,130.00),(101,"ram","12-01-2021",10003,140.00),(103,"jam","12-01-2021",10004,150.00),(101,"ram","12-01-2021",10005,130.00),(103,"jam","12-01-2021",10006,120.00),(102,"sam","12-01-2021",10007,130.00)))
val dataDF = data.toDF("id","name","date","transid","amount")
val windowSpec = Window.partitionBy("id").orderBy('transid desc)
val dataDF1 = dataDF.withColumn("row_number",rank().over(windowSpec))
dataDF.printSchema
dataDF.show()
dataDF1.printSchema
dataDF1.show()
dataDF1.filter('row_number === 1).show()
dataDF1.createOrReplaceGlobalTempView("transdetails")
val transdetailsSQL = spark.sql("SELECT *,RANK() OVER (PARTITION BY id ORDER BY transid DESC) AS rank FROM global_temp.transdetails")
transdetailsSQL.show()
val transdetailsSQL1 = spark.sql("SELECT * FROM (SELECT *,RANK() OVER (PARTITION BY id ORDER BY transid DESC) AS rank FROM global_temp.transdetails a) where rank=1 ")
transdetailsSQL1.show()
-----------------------------------------------------------------------------------------------------------------------------
root
|-- id: integer (nullable = false)
|-- name: string (nullable = true)
|-- date: string (nullable = true)
|-- transid: integer (nullable = false)
|-- amount: double (nullable = false)
+---+----+----------+-------+------+
| id|name| date|transid|amount|
+---+----+----------+-------+------+
|101| ram|12-01-2021| 10001| 120.0|
|102| sam|12-01-2021| 10002| 130.0|
|101| ram|12-01-2021| 10003| 140.0|
|103| jam|12-01-2021| 10004| 150.0|
|101| ram|12-01-2021| 10005| 130.0|
|103| jam|12-01-2021| 10006| 120.0|
|102| sam|12-01-2021| 10007| 130.0|
+---+----+----------+-------+------+
root
|-- id: integer (nullable = false)
|-- name: string (nullable = true)
|-- date: string (nullable = true)
|-- transid: integer (nullable = false)
|-- amount: double (nullable = false)
|-- row_number: integer (nullable = true)
+---+----+----------+-------+------+----------+
| id|name| date|transid|amount|row_number|
+---+----+----------+-------+------+----------+
|101| ram|12-01-2021| 10005| 130.0| 1|
|101| ram|12-01-2021| 10003| 140.0| 2|
|101| ram|12-01-2021| 10001| 120.0| 3|
|102| sam|12-01-2021| 10007| 130.0| 1|
|102| sam|12-01-2021| 10002| 130.0| 2|
|103| jam|12-01-2021| 10006| 120.0| 1|
|103| jam|12-01-2021| 10004| 150.0| 2|
+---+----+----------+-------+------+----------+
+---+----+----------+-------+------+----------+
| id|name| date|transid|amount|row_number|
+---+----+----------+-------+------+----------+
|101| ram|12-01-2021| 10005| 130.0| 1|
|102| sam|12-01-2021| 10007| 130.0| 1|
|103| jam|12-01-2021| 10006| 120.0| 1|
+---+----+----------+-------+------+----------+
+---+----+----------+-------+------+----------+----+
| id|name| date|transid|amount|row_number|rank|
+---+----+----------+-------+------+----------+----+
|101| ram|12-01-2021| 10005| 130.0| 1| 1|
|101| ram|12-01-2021| 10003| 140.0| 2| 2|
|101| ram|12-01-2021| 10001| 120.0| 3| 3|
|102| sam|12-01-2021| 10007| 130.0| 1| 1|
|102| sam|12-01-2021| 10002| 130.0| 2| 2|
|103| jam|12-01-2021| 10006| 120.0| 1| 1|
|103| jam|12-01-2021| 10004| 150.0| 2| 2|
+---+----+----------+-------+------+----------+----+
+---+----+----------+-------+------+----------+----+
| id|name| date|transid|amount|row_number|rank|
+---+----+----------+-------+------+----------+----+
|101| ram|12-01-2021| 10005| 130.0| 1| 1|
|102| sam|12-01-2021| 10007| 130.0| 1| 1|
|103| jam|12-01-2021| 10006| 120.0| 1| 1|
+---+----+----------+-------+------+----------+----+
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment