Last active
March 7, 2020 04:58
-
-
Save pphetra/7bb3ba7a4407c6cda19ac27bbe9305c6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the functions module under an alias rather than importing `max` by
# name: a bare `from pyspark.sql.functions import max` shadows Python's
# built-in max() for everything that follows in the session.
from pyspark.sql import functions as F

# Largest value found in the "Deaths" column of the covid060320 DataFrame.
covid060320.select(F.max("Deaths")).take(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Expose the DataFrame to Spark SQL under the view name "covit06032020".
covid060320.createOrReplaceTempView("covit06032020")

// Number of rows per country/region, written as SQL.
// The column name contains a slash, so it has to be backtick-quoted: left
// unquoted, Spark SQL reads Country/Region as "column Country divided by
// column Region" and the query fails to resolve.
val sqlWay = spark.sql("""
  SELECT `Country/Region`, count(1)
  FROM covit06032020
  GROUP BY `Country/Region`
""")
// Number of rows per country/region via the DataFrame API: group on the
// "Country/Region" column, then count the rows in each group.
val dataFrameWay = covid060320.groupBy("Country/Region").count()
import org.apache.spark.sql.functions.max

// Maximum of the Deaths column, fetched two ways.

// 1) SQL against the "covit06032020" temporary view.
val maxDeathsQuery = "SELECT max(Deaths) from covit06032020"
spark.sql(maxDeathsQuery).take(1)

// 2) DataFrame API directly on covid060320.
val maxDeathsColumn = max("Deaths")
covid060320.select(maxDeathsColumn).take(1)
// in Scala
// Top five countries/regions by summed `count`, written as SQL.
//
// Two fixes relative to the earlier form of this query:
//  - It reads from "covit06032020", the name the temporary view is registered
//    under; `covid060320` is the DataFrame variable, not a queryable view name.
//  - `Country/Region` is backtick-quoted, because an unquoted slash is parsed
//    as division of column Country by column Region.
//
// NOTE(review): the other queries in this file aggregate `Deaths`; confirm
// that a `count` column actually exists in this dataset.
// NOTE(review): the summed total is aliased as `country`, which reads like a
// name rather than a total -- kept as-is so the output column is unchanged.
val maxSql = spark.sql("""
  SELECT `Country/Region`, sum(count) as country
  FROM covit06032020
  GROUP BY `Country/Region`
  ORDER BY sum(count) DESC
  LIMIT 5
""")

maxSql.show()
// in Scala
import org.apache.spark.sql.functions.desc

// DataFrame-API version of the "top five" query: sum `count` per
// country/region and rename the aggregate column to "country".
// NOTE(review): the other queries in this file aggregate `Deaths`; confirm
// that a `count` column actually exists in this dataset.
val totalsByCountry = covid060320
  .groupBy("Country/Region")
  .sum("count")
  .withColumnRenamed("sum(count)", "country")

// Largest totals first, keep five rows, print them.
totalsByCountry.sort(desc("country")).limit(5).show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment