pphetra · March 7, 2020 04:58
diff --git a/ex2.py b/ex2.py
 # Import the functions module under an alias instead of `from ... import max`,
 # which would shadow Python's builtin max() for the rest of the module.
 from pyspark.sql import functions as F

 # Largest value in the "Deaths" column; take(1) fetches the single
 # aggregate row to the driver.
 covid060320.select(F.max("Deaths")).take(1)
diff --git a/ex2.scala b/ex2.scala
 // Register the DataFrame as a temporary view so it can be queried with SQL.
 covid060320.createOrReplaceTempView("covit06032020")

 // Number of rows per Country/Region value, expressed in SQL.
 // The column name contains '/', so it must be backtick-quoted: unquoted,
 // Spark SQL parses Country/Region as column Country divided by column Region.
 val sqlWay = spark.sql("""
 SELECT `Country/Region`, count(1)
 FROM covit06032020
 GROUP BY `Country/Region`
 """)

 // Number of rows per "Country/Region" value, built with the DataFrame API.
 val dataFrameWay = covid060320.groupBy("Country/Region").count()

 // Maximum of the Deaths column, queried through SQL on the temp view.
 val maxDeathsViaSql = spark.sql("SELECT max(Deaths) from covit06032020")
 maxDeathsViaSql.take(1)

 import org.apache.spark.sql.functions.max

 // The same aggregate expressed with the DataFrame API.
 val maxDeathsViaDf = covid060320.select(max("Deaths"))
 maxDeathsViaDf.take(1)

 // in Scala
 // Five Country/Region values with the largest summed `count`, highest first.
 // The query must read the name passed to createOrReplaceTempView
 // ("covit06032020"); the Scala variable name covid060320 is not a SQL table.
 // `Country/Region` is backtick-quoted because an unquoted '/' is parsed as
 // division.
 // NOTE(review): assumes the data has a column named `count` -- confirm.
 val maxSql = spark.sql("""
 SELECT `Country/Region`, sum(count) as country
 FROM covit06032020
 GROUP BY `Country/Region`
 ORDER BY sum(count) DESC
 LIMIT 5
 """)

 maxSql.show()

 // in Scala
 import org.apache.spark.sql.functions.desc

 // Total of the "count" column per Country/Region value, with the aggregate
 // column renamed from "sum(count)" to "country".
 val countryTotals = covid060320
   .groupBy("Country/Region")
   .sum("count")
   .withColumnRenamed("sum(count)", "country")

 // Largest totals first; print only the first five rows.
 countryTotals.orderBy(desc("country")).limit(5).show()
	# Import the functions module under an alias instead of `from ... import max`,
	# which would shadow Python's builtin max() for the rest of the module.
	from pyspark.sql import functions as F

	# Largest value in the "Deaths" column; take(1) fetches the single
	# aggregate row to the driver.
	covid060320.select(F.max("Deaths")).take(1)
	// Register the DataFrame as a temporary view so it can be queried with SQL.
	covid060320.createOrReplaceTempView("covit06032020")

	// Number of rows per Country/Region value, expressed in SQL.
	// The column name contains '/', so it must be backtick-quoted: unquoted,
	// Spark SQL parses Country/Region as column Country divided by column Region.
	val sqlWay = spark.sql("""
	SELECT `Country/Region`, count(1)
	FROM covit06032020
	GROUP BY `Country/Region`
	""")

	// Number of rows per "Country/Region" value, built with the DataFrame API.
	val dataFrameWay = covid060320.groupBy("Country/Region").count()

	// Maximum of the Deaths column, queried through SQL on the temp view.
	val maxDeathsViaSql = spark.sql("SELECT max(Deaths) from covit06032020")
	maxDeathsViaSql.take(1)

	import org.apache.spark.sql.functions.max

	// The same aggregate expressed with the DataFrame API.
	val maxDeathsViaDf = covid060320.select(max("Deaths"))
	maxDeathsViaDf.take(1)

	// in Scala
	// Five Country/Region values with the largest summed `count`, highest first.
	// The query must read the name passed to createOrReplaceTempView
	// ("covit06032020"); the Scala variable name covid060320 is not a SQL table.
	// `Country/Region` is backtick-quoted because an unquoted '/' is parsed as
	// division.
	// NOTE(review): assumes the data has a column named `count` -- confirm.
	val maxSql = spark.sql("""
	SELECT `Country/Region`, sum(count) as country
	FROM covit06032020
	GROUP BY `Country/Region`
	ORDER BY sum(count) DESC
	LIMIT 5
	""")

	maxSql.show()

	// in Scala
	import org.apache.spark.sql.functions.desc

	// Total of the "count" column per Country/Region value, with the aggregate
	// column renamed from "sum(count)" to "country".
	val countryTotals = covid060320
	  .groupBy("Country/Region")
	  .sum("count")
	  .withColumnRenamed("sum(count)", "country")

	// Largest totals first; print only the first five rows.
	countryTotals.orderBy(desc("country")).limit(5).show()