@TomLous
Created May 9, 2017 09:35
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.functions._
val spark = SparkSession.builder().master("local").appName("jobposting").getOrCreate()
import spark.implicits._
// f1: ignores x; yields the struct's first field (a) plus one, the character code
val f1 = udf { (x: Double, y: Row) => y.getInt(0) + 1 }
// f2: derives a secondary sort key from the struct fields a (index 0) and b (index 1)
val f2 = udf { (x: Int, y: Row) => (y.getInt(1) / (x + 3)) - y.getInt(0) }
// f3: renders an Int as its character
val f3 = udf { (x: Int) => x.toChar.toString }
val t = List((104,630,189), (99,1485,756), (113,904,315), (111,339,0), (31,726,1197), (82,2040,1323), (114,1624,693), (113,2875,1386), (116,2261,1008), (116,1508,630), (96,873,378), (114,1495,630), (110,1356,567), (104,1908,945), (64,192,0), (107,1650,756), (100,824,315), (31,320,441), (96,2425,1386), (98,2020,1071), (31,217,252), (120,480,63), (110,1887,882), (107,1430,630), (31,256,315), (104,1664,819), (57,627,504), (107,2507,1260), (96,2277,1260), (96,1455,756), (61,1088,882), (120,492,63), (59,1298,1197), (115,1755,756), (102,630,189), (31,608,1008), (107,642,189), (98,1666,882), (100,2040,1071), (31,792,1323), (109,1760,819), (31,384,567), (72,1554,1134), (110,1243,504), (97,1940,1071), (105,2120,1071), (50,1122,1197), (106,2725,1386), (108,1296,567), (115,2415,1134), (110,1540,693), (31,155,126), (111,2775,1386), (96,490,126), (99,808,315), (109,666,189), (111,336,0), (31,352,504), (98,2254,1260), (31,744,1323), (104,535,126), (107,330,0), (63,924,693), (31,672,1134), (114,2204,1008), (112,1824,819), (107,990,378), (31,558,945), (96,2231,1260), (45,768,819), (37,912,1323), (103,1957,1008), (83,935,504), (115,1170,441), (31,128,63), (82,1870,1197), (108,545,126), (116,833,252), (115,1035,378), (116,1638,693), (114,1140,441), (110,784,252), (115,2124,945), (120,847,252), (104,954,378), (118,2142,945), (75,924,567), (110,1130,441), (31,714,1134), (59,793,630), (108,1870,882), (97,396,63))
spark.createDataFrame(t).toDF("a", "b", "c")
  .groupBy('c).agg(collect_list(struct('a, 'b)).as("d"))            // collect (a, b) pairs per value of c
  .select(('c / lit((2 << 5) - 1)).alias("e"), explode('d).as("f")) // e = c / 63
  .select('e, f1('e, 'f).as("g"), f2('e, 'f).as("h"), 'f)
  .sort('e.desc, 'h.desc)
  .select(f3('g).as("i"))
  .agg(concat_ws("", collect_list('i)).as("j"))                     // concatenate the characters
  .show(false)
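For readers without a Spark shell at hand, the same decoding can be sketched with plain Scala collections: each tuple (a, b, c) contributes the character (a + 1).toChar, ordered by c / 63 descending and then by b / (c/63 + 3) - a descending. This is an equivalent sketch, not the gist's own code; it assumes the tuple list is copied unchanged from above.

```scala
// Plain-Scala sketch of the Spark pipeline above, using the same tuple data.
val t = List((104,630,189), (99,1485,756), (113,904,315), (111,339,0), (31,726,1197), (82,2040,1323), (114,1624,693), (113,2875,1386), (116,2261,1008), (116,1508,630), (96,873,378), (114,1495,630), (110,1356,567), (104,1908,945), (64,192,0), (107,1650,756), (100,824,315), (31,320,441), (96,2425,1386), (98,2020,1071), (31,217,252), (120,480,63), (110,1887,882), (107,1430,630), (31,256,315), (104,1664,819), (57,627,504), (107,2507,1260), (96,2277,1260), (96,1455,756), (61,1088,882), (120,492,63), (59,1298,1197), (115,1755,756), (102,630,189), (31,608,1008), (107,642,189), (98,1666,882), (100,2040,1071), (31,792,1323), (109,1760,819), (31,384,567), (72,1554,1134), (110,1243,504), (97,1940,1071), (105,2120,1071), (50,1122,1197), (106,2725,1386), (108,1296,567), (115,2415,1134), (110,1540,693), (31,155,126), (111,2775,1386), (96,490,126), (99,808,315), (109,666,189), (111,336,0), (31,352,504), (98,2254,1260), (31,744,1323), (104,535,126), (107,330,0), (63,924,693), (31,672,1134), (114,2204,1008), (112,1824,819), (107,990,378), (31,558,945), (96,2231,1260), (45,768,819), (37,912,1323), (103,1957,1008), (83,935,504), (115,1170,441), (31,128,63), (82,1870,1197), (108,545,126), (116,833,252), (115,1035,378), (116,1638,693), (114,1140,441), (110,784,252), (115,2124,945), (120,847,252), (104,954,378), (118,2142,945), (75,924,567), (110,1130,441), (31,714,1134), (59,793,630), (108,1870,882), (97,396,63))

val message = t
  .map { case (a, b, c) =>
    val e = c / 63              // every c is a multiple of 63, matching 'c / lit((2 << 5) - 1)
    val h = b / (e + 3) - a     // mirrors f2
    (e, h, (a + 1).toChar)      // mirrors f1 followed by f3
  }
  .sortBy { case (e, h, _) => (-e, -h) }   // mirrors sort('e.desc, 'h.desc)
  .map(_._3)
  .mkString

println(message)
```

Because the Spark job applies a full sort before concatenating, the groupBy/explode round trip drops out and a single map-and-sort over the tuples reproduces the same string.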