Last active
May 22, 2017 06:59
-
-
Save dgadiraju/2967a86986c828db16ee1973f234e5e7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Base directory of the retail_db data set. Exactly one definition of path may be active; | |
| // the alternative location is: val path = "/public/retail_db" | |
| val path = "/Users/itversity/Research/data/retail_db" | |
| // Every element of orders is one raw text line read from the orders directory. | |
| val orders = sc.textFile(path + "/orders") | |
| // Orders sorted by status (field 3 of each comma-separated line), ascending. | |
| // Each line is keyed by its status, sorted on that key, and the key is dropped again; | |
| // the first 100 lines of the sorted result are printed. | |
| orders. | |
| keyBy(_.split(",")(3)). | |
| sortByKey(). | |
| values. | |
| take(100). | |
| foreach(println) | |
| // Orders sorted by the composite key (status, date) = (field 3, field 1). | |
| // ascending = false reverses the ordering of the whole key, so status descends | |
| // and, within one status, date descends as well. The first 100 lines are printed. | |
| orders. | |
| map { line => | |
| val fields = line.split(",") | |
| ((fields(3), fields(1)), line) | |
| }. | |
| sortByKey(ascending = false). | |
| values. | |
| take(100). | |
| foreach(println) | |
| // Top 5 products in each category, step 1: group the raw product lines by category. | |
| // Each line is split once; lines whose field 4 is empty are dropped (field 4 is parsed | |
| // with toFloat when the groups are ranked), and the rest are keyed by field 1 as an Int. | |
| val products = sc.textFile(path + "/products") | |
| val productsGroupByCategory = products. | |
| map(line => (line.split(","), line)). | |
| filter { case (fields, _) => fields(4) != "" }. | |
| map { case (fields, line) => (fields(1).toInt, line) }. | |
| groupByKey | |
| // Step 2: walk the categories in ascending key order and keep, per category, the five | |
| // lines with the largest field 4 (negating the parsed Float makes sortBy descend). | |
| // Only the first 100 resulting lines are printed. | |
| productsGroupByCategory. | |
| sortByKey(). | |
| flatMap { case (_, categoryLines) => | |
| categoryLines.toList.sortBy(line => -line.split(",")(4).toFloat).take(5) | |
| }. | |
| take(100). | |
| foreach(println) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment