Skip to content

Instantly share code, notes, and snippets.

@dgadiraju
Created March 14, 2017 05:54
Show Gist options
  • Save dgadiraju/93d28037d021beb64afe20433470b7ea to your computer and use it in GitHub Desktop.
Save dgadiraju/93d28037d021beb64afe20433470b7ea to your computer and use it in GitHub Desktop.
def topNProducts(rec, topN):
    """Return an iterator over the ``topN`` records with the largest field 4.

    Args:
        rec: Iterable of comma-delimited strings. The field at index 4 of
            every string must be parseable by ``float()``.
        topN: Maximum number of records to yield.

    Returns:
        A single-pass iterator over at most ``topN`` records, ordered from
        the largest to the smallest value of field 4. Records with equal
        values keep their original relative order.

    Raises:
        ValueError: If field 4 of a record cannot be converted to float.
        IndexError: If a record has fewer than five comma-separated fields.
    """
    # Imported locally (as the original did) so the function stays
    # self-contained when it is shipped to another process.
    import heapq

    # nlargest is equivalent to sorted(..., reverse=True)[:topN] but avoids
    # fully sorting the group when only a few records are wanted.
    return iter(heapq.nlargest(topN, rec, key=lambda k: float(k.split(",")[4])))
# Read the product records as lines of text.
# NOTE(review): `sc` is not defined in this file; presumably the SparkContext
# provided by the pyspark shell -- confirm.
products = sc.textFile("/public/retail_db/products")

# Drop records whose field at index 4 is empty, since topNProducts converts
# that field with float().
productsFiltered = products.filter(lambda rec: rec.split(",")[4] != "")

# Key every record by the integer in field 1, group the records sharing a key,
# and keep the 5 records with the largest field 4 within each group.
topPerGroup = (
    productsFiltered
    .map(lambda rec: (int(rec.split(",")[1]), rec))
    .groupByKey()
    .flatMap(lambda rec: topNProducts(rec[1], 5))
)

# Collect the selected records and print them one per line.
for i in topPerGroup.collect():
    print(i)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment