Created
June 8, 2015 16:07
-
-
Save smartkiwi/7ba24daae41f99d3d6d1 to your computer and use it in GitHub Desktop.
scala spark example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// There is a table with two columns, books and readers, holding book IDs and reader IDs respectively. I need to remove from this table the readers who read more than 10 books.
import java.util.Random

// Fixed seed so the sample rows (and the results quoted in the comments below) are reproducible.
val rand = new Random(0)

/** One row of the table: `reader` read `book` (both are string IDs). */
case class BooksReaders(reader: String, book: String)

val books = Range(0, 10).map(i => s"book$i")
val readers = Range(0, 10).map(i => s"reader$i")

/** Draws `n` random rows from `readers` x `books`; the reader index is drawn before the book index. */
def randomRows(n: Int): IndexedSeq[BooksReaders] =
  Vector.fill(n) {
    val reader = readers(rand.nextInt(readers.length))
    val book = books(rand.nextInt(books.length))
    BooksReaders(reader, book)
  }

// br100 is not used below, but it has to be generated before br10: it advances `rand`,
// and the br10 rows (and therefore the outputs quoted below) depend on that generator state.
val br100 = randomRows(100)
val br10 = randomRows(10)

// A reader is removed when they have more than this many rows.
// It is 1 here so that the 10-row sample yields a non-empty result; raise it for real data.
val maxBooksPerReader = 1

// `sc` is the SparkContext provided by spark-shell.
val data = sc.parallelize(br10)

// Rows per reader, keeping only readers above the threshold.
// NOTE: this counts rows, so a repeated (reader, book) pair is counted once per occurrence.
val readersWithLotsOfBooksRDD = data
  .map(row => (row.reader, 1))
  .reduceByKey(_ + _)
  .filter { case (_, bookCount) => bookCount > maxBooksPerReader }

// The original table keyed by reader, so it can be matched against the readers to remove.
val readersWithBooksRDD = data.map(row => (row.reader, row.book))

readersWithLotsOfBooksRDD.collect()
//Array[(String, Int)] = Array((reader9,3), (reader5,2))

readersWithBooksRDD.collect()
//Array[(String, String)] = Array((reader5,book2), (reader9,book8), (reader8,book0), (reader0,book6), (reader3,book2), (reader4,book4), (reader2,book8), (reader9,book2), (reader9,book4), (reader5,book0))

// Drop every row whose reader key appears among the readers with too many books.
readersWithBooksRDD.subtractByKey(readersWithLotsOfBooksRDD).collect()
//Array((reader3,book2), (reader0,book6), (reader8,book0), (reader4,book4), (reader2,book8))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment