Created
August 10, 2012 11:51
-
-
Save remeniuk/3313752 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Extracts the user ID from a single player's hand-history record and wraps
 * its string representation in a `Text` value.
 *
 * Defined as a function value so it can be handed directly to collection
 * combinators such as `map`.
 */
val userId: PlayerHandHistory => Text = playerHistory =>
  new Text(playerHistory.getUserId.toString)
/**
 * Builds a basic dictionary (in effect, an enumeration) of all players who
 * participated in the selected subset of hand-history records.
 *
 * Job arguments read by this class:
 *  - `hbasehost` is passed to the HBase source;
 *  - `output` is the location of the resulting text file.
 */
class Builder(args: Args) extends Job(args) {

  // Input tap: an HTable with hand-history entries
  // (hand history id -> hand history record, serialized with ProtoBuf).
  val input = new HBaseSource("hand", args("hbasehost"), 'handId, Array("d"), Array('blob))

  // Output tap: a plain text file with player IDs.
  val output = TextLine(args("output"))

  input
    .read
    .flatMap('blob -> 'player) {
      // Every hand-history record contains the list of players who took part
      // in the hand. At this first stage we simply extract their IDs and add
      // them to the flat list.
      // NOTE(review): `.map` on the parsed player list presumably relies on a
      // Java-to-Scala collection conversion being in scope; the import is not
      // visible here, so confirm it.
      blob: Array[Byte] =>
        HandHistory.parseFrom(blob).getPlayerList.map(userId)
    }
    .unique('player)  // remove duplicate user IDs
    .project('player) // keep only the 'player column of the tuple
    .write(output)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment