Skip to content

Instantly share code, notes, and snippets.

@softberries
Created October 23, 2017 12:12
Show Gist options
  • Save softberries/5a8f631fcc86a3deb53e8449e209cf0d to your computer and use it in GitHub Desktop.
Save softberries/5a8f631fcc86a3deb53e8449e209cf0d to your computer and use it in GitHub Desktop.
Scalding Job Definition
/**
  * Scalding job that counts how many parsed tokens share each age.
  *
  * Reads text lines from `args("input")`, turns every line that carries both a
  * `DisplayName="..."` and an `Age="<digits>"` attribute into a `Token`, groups
  * the tokens by age and writes `(age, count)` pairs as TSV to `args("output")`.
  * Lines that cannot be parsed are dropped.
  *
  * @param args job arguments; `input` and `output` are read
  */
class AgeCounterJob(args: Args) extends Job(args) {

  // Compiled once per instance instead of once per input line. Marked
  // transient + lazy so that, if this instance is serialized, the compiled
  // patterns are rebuilt on first use rather than shipped with it.
  // `[^"]*` stops at the first closing quote; a greedy `.*` would run on to the
  // last quote of the line and swallow the attributes that follow the name.
  @transient private lazy val namePattern = "DisplayName=\"([^\"]*)\"".r
  @transient private lazy val agePattern = "Age=\"([0-9]+)\"".r

  val lines: TypedPipe[String] = TypedPipe.from(TextLine(args("input")))

  // xmlToToken returns an Option, so unparseable lines vanish in the flatMap.
  val tokens: TypedPipe[Token] = lines.flatMap(line => xmlToToken(line))

  val byAge = tokens.groupBy(_.age)

  byAge.size
    .write(TypedTsv[(Int, Long)](args("output")))

  /**
    * Extracts a `Token` from a single line of input.
    *
    * @param content one input line
    * @return `Some(Token(name, age))` when the line contains a `DisplayName`
    *         attribute and an `Age` attribute whose digits fit in an `Int`;
    *         `None` otherwise
    */
  def xmlToToken(content: String): Option[Token] =
    for {
      name <- namePattern.findFirstMatchIn(content).map(_.group(1))
      ageText <- agePattern.findFirstMatchIn(content).map(_.group(1))
      // `[0-9]+` is unbounded, so the digits may overflow Int; skip such
      // lines instead of failing the whole job with NumberFormatException.
      age <- scala.util.Try(ageText.toInt).toOption
    } yield Token(name, age)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment