Skip to content

Instantly share code, notes, and snippets.

View natbusa's full-sized avatar

Nate Busa natbusa

View GitHub Profile
@natbusa
natbusa / wc.mapreduce.sh
Last active August 29, 2015 13:57
word count: mapreduce in java
# Upload the input text to HDFS.
hadoop fs -mkdir wordcount-input/
hadoop fs -copyFromLocal lorem.txt wordcount-input/
# Maven project in https://github.com/natalinobusa/wordcount/hadoop-mapreduce-java
"$HADOOP_HOME"/bin/hadoop jar target/wordcount-mapreduce-java-1.0-SNAPSHOT.jar \
com.natalinobusa.WordCount wordcount-input wordcount-mapreduce-java-output
# A MapReduce job writes a directory of part files, and -cat cannot read a
# directory; print the part files instead. The glob is quoted so that it is
# expanded by hadoop against HDFS rather than by the local shell.
"$HADOOP_HOME"/bin/hadoop fs -cat 'wordcount-mapreduce-java-output/part-*'
@natbusa
natbusa / wc.hadoop.examples.sh
Last active August 29, 2015 13:57
word count: hadoop example wordcount
# Run the stock wordcount example over the files in wordcount-input.
"$HADOOP_HOME"/bin/hadoop \
jar "$HADOOP_HOME"/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.3.0.jar \
wordcount wordcount-input wordcount-output
# wordcount-output is a directory of part files, and -cat cannot read a
# directory; print the part files instead. The glob is quoted so that it is
# expanded by hadoop against HDFS rather than by the local shell.
"$HADOOP_HOME"/bin/hadoop fs -cat 'wordcount-output/part-*'
@natbusa
natbusa / wc.R
Last active August 29, 2015 13:57
Word count in R
# Read the space-separated tokens, then lowercase them and strip every run of
# non-word characters.
x <- scan("lorem.txt", what = "", sep = " ")
x.clean <- gsub("\\W+", "", tolower(x))
# Tally how often each cleaned token occurs.
wc <- table(x.clean)
# Print the counts as a data frame, with blank strings in place of row names.
print(as.data.frame(wc), row.names = rep("", nrow(wc)))
@natbusa
natbusa / wc.py
Created March 19, 2014 14:40
word count: python
# Read in and filter: keep letters and ALL whitespace. Keeping only ' ' would
# delete newlines and tabs, fusing the last word of one line with the first
# word of the next. Punctuation is still dropped, as before.
with open('lorem.txt') as src:
    txt = [c for c in src.read().lower() if c.isalpha() or c.isspace()]
# Group by word in a dictionary: word -> number of occurrences.
wc = dict()
for w in ''.join(txt).split():
    wc[w] = wc.get(w, 0) + 1
#output
for k,v in wc.iteritems():
@natbusa
natbusa / wc.scala
Last active August 29, 2015 13:57
word count: scala
//read in; the file handle is closed as soon as the contents are in memory
val text = {
  val source = scala.io.Source.fromFile("lorem.txt")
  try source.mkString finally source.close()
}
//count the occurrences of each lowercased word
val wc = text.
  toLowerCase.
  split("\\W+").
  // split yields a leading "" when the text starts with a non-word character
  filter(_.nonEmpty).
  groupBy(identity).
  // strict map instead of mapValues, which is a lazy view on Scala 2.13
  map { case (word, occurrences) => word -> occurrences.length }
//writeout
@natbusa
natbusa / wc.bash
Last active August 29, 2015 13:57
word count: linux
# Lowercase the text, put each run of non-letters on its own line break, drop the
# blank lines that leaves behind, then count identical words and print "word <tab> count".
tr '[:upper:]' '[:lower:]' < lorem.txt | sed -E 's/[^[:alpha:]]+/\n/g' | grep -v '^$' | sort | uniq -c | awk '{ print($2,"\t",$1) }'
< HTTP/1.1 200 OK
< Server: spray-can/1.2-RC2
< Date: Mon, 10 Feb 2014 19:36:43 GMT
< Content-Type: application/json; charset=UTF-8
< Content-Length: 247
<
{
"main": {
"eggs": {
"num": 2
@natbusa
natbusa / request.curl
Created February 10, 2014 21:58
request
> GET /api/v1/breakfast?eggs=2&strips=4&slices=1&juices=1&coffee=2 HTTP/1.1
> User-Agent: curl/7.29.0
> Host: localhost:8888
> Accept: */*
> Content-Type: application/json
>
@natbusa
natbusa / main.scala
Created February 10, 2014 21:45
main
/** Entry point: creates the actor system, starts the breakfast API service
  * actor, and asks the HTTP layer to bind it to localhost:8888.
  */
object Boot extends App {
  implicit val system: ActorSystem = ActorSystem()

  // the service actor that is handed to Http.Bind below as the request handler
  val service = system.actorOf(props = Props[BreakfastApiService], name = "breakfast-api-service")

  // send the bind request that starts the HTTP server on localhost:8888
  private val httpManager = IO(Http)
  httpManager ! Http.Bind(service, "localhost", port = 8888)
}
val serviceRoute = {
pathPrefix("api" / "v1" / "breakfast") {
get {
parameters('eggs.as[Int], 'strips.as[Int], 'slices.as[Int], 'juices.as[Int], 'coffee.as[Int]) {
(friedeggs, baconstrips, breadslices, orangejuices, coffeemugs) =>
complete {
Breakfast(
MainDish(