Skip to content

Instantly share code, notes, and snippets.

View natbusa's full-sized avatar

Nate Busa natbusa

View GitHub Profile
@natbusa
natbusa / all.js
Last active August 29, 2015 14:02
Get all you can from the browser
// Collect client-side environment data into a flat 'dotted.key' -> value map,
// presumably for fingerprinting/analytics (per the gist description).
// NOTE(review): this preview is truncated — the object literal continues past
// 'location.href' and is closed later in the full gist.
var all = {
//screen
// physical screen dimensions and color capabilities
'screen.width' : screen.width,
'screen.height' : screen.height,
// usable area excluding OS chrome (taskbar, dock, etc.)
'screen.availWidth' : screen.availWidth,
'screen.availHeight' : screen.availHeight,
'screen.colorDepth' : screen.colorDepth,
'screen.pixelDepth' : screen.pixelDepth,
//location
// full URL of the current page
'location.href' : location.href,
@natbusa
natbusa / flask.api.py
Last active August 29, 2015 14:02
wikipedia live search demo
# Flask endpoint: look up all pages indexed under <keyword> in Cassandra and
# return their details as a JSON array.
# NOTE(review): the scrape lost Python indentation and the preview is truncated
# mid Response(...) call — compare against the full gist before reuse.
@app.route('/word/<keyword>')
def fetch_word(keyword):
# per-request handle to the Cassandra-backed store
db = get_cassandra()
pages = []
# fetch the hit list for the keyword, then resolve each hit's page details
results = db.fetchWordResults(keyword)
for hit in results:
pages.append(db.fetchPageDetails(hit["url"]))
# serialize accumulated page details; call is cut off in this preview
return Response(json.dumps(pages), status=200,
@natbusa
natbusa / actor.scala
Created May 3, 2014 22:43
Simple example of a web API with statistical RPC in R and persistence in Cassandra
// Routing actor: owns a Cassandra persistence actor and an R statistics actor,
// and dispatches incoming (a, c, RequestContext) messages to them.
// NOTE(review): preview is truncated — the Future's right-hand side and the
// closing braces are missing; see the full gist.
class AnalyticsActor extends Actor {
def actorRefFactory = context
// child actor handling NoSQL (Cassandra) persistence
val dataActor = actorRefFactory.actorOf(Props[NoSqlActor], "cassandra-client")
// child actor handling the statistical (R) RPC engine
val statActor = actorRefFactory.actorOf(Props[StatActor], "statistical-engine")
def receive = {
// a/c presumably identify the analytic and its context — TODO confirm against full gist
case (a: String, c: String, ctx: RequestContext) =>
val f:Future[Result] =
@natbusa
natbusa / main.scala
Last active August 29, 2015 13:57
Word count in Scalding
/** Scalding job: counts word occurrences in the file named by --input
  * and writes (word, count) rows as TSV to the path named by --output. */
class WordCount(args : Args) extends Job(args) {
  // one tuple per input line, in field 'line
  val lines = TextLine(args("input")).read
  // split each line on whitespace, emitting one tuple per word in field 'word
  val words = lines.flatMap('line -> 'word) { text: String => text.split("\\s") }
  // group identical words and count the size of each group
  val counts = words.groupBy('word) { _.size }
  counts.write(Tsv(args("output")))
}
@natbusa
natbusa / wc.pig
Last active August 29, 2015 13:57
Word count: in pig
-- Word count in Pig Latin: load lines, tokenize into words,
-- strip non-word characters and lowercase, then count per word.
lines   = load 'wordcount-input/lorem.txt' as (line:chararray);
tokens  = foreach lines generate FLATTEN(TOKENIZE(line)) as word;
-- normalize: drop non-word characters, then lowercase
cleaned = foreach tokens generate LOWER(REPLACE(word,'\\W+','')) as word;
grouped = group cleaned by word;
counts  = foreach grouped generate group, COUNT(cleaned);
store counts into 'wordcount-pig-output';
@natbusa
natbusa / beam2hdfs.sh
Created March 24, 2014 13:08
Beam up the lorem.txt to hdfs
#!/bin/sh
# Ensure the wordcount-input directory and lorem.txt exist on HDFS.
# NOTE(review): preview is truncated — the action taken when lorem.txt is
# missing (presumably a -put) is cut off; see the full gist.
#check if the directory exists on hdfs
$HADOOP_HOME/bin/hadoop fs -ls wordcount-input
# 'fs -ls' exits non-zero when the path does not exist, so create it
if [ $? -ne 0 ]
then $HADOOP_HOME/bin/hadoop fs -mkdir wordcount-input/
fi
#check if the lorem.txt exists on hdfs
$HADOOP_HOME/bin/hadoop fs -ls wordcount-input/lorem.txt
@natbusa
natbusa / wc.cascading.java
Created March 22, 2014 11:00
Word count in Hadoop Cascading
// Cascading Function that scrubs tuples in the word-count flow.
// NOTE(review): preview is truncated — operate()'s body after fetching the
// arguments, and the class's closing brace, are missing; see the full gist.
class ScrubFunction extends BaseOperation implements Function
{
public ScrubFunction( Fields fieldDeclaration )
{
// declares 1 incoming argument and the fields this function emits
super( 1, fieldDeclaration );
}
// called once per tuple flowing through the pipe
public void operate( FlowProcess flowProcess, FunctionCall functionCall )
{
TupleEntry argument = functionCall.getArguments();
@natbusa
natbusa / mapper.py
Last active November 2, 2024 11:12
word count: mapper and reducer in python using hadoop streaming
#!/usr/bin/env python
# Hadoop Streaming mapper: reads text lines from stdin, normalizes them to
# lowercase alphabetic words, and (in the full gist) emits key-value pairs.
# NOTE(review): the scrape lost Python indentation and the preview is
# truncated before the emit loop — compare against the full gist before reuse.
import sys
# input comes from STDIN (standard input)
for line in sys.stdin:
#clean and split in words
# keep only letters and spaces, lowercased, then split on whitespace
linechars = [c for c in line.lower() if c.isalpha() or c==' ']
words = ''.join(linechars).strip().split()
# emit the key-value pairs
@natbusa
natbusa / wc.HiveQL.sql
Last active August 29, 2015 13:57
word count: hadoop hive using lateral views and string operators
-- Hive queries for Word Count
-- Load a whole text file one line per row, then (in the full gist) explode
-- lines into words with a lateral view and aggregate counts.
-- NOTE(review): preview is truncated before the LOAD statement and the
-- counting query; see the full gist.
drop table if exists doc;
-- 1) create table to load whole file
create table doc(
text string
) row format delimited fields terminated by '\n' stored as textfile;
--2) loads plain text file
-- if the file is .csv, replace '\n' with ',' in step 1 (creation of doc table)
@natbusa
natbusa / wc.mapreduce.java
Last active August 29, 2015 13:57
word count in hadoop: ol' school map reduce in plain java and mapreduce core libraries, in 59 lines of java
package com.natalinobusa;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;