This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Installation directories referenced by the Pig classpath and options below.
export HADOOP_HOME=/home/hadoop
export HCAT_HOME=/usr/local/hcat
export PIG_HOME=/home/hadoop/pig-0.10.0
export HIVE_HOME=/home/hadoop/hive-0.9.0
export FORREST_HOME=/home/hadoop/apache-forrest-0.9

# Build PIG_CLASSPATH one entry at a time. A bare line break inside the value
# would end the assignment and make the shell run the remaining entries as
# commands, so each jar/conf directory is appended explicitly with ':'.
PIG_CLASSPATH="$HCAT_HOME/share/hcatalog/hcatalog-0.4.0.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$HIVE_HOME/lib/hive-metastore-0.9.0.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$HIVE_HOME/lib/libthrift-0.7.0.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$HIVE_HOME/lib/hive-exec-0.9.0.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$HIVE_HOME/lib/libfb303-0.7.0.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$HIVE_HOME/lib/jdo2-api-2.3-ec.jar"
PIG_CLASSPATH="$PIG_CLASSPATH:$HIVE_HOME/conf"
PIG_CLASSPATH="$PIG_CLASSPATH:$HADOOP_HOME/conf"
PIG_CLASSPATH="$PIG_CLASSPATH:$HIVE_HOME/lib/slf4j-api-1.6.1.jar"
export PIG_CLASSPATH

# Passes the Hive metastore Thrift URI to Pig as a JVM system property.
export PIG_OPTS=-Dhive.metastore.uris=thrift://localhost:10001
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
russell-jurneys-macbook-pro:pig rjueny$ curl - POST "http://localhost:9200/email/email/_search?pretty=true" -d '' | |
{ | |
"query" : { "*" }, | |
"facets" : { | |
"tags" : { "terms" : {"email.froms.from.address" : "[email protected]"} } | |
} | |
} | |
' | |
{ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{"total":16,"sent_hour":"08"},{"total":16,"sent_hour":"09"},{"total":24,"sent_hour":"10"},{"total":14,"sent_hour":"11"},{"total":6,"sent_hour":"12"},{"total":22,"sent_hour":"13"},{"total":32,"sent_hour":"14"},{"total":14,"sent_hour":"15"},{"total":10,"sent_hour":"16"},{"total":10,"sent_hour":"17"},{"total":4,"sent_hour":"18"},{"total":8,"sent_hour":"20"},{"total":6,"sent_hour":"21"},{"total":20,"sent_hour":"22"},{"total":2,"sent_hour":"23"}] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fill_in_blanks(in_data):
    """Return one entry per hour '00'..'23', zero-filling hours missing from in_data.

    Args:
        in_data: iterable of mappings, each with a 'sent_hour' key holding a
            zero-padded two-digit hour string (e.g. '08').

    Returns:
        A list of 24 entries ordered by hour. For an hour present in in_data
        the first matching entry is returned as-is (the same object, not a
        copy); for a missing hour the entry is {'sent_hour': hour, 'total': 0}.

    Raises:
        KeyError: if any entry in in_data has no 'sent_hour' key.
    """
    # Index the input once instead of rescanning it for each of the 24 hours.
    # setdefault keeps the first entry seen for an hour, matching entry[0].
    first_by_hour = {}
    for entry in in_data:
        first_by_hour.setdefault(entry['sent_hour'], entry)

    out_data = []
    for hour in ('%02d' % i for i in range(24)):
        if hour in first_by_hour:
            out_data.append(first_by_hour[hour])
        else:
            out_data.append({'sent_hour': hour, 'total': 0})
    return out_data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Connect to the MongoDB 'enron' database and its 'emails' collection
var mongodb = require("mongodb");
var Db = mongodb.Db,
    Server = mongodb.Server;
var db = new Db("enron", new Server("127.0.0.1", 27017, {}));
// Surface connection failures instead of silently replacing `db` with the
// empty handle passed alongside the error.
db.open(function(err, n_db) {
  if (err) {
    throw err;
  }
  db = n_db;
});
// NOTE(review): the collection handle is requested without waiting for the
// open callback above — confirm the driver version in use tolerates that.
var collection = db.collection("emails");
// Setup a simple API server returning JSON | |
var http = require('http'); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
grunt> pages = load 'data/textData-00000' using SequenceFileLoader() as (key:chararray, value:chararray); | |
grunt> describe pages; | |
pages: {key: chararray,value: chararray} | |
grunt> ILLUSTRATE pages; | |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
grunt> describe emails | |
emails: {message_id: chararray,date: chararray,from: (address: chararray,name: chararray),subject: chararray,body: chararray,tos: {ARRAY_ELEM: (address: chararray,name: chararray)},ccs: {ARRAY_ELEM: (address: chararray,name: chararray)},bccs: {ARRAY_ELEM: (address: chararray,name: chararray)}} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Enable /emails and /emails/ to serve the last 20 emails in our inbox unless otherwise specified
default_offsets={'offset1': 0, 'offset2': 0 + config.EMAIL_RANGE} | |
@app.route('/', defaults=default_offsets) | |
@app.route('/emails', defaults=default_offsets) | |
@app.route('/emails/', defaults=default_offsets) | |
@app.route("/emails/<int:offset1>/<int:offset2>") | |
def list_emaildb(offset1, offset2): | |
offset1 = int(offset1) | |
offset2 = int(offset2) | |
emails = emaildb.find()[offset1:offset2] # Uses a MongoDB cursor |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
characters = load 'example.xml' using XMLLoader('character'); | |
describe characters | |
{properties:map[], name:chararray, born:datetime, qualification:chararray} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Avro uses json-simple, and is in piggybank until Pig 0.12, where AvroStorage and TrevniStorage are builtins */
-- Jars needed for Avro storage: Avro itself, its json-simple dependency, and piggybank.
REGISTER /me/Software/pig/build/ivy/lib/Pig/avro-1.5.3.jar
REGISTER /me/Software/pig/build/ivy/lib/Pig/json-simple-1.1.jar
REGISTER /me/Software/pig/contrib/piggybank/java/piggybank.jar
-- Short alias so later statements can write AvroStorage instead of the full class name.
DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage();
-- NOTE(review): the trailing empty block comment presumably balances the comment
-- opener that the glob in this path resembles — confirm before removing it.
REGISTER /me/Software/varaha/lib/*.jar /* */
REGISTER /me/Software/varaha/target/varaha-1.0-SNAPSHOT.jar