Created
May 17, 2012 06:04
-
-
Save rjurney/2716863 to your computer and use it in GitHub Desktop.
The Beauty of ILLUSTRATE in Pig when working with Avro documents and MongoDB
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Piggybank */ | |
register /me/pig/contrib/piggybank/java/piggybank.jar | |
/* Avro */ | |
register /me/pig/build/ivy/lib/Pig/avro-1.5.3.jar | |
register /me/pig/build/ivy/lib/Pig/json-simple-1.1.jar | |
register /me/pig/build/ivy/lib/Pig/jackson-core-asl-1.7.3.jar | |
register /me/pig/build/ivy/lib/Pig/jackson-mapper-asl-1.7.3.jar | |
register /me/pig/build/ivy/lib/Pig/joda-time-1.6.jar | |
define AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage(); | |
/* MongoDB */ | |
register /me/mongo-hadoop/mongo-2.7.2.jar | |
register /me/mongo-hadoop/core/target/mongo-hadoop-core-1.0.0.jar | |
register /me/mongo-hadoop/pig/target/mongo-hadoop-pig-1.0.0.jar | |
define MongoStorage com.mongodb.hadoop.pig.MongoStorage(); | |
/* Start here in Pig: | |
enrons: { | |
message_id: chararray, | |
datetime: chararray, | |
from_address: chararray, | |
from_name: chararray, | |
subject: chararray, | |
body: chararray, | |
tos: {ARRAY_ELEM: (address: chararray,name: chararray)}, | |
ccs: {ARRAY_ELEM: (address: chararray,name: chararray)}, | |
bccs: {ARRAY_ELEM: (address: chararray,name: chararray)} | |
} | |
Get here in MongoDB: | |
{ | |
"t" : ISODate("2012-05-17T08:06:10.512Z"), | |
"d" : { | |
"value" : 12.595975877949968 | |
}, | |
"_id" : ObjectId("4fb4793c66bdfe72cf000002") | |
} | |
To get our data in Cube. */ | |
-- Load the Enron emails stored as Avro (record layout is sketched in the comment above).
enrons = load '/enron/emails.avro' using AvroStorage(); | |
-- One metric row per email: value = SIZE(body) cast to int, and
-- t = the datetime string wrapped in the literal text 'ISODate(' ... ')'.
-- NOTE(review): t is declared chararray, so it is presumably written to MongoDB as a
-- plain string rather than a BSON date like the target document shows - confirm.
metrics = foreach enrons generate (int)SIZE(body) as value:int, CONCAT(CONCAT('ISODate(', datetime), ')') as t:chararray; | |
-- Nest value under d by wrapping it in a single-element bag; t passes through unchanged.
-- NOTE(review): the target document above shows d as a sub-document ({"value": ...});
-- verify how MongoStorage serialises a bag versus a tuple before relying on this shape.
metrics = foreach metrics generate TOBAG(value) as d:bag{tuple(value:int)}, t; | |
-- Write the metric rows to the pig_events collection of the cube_development database.
store metrics into 'mongodb://localhost/cube_development.pig_events' using MongoStorage(); | |
/* | |
grunt> describe metrics | |
metrics: {d: {(value: int)},t: chararray} | |
-- And MongoDB will add our _id. Voilà! | |
*/ | |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | |
| enrons | message_id:chararray | datetime:chararray | from_address:chararray | from_name:chararray | subject:chararray | body:chararray | tos:bag{ARRAY_ELEM:tuple(address:chararray,name:chararray)} | ccs:bag{ARRAY_ELEM:tuple(address:chararray,name:chararray)} | bccs:bag{ARRAY_ELEM:tuple(address:chararray,name:chararray)} | | |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | |
| | <14873615.1075853871940.JavaMail.evans@thyme> | 2000-12-19T02:19:00.000Z | [email protected] | Chris Germany | Re: Vacation | 18ft kenner fishing boat with a 120johnson (thats a motor not something \nnaughty)\n\n\n\n\nDave Scott@EES\n12/19/2000 10:16 AM\nTo: Chris Germany/HOU/ECT@ECT\ncc: \nSubject: Re: Vacation \n\nI got it. But the crayon sort of smeared and it was hard to read ... what \nwas the item listed after a scooter ?\n\n\n\nChris Germany@ECT\n12/19/2000 10:14 AM\nTo: Dave Scott/HOU/EES@EES\ncc: \nSubject: Re: Vacation \n\nI will. I hope you got my Christmas list.\n\n\n\nDave Scott@EES\n12/19/2000 10:09 AM\nTo: Chris Germany/HOU/ECT@ECT\ncc: \nSubject: Re: Vacation \n\nHave a wonderful time\n\n\n\nChris Germany@ECT\n12/19/2000 08:43 AM\nTo: Jeffrey Porter/HOU/EES@EES, Dave Scott/HOU/EES@EES\ncc: Tricia Spence/HOU/ECT@ECT, Dick Jenkins/HOU/ECT@ECT \nSubject: Vacation\n\nI will be out this afternoon. If you need to do any deals call Dick Jenkins.\n\nThanks\n\n\n\n\n\n\n\n\n\n\n | {([email protected], )} | {} | {} | | |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | |
------------------------------------------------------------------- | |
| metrics | value:int | t:chararray | | |
------------------------------------------------------------------- | |
| | 883 | ISODate(2000-12-19T02:19:00.000Z) | | |
------------------------------------------------------------------- | |
------------------------------------------------------------------------------------------ | |
| metrics | d:bag{:tuple(value:int)} | t:chararray | | |
------------------------------------------------------------------------------------------ | |
| | {(883)} | ISODate(2000-12-19T02:19:00.000Z) | | |
------------------------------------------------------------------------------------------ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment