kornypoet · April 9, 2012 21:26 · alienrobotwizard · Apr 10, 2012
diff --git a/gistfile1.txt b/gistfile1.txt
 --
 -- If your data looks like this:
 --
 -- {"foo":1,"bar":1}
 -- {"foo":2,"bar":2}
 -- {"foo":3,"bar":3}
 --
 -- Then write your store statement this way; nested hashes and arrays are preserved as long as each line is valid JSON
 -- 
 -- Put the wonderdog jar on the classpath; presumably it supplies the
 -- ElasticSearchStorage class used by the STORE below.
 REGISTER wonderdog-1.0-SNAPSHOT.jar;

 -- TextLoader hands over each input line as a single chararray field.
 -- The default loader (PigStorage) splits on tabs, so a JSON line that
 -- contains a literal tab would be cut off at the first one.
 data = LOAD '/path/to/data' USING TextLoader() AS (json:chararray);

 -- json=true: each record is stored as the JSON document held in the json field.
 STORE data INTO 'es://index/obj?json=true' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage();

 --
 -- If your data looks like this:
 --
 -- 1   {(foo),(bar),(baz)}
 -- 2   {(foo),(bar),(baz)}
 -- 3   {(foo),(bar),(baz)}
 --
 -- Then you need to serialize the bag to a string beforehand and use the tsv store function (json=false);
 -- my recommendation would be to do this before the LOAD, or else write a UDF to serialize the bag
 --
 -- Put the wonderdog jar on the classpath; presumably it supplies the
 -- ElasticSearchStorage class used by the STORE below.
 REGISTER wonderdog-1.0-SNAPSHOT.jar;

 -- Two columns per row: an integer id and a bag with no declared inner schema.
 records = LOAD '/path/to/data' AS (id:int, vals:bag{});

 -- JsonizeBag is not defined in this script: it is a placeholder for a
 -- user-written UDF that turns the bag into a single string.
 with_json_vals = FOREACH records
                  GENERATE id,
                           JsonizeBag(vals) AS (vals:chararray);

 -- json=false: records are handed to the storage function as plain fields (tsv mode).
 STORE with_json_vals INTO 'es://index/obj?json=false' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage();
	--
	-- If your data looks like this:
	--
	-- {"foo":1,"bar":1}
	-- {"foo":2,"bar":2}
	-- {"foo":3,"bar":3}
	--
	-- Then write your store statement this way; nested hashes and arrays are preserved as long as each line is valid JSON
	--
	-- Put the wonderdog jar on the classpath; presumably it supplies the
	-- ElasticSearchStorage class used by the STORE below.
	REGISTER wonderdog-1.0-SNAPSHOT.jar;

	-- TextLoader hands over each input line as a single chararray field.
	-- The default loader (PigStorage) splits on tabs, so a JSON line that
	-- contains a literal tab would be cut off at the first one.
	data = LOAD '/path/to/data' USING TextLoader() AS (json:chararray);

	-- json=true: each record is stored as the JSON document held in the json field.
	STORE data INTO 'es://index/obj?json=true' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage();

	--
	-- If your data looks like this:
	--
	-- 1 {(foo),(bar),(baz)}
	-- 2 {(foo),(bar),(baz)}
	-- 3 {(foo),(bar),(baz)}
	--
	-- Then you need to serialize the bag to a string beforehand and use the tsv store function (json=false);
	-- my recommendation would be to do this before the LOAD, or else write a UDF to serialize the bag
	--
	-- Put the wonderdog jar on the classpath; presumably it supplies the
	-- ElasticSearchStorage class used by the STORE below.
	REGISTER wonderdog-1.0-SNAPSHOT.jar;

	-- Two columns per row: an integer id and a bag with no declared inner schema.
	records = LOAD '/path/to/data' AS (id:int, vals:bag{});

	-- JsonizeBag is not defined in this script: it is a placeholder for a
	-- user-written UDF that turns the bag into a single string.
	with_json_vals = FOREACH records
	                 GENERATE id,
	                          JsonizeBag(vals) AS (vals:chararray);

	-- json=false: records are handed to the storage function as plain fields (tsv mode).
	STORE with_json_vals INTO 'es://index/obj?json=false' USING com.infochimps.elasticsearch.pig.ElasticSearchStorage();