@jeromatron
Created February 28, 2011 22:15
***************
*** 18,52 ****
import java.io.IOException;
import java.nio.ByteBuffer;
- import java.util.*;
import org.apache.cassandra.db.Column;
import org.apache.cassandra.db.IColumn;
import org.apache.cassandra.db.SuperColumn;
- import org.apache.cassandra.hadoop.*;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.utils.FBUtilities;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
-
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.DefaultDataBag;
- import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
/**
* A LoadFunc wrapping ColumnFamilyInputFormat.
*
* A row from a standard CF will be returned as nested tuples: (key, ((name1, val1), (name2, val2))).
*/
- public class CassandraStorage extends LoadFunc
{
// system environment variables that can be set to configure connection info:
// alternatively, Hadoop JobConf variables can be set using keys from ConfigHelper
--- 18,71 ----
import java.io.IOException;
import java.nio.ByteBuffer;
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.List;
+ import java.util.Map;
+ import java.util.SortedMap;
+ import org.apache.cassandra.avro.ColumnOrSuperColumn;
+ import org.apache.cassandra.avro.Deletion;
+ import org.apache.cassandra.avro.Mutation;
import org.apache.cassandra.db.Column;
import org.apache.cassandra.db.IColumn;
import org.apache.cassandra.db.SuperColumn;
+ import org.apache.cassandra.hadoop.ColumnFamilyInputFormat;
+ import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat;
+ import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.utils.FBUtilities;
+ import org.apache.commons.logging.Log;
+ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
+ import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
+ import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.pig.LoadFunc;
+ import org.apache.pig.LoadPushDown;
+ import org.apache.pig.ResourceSchema;
+ import org.apache.pig.StoreFuncInterface;
+ import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
+ import org.apache.pig.data.DataByteArray;
+ import org.apache.pig.data.DataType;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
+ import org.apache.pig.impl.logicalLayer.FrontendException;
+ import com.google.common.collect.Lists;
+
/**
* A LoadFunc wrapping ColumnFamilyInputFormat.
*
* A row from a standard CF will be returned as nested tuples: (key, ((name1, val1), (name2, val2))).
*/
+ public class CassandraStorage extends LoadFunc implements StoreFuncInterface, LoadPushDown
{
// system environment variables that can be set to configure connection info:
// alternatively, Hadoop JobConf variables can be set using keys from ConfigHelper
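
The new imports map directly onto the two interfaces added to the class declaration: ColumnFamilyOutputFormat, OutputFormat, RecordWriter, and the avro Mutation/Deletion/ColumnOrSuperColumn classes serve StoreFuncInterface, while LoadPushDown and FrontendException serve projection pushdown. Below is a minimal sketch of what the store side could look like against the 0.7-era avro-based output format, whose record writer takes a row key plus a list of Mutations. The objToBB helper and the keyspace/columnFamily fields are assumptions for illustration, not the gist's actual code.

    // Sketch only: store-side methods implied by the new imports, modeled on the
    // 0.7-era ColumnFamilyOutputFormat (RecordWriter<ByteBuffer, List<Mutation>>).
    private RecordWriter<ByteBuffer, List<Mutation>> writer;

    public void setStoreLocation(String location, Job job) throws IOException
    {
        // Assumes location is a cassandra://<keyspace>/<column_family> URI that
        // has already been parsed into the keyspace and columnFamily fields.
        ConfigHelper.setOutputColumnFamily(job.getConfiguration(), keyspace, columnFamily);
    }

    public OutputFormat getOutputFormat()
    {
        return new ColumnFamilyOutputFormat();
    }

    public void prepareToWrite(RecordWriter writer)
    {
        this.writer = writer;
    }

    public void putNext(Tuple t) throws IOException
    {
        // Expects the same shape the loader produces: (key, {(name1, val1), (name2, val2)})
        ByteBuffer key = objToBB(t.get(0)); // objToBB: hypothetical Object -> ByteBuffer helper
        DefaultDataBag columns = (DefaultDataBag) t.get(1);
        List<Mutation> mutations = new ArrayList<Mutation>();
        for (Tuple pair : columns)
        {
            org.apache.cassandra.avro.Column col = new org.apache.cassandra.avro.Column();
            col.name = objToBB(pair.get(0));
            col.value = objToBB(pair.get(1));
            col.timestamp = System.currentTimeMillis() * 1000;
            col.ttl = 0;
            Mutation m = new Mutation();
            m.column_or_supercolumn = new ColumnOrSuperColumn();
            m.column_or_supercolumn.column = col;
            mutations.add(m);
        }
        try
        {
            writer.write(key, mutations);
        }
        catch (InterruptedException e)
        {
            throw new IOException(e);
        }
    }

With methods along these lines in place, the same class serves Pig for both directions, e.g. STORE rows INTO 'cassandra://Keyspace/ColumnFamily' USING CassandraStorage();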
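
LoadPushDown is the other new interface: it lets Pig tell the loader which fields a script actually references. A minimal, hypothetical implementation just advertises projection support and records the requested fields; a fuller one could narrow the SlicePredicate/SliceRange to only the named columns so Cassandra never ships the rest.

    // Sketch only: minimal LoadPushDown support; the requiredFields field is hypothetical.
    private List<RequiredField> requiredFields;

    public List<OperatorSet> getFeatures()
    {
        return Arrays.asList(OperatorSet.PROJECTION);
    }

    public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException
    {
        // Remember what Pig needs; returning true tells Pig the request was honored,
        // so the loader must then emit only those fields.
        requiredFields = requiredFieldList.getFields();
        return new RequiredFieldResponse(true);
    }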