Skip to content

Instantly share code, notes, and snippets.

View alienrobotwizard's full-sized avatar

Esme Mora alienrobotwizard

  • BetterLesson
  • California
View GitHub Profile
-- Load the edge list from 'graph.tsv'; each record is a pair of chararray fields (v1, v2).
edges = LOAD 'graph.tsv' AS (v1:chararray, v2:chararray);
--
-- Augment the edges with the sizes of their outgoing adjacency lists.
--
-- Group the edges by v1 so that each group holds every edge sharing that v1.
grouped_edges = GROUP edges BY v1;
-- Flatten each group back into one (v1, v2) record per edge, attaching
-- COUNT(edges) for that group as v1_out.
aug_edges = FOREACH grouped_edges GENERATE FLATTEN(edges) AS (v1, v2), COUNT(edges) AS v1_out;
-- Project the same three fields under a second alias.
-- NOTE(review): presumably a copy so the relation can be joined against itself
-- in a later step; the consumer is not visible here -- confirm.
aug_dups = FOREACH aug_edges GENERATE v1, v2, v1_out;
--
-- Compute the sizes of the intersections of outgoing adjacency lists
--
#!/usr/bin/env ruby
# Script preamble: load dependencies and declare the settings this script reads.
require 'rubygems'
require 'wukong'
require 'wukong/encoding'
# Load Configliere and enable its :commandline, :env_var and :define features.
require 'configliere' ; Configliere.use(:commandline, :env_var, :define)
require 'set'
# id_field: required Integer setting (default 0) selecting which field is used as
# the document id; per its description, -1 requests that ids be assigned instead.
Settings.define :id_field, :type => Integer, :default => 0, :required => true, :description => "What field to use as the document id. (-1) to assign ids"
# text_field: required Integer setting (default 1) selecting which field holds the text.
Settings.define :text_field, :type => Integer, :default => 1, :required => true, :description => "Which field is the text field?"
Task Logs: 'attempt_201104192141_0024_r_000000_0'
stdout logs
stderr logs
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple t) throws IOException {
ResourceFieldSchema[] fieldSchemas = (schema_ == null) ? null : schema_.getFields();
Put put=new Put(objToBytes(t.get(0),
(fieldSchemas == null) ? DataType.findType(t.get(0)) : fieldSchemas[0].getType()));
long ts=System.currentTimeMillis();
for (byte[][] col : columnList_) {
LOG.info("putNext -- col: " + Bytes.toStringBinary(col[0]) + ":" + Bytes.toStringBinary(col[1]));
@alienrobotwizard
alienrobotwizard / hbasestorage_error.log
Created January 24, 2011 22:59
Pig 0.8, hbase 0.89, HBaseStorage
2011-01-24 22:51:25,764 INFO org.apache.hadoop.mapred.TaskInProgress: Error from attempt_201101201925_0046_m_000001_3: java.lang.ClassCastException: org.apache.pig.backend.hadoop.hbase.HBaseStorage cannot be cast to org.apache.pig.StoreFuncInterface
at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore.getStoreFunc(POStore.java:216)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.getCommitters(PigOutputCommitter.java:96)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.<init>(PigOutputCommitter.java:64)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat.getOutputCommitter(PigOutputFormat.java:235)
at org.apache.hadoop.mapred.Task.initialize(Task.java:486)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:298)
at org.apache.hadoop.mapred.Child$4.run(Child.java:217)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subj
hdp-du /tmp/hbase_out/twitter_user_id/basic/ Found 9 items
/tmp/hbase_out/twitter_user_id/basic/1111375010752534036 45387150 43.3 MB
/tmp/hbase_out/twitter_user_id/basic/136666758009409839 12135 11.9 KB
/tmp/hbase_out/twitter_user_id/basic/1530983227578486788 29377071 28.0 MB
/tmp/hbase_out/twitter_user_id/basic/1684689774724141949 45499033 43.4 MB
/tmp/hbase_out/twitter_user_id/basic/3351496769254818503 35766940 34.1 MB
/tmp/hbase_out/twitter_user_id/basic/564438603170234834 42824690 40.8 MB
/tmp/hbase_out/twitter_user_id/basic/7006774094531206982 85156321 81.2 MB
/tmp/hbase_out/twitter_user_id/basic/7318914576062524713 32049046 30.6 MB
/tmp/hbase_out/twitter_user_id/basic/7518471308926839207 108488854 103.5 MB
package org.apache.hadoop.hbase.mapreduce;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Iterator;
import java.util.TreeSet;
@alienrobotwizard
alienrobotwizard / gist:783905
Created January 18, 2011 02:51
loadtable-log
$: hbase org.jruby.Main ../hbase-0.89.20100924/bin/loadtable.rb JacobsMawfuckingTable /tmp/hbase_test/out
11/01/18 02:43:21 WARN hbase.HBaseConfiguration: instantiating HBaseConfiguration() is deprecated. Please use HBaseConfiguration#create() to construct a plain Configuration
11/01/18 02:43:22 INFO loadtable: Found 1 hfiles
11/01/18 02:43:22 INFO loadtable: 0 read firstkey of 100000031 from hdfs://ip-10-98-71-63.ec2.internal/tmp/hbase_test/out/Magnificent/6133023688808851821
11/01/18 02:43:22 INFO zookeeper.ZooKeeperWrapper: Reconnecting to zookeeper
11/01/18 02:43:22 INFO zookeeper.ZooKeeper: Client environment:zookeeper.version=3.3.1-942149, built on 05/07/2010 17:14 GMT
11/01/18 02:43:22 INFO zookeeper.ZooKeeper: Client environment:host.name=ip-10-98-71-63.ec2.internal
11/01/18 02:43:22 INFO zookeeper.ZooKeeper: Client environment:java.version=1.6.0_22
public class DataChunkToHFiles extends Configured implements Tool {
public static class TextToKeyValues extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {
private byte[] columnFamily;
private byte[] tableName;
private int keyField;
private String[] fieldNames;
@Override
protected void setup(Context context) throws IOException, InterruptedException {
java.lang.ClassCastException: org.apache.cassandra.db.Column cannot be cast to org.apache.cassandra.db.SuperColumn
at org.apache.cassandra.db.SuperColumnSerializer.serialize(SuperColumn.java:318)
at org.apache.cassandra.db.SuperColumnSerializer.serialize(SuperColumn.java:298)
at org.apache.cassandra.db.ColumnFamilySerializer.serializeForSSTable(ColumnFamilySerializer.java:82)
at org.apache.cassandra.db.ColumnFamilySerializer.serialize(ColumnFamilySerializer.java:68)
at org.apache.cassandra.db.RowMutationSerializer.freezeTheMaps(RowMutation.java:344)
at org.apache.cassandra.db.RowMutationSerializer.serialize(RowMutation.java:355)
at org.apache.cassandra.db.RowMutationSerializer.serialize(RowMutation.java:333)
at org.apache.cassandra.db.RowMutation.makeRowMutationMessage(RowMutation.java:215)
at org.apache.cassandra.db.MemtableMessenger.createSuperMessage(MemtableMessenger.java:100)