Skip to content

Instantly share code, notes, and snippets.

View alienrobotwizard's full-sized avatar

Esme Mora alienrobotwizard

  • BetterLesson
  • California
View GitHub Profile
hdp-du /tmp/hbase_out/twitter_user_id/basic/ Found 9 items
/tmp/hbase_out/twitter_user_id/basic/1111375010752534036 45387150 43.3 MB
/tmp/hbase_out/twitter_user_id/basic/136666758009409839 12135 11.9 KB
/tmp/hbase_out/twitter_user_id/basic/1530983227578486788 29377071 28.0 MB
/tmp/hbase_out/twitter_user_id/basic/1684689774724141949 45499033 43.4 MB
/tmp/hbase_out/twitter_user_id/basic/3351496769254818503 35766940 34.1 MB
/tmp/hbase_out/twitter_user_id/basic/564438603170234834 42824690 40.8 MB
/tmp/hbase_out/twitter_user_id/basic/7006774094531206982 85156321 81.2 MB
/tmp/hbase_out/twitter_user_id/basic/7318914576062524713 32049046 30.6 MB
/tmp/hbase_out/twitter_user_id/basic/7518471308926839207 108488854 103.5 MB
@alienrobotwizard
alienrobotwizard / hbasestorage_error.log
Created January 24, 2011 22:59
Pig 0.8, hbase 0.89, HBaseStorage
2011-01-24 22:51:25,764 INFO org.apache.hadoop.mapred.TaskInProgress: Error from attempt_201101201925_0046_m_000001_3: java.lang.ClassCastException: org.apache.pig.backend.hadoop.hbase.HBaseStorage cannot be cast to org.apache.pig.StoreFuncInterface
at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore.getStoreFunc(POStore.java:216)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.getCommitters(PigOutputCommitter.java:96)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputCommitter.<init>(PigOutputCommitter.java:64)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat.getOutputCommitter(PigOutputFormat.java:235)
at org.apache.hadoop.mapred.Task.initialize(Task.java:486)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:298)
at org.apache.hadoop.mapred.Child$4.run(Child.java:217)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subj
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple t) throws IOException {
ResourceFieldSchema[] fieldSchemas = (schema_ == null) ? null : schema_.getFields();
Put put=new Put(objToBytes(t.get(0),
(fieldSchemas == null) ? DataType.findType(t.get(0)) : fieldSchemas[0].getType()));
long ts=System.currentTimeMillis();
for (byte[][] col : columnList_) {
LOG.info("putNext -- col: " + Bytes.toStringBinary(col[0]) + ":" + Bytes.toStringBinary(col[1]));
Task Logs: 'attempt_201104192141_0024_r_000000_0'
stdout logs
stderr logs
#!/usr/bin/env ruby
require 'rubygems'
require 'wukong'
require 'wukong/encoding'
# Load Configliere and call Configliere.use with :commandline, :env_var and :define
# before any settings are declared below.
require 'configliere' ; Configliere.use(:commandline, :env_var, :define)
require 'set'
# id_field: Integer setting, default 0, marked required. Per its description it selects
# the field used as the document id, with -1 meaning ids should be assigned instead.
Settings.define :id_field, :type => Integer, :default => 0, :required => true, :description => "What field to use as the document id. (-1) to assign ids"
# text_field: Integer setting, default 1, marked required. Per its description it selects
# the field that holds the text.
Settings.define :text_field, :type => Integer, :default => 1, :required => true, :description => "Which field is the text field?"
-- Load 'graph.tsv' as an edge list with two chararray fields per row: (v1, v2).
edges = LOAD 'graph.tsv' AS (v1:chararray, v2:chararray);
--
-- Augment the edges with the sizes of their outgoing adjacency lists.
--
-- One group per distinct v1, holding the bag of all edges that share that v1.
grouped_edges = GROUP edges BY v1;
-- Flatten each group back into individual (v1, v2) edges and attach v1_out,
-- the COUNT of the edges bag for that v1 group.
aug_edges = FOREACH grouped_edges GENERATE FLATTEN(edges) AS (v1, v2), COUNT(edges) AS v1_out;
-- Projection of the same three columns under a second alias.
-- NOTE(review): presumably a duplicate so the relation can be joined with itself
-- in the intersection step; that step is not visible here -- confirm.
aug_dups = FOREACH aug_edges GENERATE v1, v2, v1_out;
--
-- Compute the sizes of the intersections of outgoing adjacency lists
--
-- Load 'graph.tsv' as an edge list with two chararray fields per row: (v1, v2).
edges = LOAD 'graph.tsv' AS (v1:chararray, v2:chararray);
--
-- Augment the edges with the sizes of their outgoing adjacency lists.
--
-- One group per distinct v1, holding the bag of all edges that share that v1.
grouped_edges = GROUP edges BY v1;
-- Flatten each group back into individual (v1, v2) edges and attach v1_out,
-- the COUNT of the edges bag for that v1 group.
aug_edges = FOREACH grouped_edges GENERATE FLATTEN(edges) AS (v1, v2), COUNT(edges) AS v1_out;
-- Projection of the same three columns under a second alias.
-- NOTE(review): presumably a duplicate so the relation can be joined with itself
-- in the intersection step; that step is not visible here -- confirm.
aug_dups = FOREACH aug_edges GENERATE v1, v2, v1_out;
--
-- Compute the sizes of the intersections of outgoing adjacency lists
-- Raw activity inputs: everything under the student_activity/ and instructor_activity/
-- directories of $INSTITUTION_PATH, read with the project's PigStorageAvroSchema loader.
-- The loader is given the schema through the $STUDENT_ACTIVITY_SCHEMA /
-- $INSTRUCTOR_ACTIVITY_SCHEMA parameters.
raw_student_activity = load '$INSTITUTION_PATH/student_activity/*' using civitas.analytics.pig_udf.PigStorageAvroSchema('$STUDENT_ACTIVITY_SCHEMA');
raw_instructor_activity = load '$INSTITUTION_PATH/instructor_activity/*' using civitas.analytics.pig_udf.PigStorageAvroSchema('$INSTRUCTOR_ACTIVITY_SCHEMA');
-- Comma-separated mapping files with three chararray columns:
-- (cnt_name, activity_type, activity_name).
-- NOTE(review): the /tmp/umuc/ paths are hard-coded, unlike the parameterized inputs above -- confirm this is intended.
student_count_mappings = load '/tmp/umuc/student_activity_count_mappings.csv' using PigStorage(',') as (cnt_name:chararray, activity_type:chararray, activity_name:chararray);
instr_count_mappings = load '/tmp/umuc/instructor_activity_count_mappings.csv' using PigStorage(',') as (cnt_name:chararray, activity_type:chararray, activity_name:chararray);
student_event_log = foreach (join raw_student_activity by activity_name left outer, student_count_mappings by activity_type) generate
student_id, section_id, term_id, activity_pk, timestamp,
(student_count_mappings::activity_name is null ? raw_student_activity::activity_type : student_count_mappings::activity_n
@alienrobotwizard
alienrobotwizard / get_logical_plan.rb
Created September 9, 2013 12:34
Get a Pig logical plan easily without running the script (JRuby)
#!/usr/bin/env jruby
# Bootstrap: enable JRuby's Java integration, load pig.jar from the directory
# named by the PIG_HOME environment variable, and import java.util.Properties
# and java.io.FileReader.
require 'java'

# Fail early with a readable message when PIG_HOME is unset; File.join(nil, ...)
# would otherwise raise an opaque "no implicit conversion of nil into String".
PIG_JAR = File.join(
  ENV.fetch('PIG_HOME') { abort 'PIG_HOME must be set to the Pig installation directory' },
  'pig.jar'
)
require PIG_JAR

import 'java.util.Properties'
import 'java.io.FileReader'
/*
* Run one iteration of gradient descent for the given weights and features
* with step size alpha. Returns the updated weights.
*
* features - Relation with the following schema:
* {response:double, vector:tuple(f0:double,f1:double,...,fN:double)}
*
* w - Relation with **exactly one tuple** with the following schema:
* {weights:tuple(w0:double,w1:double,...,wN:double)}
*