Skip to content

Instantly share code, notes, and snippets.

View mushkevych's full-sized avatar

Dan Mushkevych mushkevych

  • Portland, OR, USA
View GitHub Profile
@mushkevych
mushkevych / Encoder.java
Created May 19, 2012 00:06
Simplest Integer Encoder Example
import org.apache.log4j.Logger;
import org.apache.mahout.math.Varint;
import java.io.*;
/**
* @author Bohdan Mushkevych
* Description: module represents a tuple of two int values: alpha and beta
*/
class Tuple2I {
@mushkevych
mushkevych / ExemplaryRReducer.java
Created May 11, 2012 20:35
Exemplary R Reducer to illustrate basic principles of running R from Hadoop MapReduce
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.log4j.Logger;
import org.rosuda.JRI.REXP;
import org.rosuda.JRI.RMainLoopCallbacks;
import org.rosuda.JRI.Rengine;
import java.io.IOException;
@mushkevych
mushkevych / RIntegrationExample.java
Created April 19, 2012 20:02
Class illustrating integration of R into a stand-alone Java application
import org.apache.log4j.Logger;
import org.rosuda.JRI.REXP;
import org.rosuda.JRI.RMainLoopCallbacks;
import org.rosuda.JRI.Rengine;
import java.util.*;
/**
* @author Bohdan Mushkevych
* date: Apr 2012
@mushkevych
mushkevych / ExemplaryMapper.java
Created March 27, 2012 18:19
Exemplary Hadoop Mapper dealing with OutOfMemoryError
/**
* @author Bohdan Mushkevych
* date: 16 Mar 2012
* Description: demonstrates OutOfMemoryError recovery in Hadoop
*/
public class ExemplaryMapper extends Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, ImmutableBytesWritable> {
private static Logger log = Logger.getLogger(ExemplaryMapper.class);
@Override
protected void map(ImmutableBytesWritable key, Result result, Context context) throws IOException, InterruptedException {
@mushkevych
mushkevych / CsvImportServer.java
Created March 23, 2012 18:39
CSV -> HBase importer, based on Surus
package com.reinvent.synergy.data.csvimport;
import com.reinvent.synergy.data.model.Constants;
import com.reinvent.synergy.data.model.UserLog;
import com.reinvent.synergy.data.system.PoolManager;
import com.reinvent.synergy.data.system.TableContext;
import com.reinvent.synergy.data.system.TimePeriodHelper;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.supercsv.io.CsvMapReader;
@mushkevych
mushkevych / repeat_timer.py
Created March 19, 2012 01:08
RepeatTimer class
"""
Created on 2011-02-10
@author: Bohdan Mushkevych
@author: Brian Curtin
http://code.activestate.com/lists/python-ideas/8982/
"""
from datetime import datetime
import threading
@mushkevych
mushkevych / collection_context.py
Created November 15, 2011 00:48 — forked from awestendorf/mongo_rebalance.py
An example of rebalancing a pymongo MasterSlaveConnection
"""
Created on 2011-04-23
@author: Bohdan Mushkevych
@author: Aaron Westendorf
"""
import functools
import time
from pymongo.errors import AutoReconnect
from pymongo.connection import Connection as MongoConnection