Skip to content

Instantly share code, notes, and snippets.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class pymk_mapper extends MapReduceBase implements
Mapper<LongWritable, Text, Text, Text> {
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
@geofferyzh
geofferyzh / gist:2777245
Created May 23, 2012 19:28
RinAction - Nonparametric Test of Group Difference
########################################################
########################################################
# ---- Nonparametric tests of group differences ---- #
########################################################
########################################################
# NOTE:
# When you have more than 2 groups, you can use ANOVA or nonparametric approaches
# to test group differences. ANOVA assumes the data are independently sampled from normal
# populations. If the parametric assumptions are not met, nonparametric methods can
@geofferyzh
geofferyzh / gist:2777240
Created May 23, 2012 19:27
RinAction - T-Tests
########################################################
########################################################
# -------------------- T-Test ----------------- #
########################################################
########################################################
# Group comparisons, assuming a continuous outcome variable and a normal distribution
#####################
## independent t-test
@geofferyzh
geofferyzh / gist:2777234
Created May 23, 2012 19:26
RinAction - Basic Statistics - Association & Correlations
########################################################
########################################################
# -------- Test of independence & Association -------- #
########################################################
########################################################
##########################
# Evidence of Independence
##########################
@geofferyzh
geofferyzh / gist:2776754
Created May 23, 2012 18:09
RinAction - Converting Table to Flat file
######################################################
# converting a table into a flat file via table2flat
######################################################
table2flat <- function(mytable) {
df <- as.data.frame(mytable)
rows <- dim(df)[1]
cols <- dim(df)[2]
x <- NULL
for (i in 1:rows) {
@geofferyzh
geofferyzh / gist:2776323
Created May 23, 2012 16:51
RinAction - Basic Statistics - Frequency Table
#-----------------------------------------------------------------------------#
#-----------------------------------------------------------------------------#
# R in Action - Basic Statistics
# - Frequency Table
#-----------------------------------------------------------------------------#
#-----------------------------------------------------------------------------#
install.packages(c('npmc', 'ggm', 'gmodels', 'vcd', 'Hmisc','pastecs', 'psych', 'doBy', 'reshape'))
#######################################################
@geofferyzh
geofferyzh / gist:2776306
Created May 23, 2012 16:48
RinAction - Basic Statistics & Grouped Statistics
#-----------------------------------------------------------------------------#
#-----------------------------------------------------------------------------#
# R in Action - Basic Statistics
# - Descriptive Statistics
#-----------------------------------------------------------------------------#
#-----------------------------------------------------------------------------#
install.packages(c('npmc', 'ggm', 'gmodels', 'vcd', 'Hmisc','pastecs', 'psych', 'doBy', 'reshape'))
##############################################
@geofferyzh
geofferyzh / gist:2525736
Created April 29, 2012 02:45
CF - SlopeOne Implementation in R
#########################################################################
#-----------------------------------------------------------------------#
# SlopeOne Recommender Implementation in R #
#########################################################################
#####################
# Sample Data
#####################
# sample data 1
@geofferyzh
geofferyzh / gist:2494809
Created April 26, 2012 00:39
Similarity Metrics Calculation in R (LLR,Correlation,Vector,Tanimoto)
##########################################################################
# -----------------------------------------------------------------------#
# -------------------- Similarity Metrics (CF) --------------------------#
# ---------------------Author: Shaohua Zhang ---------------------------#
##########################################################################
# sample data 1
Mov1 <- c(4,4,3,4,2)
Mov2 <- c(NA,2,NA,4,1)