Skip to content

Instantly share code, notes, and snippets.

@mpobrien
Created June 12, 2013 17:35
Show Gist options
  • Save mpobrien/5767433 to your computer and use it in GitHub Desktop.
Save mpobrien/5767433 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
#
# Assembles the argument list for a `hadoop jar` invocation of the cohort
# example jar and runs it via $HADOOP_HOME/bin/hadoop.
#
# Usage: run from the directory that contains `examples/` (the jar path
# below is relative). Any arguments given to this script are forwarded,
# unchanged, to the end of the hadoop command line.
#
# NOTE: this script uses bash arrays, so it must run under bash; a plain
# POSIX /bin/sh (e.g. dash) does not support `declare -a` or array syntax.

#Set your HADOOP_HOME directory here.
export HADOOP_HOME="/Users/mike/hadoop/hadoop-2.0.0-cdh4.3.0"

# Each option is kept as its own array element so that nothing is re-split
# or glob-expanded when the command line is finally expanded.
declare -a job_args=("jar" "examples/cohort/target/cohort_cdh4.3.0-1.1.0.jar")

# INPUT SOURCE -
# To use a mongo collection as input:
job_args+=("-D" "mongo.job.input.format=com.mongodb.hadoop.MongoInputFormat")
job_args+=("-D" "mongo.input.uri=mongodb://localhost:27017/demo.cohort")

#Set the classes used for Mapper/Reducer
job_args+=("-D" "mongo.job.mapper=cohort.DailyMapper")
job_args+=("-D" "mongo.job.reducer=cohort.CohortReducer")

#Set the values used for output keys + values.
job_args+=("-D" "mongo.job.output.key=cohort.DailyKey")
job_args+=("-D" "mongo.job.output.value=com.mongodb.hadoop.io.BSONWritable")

# Partitioner and sort comparator are passed with empty values.
# NOTE(review): presumably an empty value means "use the default" - confirm
# against the mongo-hadoop configuration handling.
job_args+=("-D" "mongo.job.partitioner=")
job_args+=("-D" "mongo.job.sort_comparator=")
job_args+=("-D" "mongo.job.background=false")

# OUTPUT TARGET - results are written to this mongo URI.
job_args+=("-D" "mongo.output.uri=mongodb://localhost:27017/demo.asfsafaout")
job_args+=("-D" "mongo.job.output.format=com.mongodb.hadoop.MongoOutputFormat")

# Print the assembled arguments (space-separated, one line) before running.
printf '%s\n' "${job_args[*]}"

# Forward every script argument with "$@". With no arguments this adds
# nothing, instead of handing hadoop a spurious empty-string argument.
# This is the last command, so the script's exit status is hadoop's.
"$HADOOP_HOME/bin/hadoop" "${job_args[@]}" "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment