Skip to content

Instantly share code, notes, and snippets.

@rei999
Created July 10, 2013 01:09
Show Gist options
  • Select an option

  • Save rei999/5962719 to your computer and use it in GitHub Desktop.

Select an option

Save rei999/5962719 to your computer and use it in GitHub Desktop.
example hadoop
package com.mycompany.emr;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger;
import com.mycompany.emr.model.EmployeeRecord;
/**
 * Driver for a MapReduce job that reads rows from a MySQL table through
 * {@link DBInputFormat} and writes text output to the directory given as the
 * first command-line argument.
 *
 * <p>The mapper and reducer classes ({@code Map} and {@code Reduce}) are not
 * defined in this file; they are expected to be resolvable from this package.
 */
public class Main {
    private static final String JOB_NAME = "DB WordCount";
    private static final String MYSQL_DRIVER = "com.mysql.jdbc.Driver";
    // Despite the name, this is the full JDBC connection URL, not just the schema name.
    private static final String DB_NAME = "jdbc:mysql://localhost/company";
    // Placeholders: replace with real credentials before running the job.
    private static final String DB_USER = "{your_db_username}";
    private static final String DB_PASS = "{your_db_password}";
    private static final String TABLE_NAME = "employee";
    private static final String PRIMARY_ID = "id";

    /**
     * Configures and submits the job, then exits the JVM with status 0 when the
     * job succeeds and 1 when it fails.
     *
     * @param args command-line arguments; {@code args[0]} is the output path
     * @throws Exception if job configuration or submission fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 1) {
            System.err.println("Usage: Main <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = new Job(conf, JOB_NAME);
        job.setJarByClass(Map.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(DBInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        DBConfiguration.configureDB(job.getConfiguration(), MYSQL_DRIVER, DB_NAME, DB_USER, DB_PASS);

        String[] fields = { "id", "title" };
        // The record class is the EmployeeRecord model imported by this file, matching
        // the "employee" table. NOTE(review): it must implement DBWritable -- confirm.
        // The null argument is the conditions parameter; PRIMARY_ID is passed as the
        // orderBy parameter.
        DBInputFormat.setInput(job, EmployeeRecord.class, TABLE_NAME, null, PRIMARY_ID, fields);

        FileOutputFormat.setOutputPath(job, new Path(args[0]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment