Last active
August 29, 2015 13:59
-
-
Save P7h/10546830 to your computer and use it in GitHub Desktop.
Trivia: Generate primes between 1 and 100 million with Hadoop MapReduce job. Inspired by https://gist.github.com/krishnanraman/6346307.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.IOException; | |
| import org.apache.hadoop.conf.Configuration; | |
| import org.apache.hadoop.fs.Path; | |
| import org.apache.hadoop.io.IntWritable; | |
| import org.apache.hadoop.io.LongWritable; | |
| import org.apache.hadoop.io.Text; | |
| import org.apache.hadoop.io.NullWritable; | |
| import org.apache.hadoop.mapreduce.Job; | |
| import org.apache.hadoop.mapreduce.Mapper; | |
| import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | |
| import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; | |
| import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | |
| import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; | |
| /** | |
| * Hadoop MapReduce program to compute the prime numbers between 1 and 100 million. | |
| */ | |
| public final class Primes { | |
| public final static void main(final String[] args) throws Exception { | |
| final Configuration conf = new Configuration(); | |
| final Job job = new Job(conf, "Primes"); | |
| job.setJarByClass(Primes.class); | |
| job.setOutputKeyClass(NullWritable.class); | |
| job.setOutputValueClass(IntWritable.class); | |
| job.setMapperClass(PrimesMap.class); | |
| job.setInputFormatClass(TextInputFormat.class); | |
| job.setOutputFormatClass(TextOutputFormat.class); | |
| FileInputFormat.addInputPath(job, new Path(args[0])); | |
| FileOutputFormat.setOutputPath(job, new Path(args[1])); | |
| job.waitForCompletion(true); | |
| } | |
| public static final class PrimesMap extends Mapper<LongWritable, Text, NullWritable, IntWritable> { | |
| final NullWritable nw = NullWritable.get(); | |
| public final void map(final LongWritable key, final Text value, final Context context) | |
| throws IOException, InterruptedException { | |
| final int number = Integer.parseInt(value.toString()); | |
| if(isPrime(number)) { | |
| context.write(nw, new IntWritable(number)); | |
| } | |
| } | |
| } | |
| private static final boolean isPrime(final int number) { | |
| if (number == 1) { | |
| return false; | |
| } | |
| if (number % 2 == 0 && number != 2 || number % 3 == 0 && number != 3) { | |
| return false; | |
| } | |
| int limit = (int) ((Math.pow(number, 0.5) + 1) / 6.0 + 1); | |
| for (int i = 1; i < limit; i++) { | |
| if(number % (6 * i - 1) == 0){ | |
| return false; | |
| } | |
| if(number % (6 * i + 1) == 0){ | |
| return false; | |
| } | |
| } | |
| return true; | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // A pure Java program to compute prime numbers between 1 and 100 million. | |
| public final class PrimesPureJava { | |
| public final static void main(final String[] args) throws Exception { | |
| final long time = System.currentTimeMillis(); | |
| for (int i = 1; i < 100000000; i++) { | |
| if(isPrime(i)) { | |
| System.out.print(i + "\t"); | |
| } | |
| } | |
| System.out.println("\nTime taken to compute primes (in milliseconds): " + System.currentTimeMillis()-time); | |
| } | |
| private static final boolean isPrime(final int number) { | |
| if (number == 1) { | |
| return false; | |
| } | |
| if (number % 2 == 0 && number != 2 || number % 3 == 0 && number != 3) { | |
| return false; | |
| } | |
| int limit = (int) ((Math.pow(number, 0.5) + 1) / 6.0 + 1); | |
| for (int i = 1; i < limit; i++) { | |
| if(number % (6 * i - 1) == 0){ | |
| return false; | |
| } | |
| if(number % (6 * i + 1) == 0){ | |
| return false; | |
| } | |
| } | |
| return true; | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Trivia problem: Generate primes between 1 and 100 million with Hadoop MapReduce job. | |
| Input: Numbers from 1 to 100 million, 1 per line - use a simple C program for this. | |
| Output: Emit all the prime numbers. | |
| Execution time: ~2 minutes and 20 seconds [i.e. 140 seconds] in hdfs-local mode on my Cloudera-Udacity-Hadoop VM [VM has been given 3 GB RAM]. | |
| real 2m15.895s | |
| user 0m3.048s | |
| sys 0m0.517s | |
| Part files: 1 | |
| $ ls | |
| part-r-00000 | |
| $ tailf part-r-00000 | |
| 16147529 | |
| 16147531 | |
| 16147561 | |
| 16147591 | |
| 16147597 | |
| 16147609 | |
| 16147613 | |
| 16147627 | |
| 16147639 | |
| 16147643 | |
| [PB] $ more part-r-00000 | |
| 2 | |
| 3 | |
| 5 | |
| 7 | |
| 11 | |
| 13 | |
| 17 | |
| 19 | |
| 23 | |
| 29 | |
| 31 | |
| 37 | |
| 41 | |
| 43 | |
| 47 | |
| 53 | |
| 59 | |
| 61 | |
| 67 | |
| 71 | |
| 73 | |
| 79 | |
| 83 | |
| 89 | |
| 97 | |
| How many primes between 1 and 100 million? | |
| $ wc -l part-r-00000 | |
| 5761455 part-r-00000 | |
| Only 5.76 million primes between 1 to 100 million. | |
| In the same environment, a pure Java program took 5 minutes and 26 seconds [i.e. 326 seconds] for computing prime numbers between 1 and 100 million [same as above]. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <stdlib.h> | |
| // C Program to generate 100 million numbers. | |
| int main(int argc, char* argv) { | |
| FILE * fp; | |
| int i = 0; | |
| fp = fopen ("numbers", "w+"); | |
| for(i = 1;i<100000000;i++) { | |
| fprintf(fp, "%d\n", i); | |
| } | |
| fclose(fp); | |
| return(0); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment