Created
January 29, 2012 01:24
-
-
Save reddragon/1696587 to your computer and use it in GitHub Desktop.
Finding the number of palindromes in a text corpus using Hadoop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.util.StringTokenizer; | |
import org.apache.hadoop.conf.Configuration; | |
import org.apache.hadoop.fs.Path; | |
import org.apache.hadoop.io.IntWritable; | |
import org.apache.hadoop.io.Text; | |
import org.apache.hadoop.mapreduce.Job; | |
import org.apache.hadoop.mapreduce.Mapper; | |
import org.apache.hadoop.mapreduce.Reducer; | |
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | |
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | |
import org.apache.hadoop.util.GenericOptionsParser; | |
public class Palindrome { | |
public static class PalindromeMapper | |
extends Mapper<Object, Text, Text, IntWritable> { | |
public void map(Object key, Text value, Context context) | |
throws IOException, InterruptedException { | |
// Format: K1,V1,K2,V2 | |
// So key here probably is the byte offset | |
// Value is the line | |
// Context is the stream to which you will write | |
// Loads of customizations are possible, to get the | |
// key-value format that you want. | |
Text word = new Text(); | |
IntWritable one = new IntWritable(1); | |
StringTokenizer tokens = new StringTokenizer(value.toString()); | |
while(tokens.hasMoreTokens()) { | |
String token = tokens.nextToken(); | |
// Check if palindrome | |
boolean isPalindrome = true; | |
for(int i = 0; i < token.length(); i++) | |
if(token.charAt(i) != token.charAt(token.length()-i-1)) | |
{ | |
isPalindrome = false; | |
break; | |
} | |
if(isPalindrome) | |
{ | |
word.set(token); | |
context.write(word, one); | |
} | |
} | |
} | |
} | |
public static class PalindromeReducer | |
extends Reducer<Text,IntWritable,Text,IntWritable> { | |
private IntWritable result = new IntWritable(); | |
public void reduce(Text key, Iterable<IntWritable> values, Context context) | |
throws IOException, InterruptedException { | |
int sum = 0; | |
for(IntWritable val : values) { | |
sum += val.get(); | |
} | |
result.set(sum); | |
context.write(key, result); | |
} | |
} | |
public static void main(String[] args) throws Exception { | |
Configuration conf = new Configuration(); | |
Job job = new Job(conf, "palindrome"); | |
job.setJarByClass(Palindrome.class); | |
job.setMapperClass(PalindromeMapper.class); | |
job.setReducerClass(PalindromeReducer.class); | |
job.setOutputKeyClass(Text.class); | |
job.setOutputValueClass(IntWritable.class); | |
FileInputFormat.addInputPath(job, new Path(args[0])); | |
FileOutputFormat.setOutputPath(job, new Path(args[1])); | |
System.exit(job.waitForCompletion(true) ? 0 : 1); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
this code doesnot give correct palindrome output,it shows the word is palindrome even if only first and last letters matches ..pls help