Created
July 9, 2013 17:26
-
-
Save rfaisal/5959335 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Counts spam keywords in a subject line.
 *
 * <p>A word matches a keyword when, ignoring case, the word can be produced from the keyword by
 * repeating some of the keyword's characters in place; for example {@code "LoooW"} matches
 * {@code "LOW"} and {@code "Lottto"} matches {@code "lotto"}. Keywords are indexed by a key in
 * which consecutive repeated characters are collapsed, so a word is only compared against the
 * keywords that share its key.
 */
public class SimpleSpamDetector {

    /**
     * Tells whether {@code s[s_i..s_j]} can be obtained from {@code p[p_i..p_j]} by duplicating
     * characters, ignoring case. Both index ranges are inclusive.
     *
     * <p>The two ranges are walked run by run (a run is a maximal stretch of the same character):
     * every run of the pattern must be met by a run of the same character in {@code s} that is at
     * least as long, and both ranges must be used up together. This takes time linear in the two
     * range lengths.
     *
     * @param s   string holding the candidate word
     * @param s_i index of the first character of the word in {@code s}
     * @param s_j index of the last character of the word in {@code s}
     * @param p   string holding the pattern (keyword)
     * @param p_i index of the first character of the pattern in {@code p}
     * @param p_j index of the last character of the pattern in {@code p}
     * @return {@code true} if the word is the pattern with zero or more characters duplicated;
     *         {@code false} otherwise, including whenever either range is empty
     */
    public static boolean doesDuplicatingMatch(String s, int s_i, int s_j, String p, int p_i, int p_j) {
        // An empty range on either side never matches.
        if (s_j < s_i || p_j < p_i) {
            return false;
        }
        int sPos = s_i;
        int pPos = p_i;
        while (sPos <= s_j && pPos <= p_j) {
            char runChar = Character.toLowerCase(p.charAt(pPos));
            int patternRun = 0;
            while (pPos <= p_j && Character.toLowerCase(p.charAt(pPos)) == runChar) {
                pPos++;
                patternRun++;
            }
            int wordRun = 0;
            while (sPos <= s_j && Character.toLowerCase(s.charAt(sPos)) == runChar) {
                sPos++;
                wordRun++;
            }
            // Duplication can only lengthen a run; a different character gives wordRun == 0.
            if (wordRun < patternRun) {
                return false;
            }
        }
        // Leftover characters on either side mean the run sequences differ.
        return sPos > s_j && pPos > p_j;
    }

    /**
     * Builds the lookup key for {@code s[i..j]} (inclusive): the characters lower-cased, with each
     * stretch of consecutive equal characters collapsed to a single character.
     *
     * @param s source string
     * @param i index of the first character of the range
     * @param j index of the last character of the range
     * @return the collapsed lower-case key, or the empty string when {@code j < i}
     */
    public static String getKey(String s, int i, int j) {
        StringBuilder key = new StringBuilder();
        for (int k = i; k <= j; k++) {
            char current = Character.toLowerCase(s.charAt(k));
            if (k == i || current != Character.toLowerCase(s.charAt(k - 1))) {
                key.append(current);
            }
        }
        return key.toString();
    }

    /**
     * Adds a keyword to the index under its collapsed key. Null and empty keywords are ignored.
     *
     * @param hash  index from collapsed key to the keywords sharing that key; modified in place
     * @param value keyword to add
     */
    public static void insertToHash(HashMap<String, ArrayList<String>> hash, String value) {
        // Compare by content: == on strings only tests reference identity.
        if (value == null || value.isEmpty()) {
            return;
        }
        String k = getKey(value, 0, value.length() - 1);
        ArrayList<String> bucket = hash.get(k);
        if (bucket == null) {
            bucket = new ArrayList<String>();
            hash.put(k, bucket);
        }
        bucket.add(value);
    }

    /**
     * Tells whether the word {@code s[i..j]} (inclusive) matches any keyword in the index.
     *
     * @param hash index built with {@link #insertToHash}
     * @param s    string holding the word
     * @param i    index of the first character of the word
     * @param j    index of the last character of the word
     * @return {@code true} if at least one indexed keyword matches the word
     */
    public static boolean check(HashMap<String, ArrayList<String>> hash, String s, int i, int j) {
        ArrayList<String> candidates = hash.get(getKey(s, i, j));
        if (candidates == null) {
            return false;
        }
        for (String p : candidates) {
            if (doesDuplicatingMatch(s, i, j, p, 0, p.length() - 1)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Counts the words of {@code subjectLine} that match at least one keyword. Words are maximal
     * stretches of characters other than the space character {@code ' '}; a word is counted once
     * no matter how many keywords it matches.
     *
     * @param subjectLine text to scan; an empty or all-space line yields 0
     * @param keywords    keywords to look for
     * @return number of matching words
     */
    public static int countKeywords(String subjectLine, String[] keywords) {
        HashMap<String, ArrayList<String>> hash = new HashMap<String, ArrayList<String>>();
        for (String p : keywords) {
            insertToHash(hash, p);
        }
        int count = 0;
        int length = subjectLine.length();
        int pos = 0;
        while (pos < length) {
            if (subjectLine.charAt(pos) == ' ') {
                pos++;
                continue;
            }
            int wordStart = pos;
            while (pos < length && subjectLine.charAt(pos) != ' ') {
                pos++;
            }
            if (check(hash, subjectLine, wordStart, pos - 1)) {
                count++;
            }
        }
        return count;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| public class SimpleSpamDetectorTest { | |
| @Test | |
| public void testCountKeywords() { | |
| assertEquals(4, SimpleSpamDetector.countKeywords("LoooW INTEREST RATES available dont BE slow", new String[]{"interest","rates","loan","available","LOW"})); | |
| assertEquals(2, SimpleSpamDetector.countKeywords("Dear Richard Get Rich Quick no risk", new String[]{"rich","risk","Quicken","wealth","SAVE"})); | |
| assertEquals(3, SimpleSpamDetector.countKeywords("in debbtt againn and aAgain and AGAaiIN", new String[]{"AGAIN","again","Again","again"})); | |
| assertEquals(3, SimpleSpamDetector.countKeywords("PlAyy ThEE Lottto get Loottoo feever", new String[]{"play","lotto","lottery","looser"})); | |
| assertEquals(0, SimpleSpamDetector.countKeywords(" ", new String[]{"empty","space","does","not","match"})); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment