-
-
Save PimDeWitte/c04cc17bc5fa9d7e3aee6670d4105941 to your computer and use it in GitHub Desktop.
static Map<String, String[]> words = new HashMap<>(); | |
static int largestWordLength = 0; | |
public static void loadConfigs() { | |
try { | |
BufferedReader reader = new BufferedReader(new InputStreamReader(new URL("https://docs.google.com/spreadsheets/d/1hIEi2YG3ydav1E06Bzf2mQbGZ12kh2fe4ISgLg_UBuM/export?format=csv").openConnection().getInputStream())); | |
String line = ""; | |
int counter = 0; | |
while((line = reader.readLine()) != null) { | |
counter++; | |
String[] content = null; | |
try { | |
content = line.split(","); | |
if(content.length == 0) { | |
continue; | |
} | |
String word = content[0]; | |
String[] ignore_in_combination_with_words = new String[]{}; | |
if(content.length > 1) { | |
ignore_in_combination_with_words = content[1].split("_"); | |
} | |
if(word.length() > largestWordLength) { | |
largestWordLength = word.length(); | |
} | |
words.put(word.replaceAll(" ", ""), ignore_in_combination_with_words); | |
} catch(Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
System.out.println("Loaded " + counter + " words to filter out"); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
} | |
/**
 * Finds every blocked word hidden anywhere inside {@code input}.
 *
 * <p>The input is normalized first: common leetspeak characters are mapped to letters, the text
 * is lower-cased, and everything that is not {@code a-z} is removed (so spaces and punctuation
 * between letters do not hide a word). Every substring of the normalized text, up to
 * {@code largestWordLength} characters long, is then looked up in {@code words}. A hit is
 * dropped when the normalized text also contains one of that word's ignore terms (e.g. a
 * word that is only a fragment of a harmless longer word).
 *
 * <p>Each reported word is also printed to standard output.
 *
 * @param input text to inspect; may be {@code null}
 * @return the blocked words found, one entry per occurrence, in scan order; empty when
 *         {@code input} is {@code null} or nothing matched
 */
public static ArrayList<String> badWordsFound(String input) {
    ArrayList<String> badWords = new ArrayList<>();
    if (input == null) {
        return badWords;
    }
    // Undo leetspeak, then lower-case with a fixed locale so the result does not depend on the
    // JVM default (in a Turkish locale "I" would become a dotless i and be stripped).
    String normalized = input
            .replace('1', 'i')
            .replace('!', 'i')
            .replace('3', 'e')
            .replace('4', 'a')
            .replace('@', 'a')
            .replace('5', 's')
            .replace('7', 't')
            .replace('0', 'o')
            .replace('9', 'g')
            .toLowerCase(java.util.Locale.ROOT)
            .replaceAll("[^a-z]", "");
    for (int start = 0; start < normalized.length(); start++) {
        // Candidate lengths run up to AND INCLUDING the longest dictionary word; the previous
        // strict bound meant the longest word could never be matched.
        int longest = Math.min(largestWordLength, normalized.length() - start);
        for (int length = 1; length <= longest; length++) {
            String candidate = normalized.substring(start, start + length);
            if (words.containsKey(candidate)
                    && !containsIgnoreTerm(normalized, words.get(candidate))) {
                badWords.add(candidate);
            }
        }
    }
    for (String s : badWords) {
        System.out.println(s + " qualified as a bad word in a username");
    }
    return badWords;
}

/** Returns true when {@code normalizedInput} contains any non-empty term from {@code ignoreTerms}. */
private static boolean containsIgnoreTerm(String normalizedInput, String[] ignoreTerms) {
    if (ignoreTerms == null) {
        return false;
    }
    for (String term : ignoreTerms) {
        // Skip empty terms: contains("") is always true and would exempt every match.
        if (term != null && !term.isEmpty() && normalizedInput.contains(term)) {
            return true;
        }
    }
    return false;
}
public static String filterText(String input, String username) { | |
ArrayList<String> badWords = badWordsFound(input); | |
if(badWords.size() > 0) { | |
return "This message was blocked because a bad word was found. If you believe this word should not be blocked, please message support."; | |
} | |
return input; | |
} | |
Love it!
what's variable "words"? it's not defined
@hayderma Map<String, String[]> words = new HashMap<>();
Copy pasted and implemented for small java application with Test class : https://github.com/souwoxi/Profanity
Copy pasted and implemented for small java application with Test class : https://github.com/souwoxi/Profanity
It doesn't work.
// Masks each detected word in the message with asterisks instead of blocking the whole message.
// NOTE(review): `input` and `badWordsFound` are not declared in this fragment; presumably
// `badWordsFound` is the list returned by the badWordsFound(input) method — confirm.
String inputToReturn = input;
for (String swearWord : badWordsFound) {
// Build a run of '*' exactly as long as the word being masked.
char[] charsStars = new char[swearWord.length()];
Arrays.fill(charsStars, '*');
final String stars = new String(charsStars);
// The "(?i)" is to make the replacement case insensitive.
// swearWord is used as a regex here, so it only matches a literal, contiguous occurrence.
inputToReturn = inputToReturn.replaceAll("(?i)" + swearWord, stars);
}
It doesn't return a result.
Using Google Docs is great.
@jolson490 Your git link is not working. I would really like to check your implementation as I am working on a spring project that needs some input filter :)
Hey guys, didn't realize people actually use this. Here's the link to the background and the tutorial: https://pimdewitte.me/2016/05/28/filtering-combinations-of-bad-words-out-of-string-inputs/
I know it has been a while, but is there a way to replace the swear word with stars and return the full message? @PimDeWitte
Similar to what @souwoxi did but instead of having no spaces, keeping the spaces and punctuation.
I improved the performance of this by over an order of magnitude and completely eliminated garbage (allocations) 😄
https://gist.github.com/Jire/4aa72bd3554cdccdc369c216a230ee56
Please, can I get the code for a Spring Boot application?
Can anyone help me fix this? I want to mask a combination of two words with stars, for example changing (f*** you) to (**** ***).
thank you @PimDeWitte - this was helpful to me.
heads up in case this is helpful to anyone else - here's how I did a copy/paste/edit of this code to integrate it into a Spring Boot application (to censor input provided by user via an MVC form): https://github.com/ILMServices/FantasyFootball/commit/dc487b4