Jire · December 12, 2023 23:18 · ChosenQuill · Jan 9, 2019
diff --git a/BadWords.java b/BadWords.java
 import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
 import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
 import net.openhft.hashing.LongHashFunction;

 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.net.URL;

 /**
  * Originally created by Pim De Witte.
  *
  * Performance drastically improved by over an order of magnitude by Thomas G. P. Nappo (Jire).
  * Garbage production has been eliminated as well.
  *
  * <p>Substring-based word filter. Flagged words are stored only as 64-bit hashes
  * ({@code LongHashFunction.xx()}) of the word with its spaces removed, so a lookup is a hash
  * match rather than a string comparison. Input text is lower-cased, a few leetspeak symbols are
  * mapped back to letters and every other character is dropped before scanning.
  *
  * <p>Registered words are hashed as given (apart from space removal); because the scanned text
  * only ever contains lower-cased letters, a word holding upper-case letters, digits or
  * punctuation will never match.
  */
 public class BadWords {

 	/** Hash of each flagged word (spaces removed) mapped to the substrings that excuse a match. */
 	static Long2ObjectMap<String[]> words = new Long2ObjectOpenHashMap<>();

 	/** Length of the longest registered key; upper bound for the window size scanned per position. */
 	static int largestWordLength = 0;

 	private static final LongHashFunction HASH = LongHashFunction.xx();

 	private static final String[] NO_EXCEPTIONS = new String[0];

 	private static final String WORD_LIST_URL = "https://docs.google.com/spreadsheets/d/1hIEi2YG3ydav1E06Bzf2mQbGZ12kh2fe4ISgLg_UBuM/export?format=csv";

 	/**
 	 * {letter, symbol} pairs used to undo leetspeak. The table is searched top to bottom and the
 	 * first matching symbol wins, so for symbols listed twice ('0', '1', '!') only the first
 	 * mapping is ever applied.
 	 */
 	private static final char[][] LEET_SUBSTITUTIONS = {
 			{'o', '0'},
 			{'i', '1'},
 			{'l', '1'},
 			{'t', '+'},
 			{'e', '3'},
 			{'i', '!'},
 			{'l', '!'},
 			{'s', '$'},
 			{'a', '&'},
 			{'a', '@'},
 			{'c', '('},
 			{'d', ')'},
 			{'d', '0'},
 			{'g', '6'},
 			{'t', '7'},
 			{'g', '9'},
 			{'s', '5'},
 			{'a', '4'}
 	};

 	// Per-thread scratch buffer so that scanning does not allocate a new builder per call.
 	private static final ThreadLocal<StringBuilder> BUFFER = ThreadLocal.withInitial(StringBuilder::new); // make this regular if you don't need thread safety.

 	/**
 	 * Flags a single word with no ignore list.
 	 *
 	 * @param word the word to flag; spaces are removed before it is stored
 	 */
 	public static void flag(String word) {
 		register(word, NO_EXCEPTIONS);
 	}

 	/**
 	 * Downloads the CSV word list and registers every row. Column one is the word, the optional
 	 * column two is a '_'-separated list of substrings that excuse the word when present in the
 	 * scanned text. I/O failures are printed and otherwise ignored.
 	 */
 	public static void loadConfigs() {
 		// try-with-resources: the reader (and the connection stream under it) is always closed.
 		try (BufferedReader reader = new BufferedReader(new InputStreamReader(
 				new URL(WORD_LIST_URL).openConnection().getInputStream(),
 				java.nio.charset.StandardCharsets.UTF_8))) {
 			int counter = 0;
 			String line;
 			while ((line = reader.readLine()) != null) {
 				String[] content = line.split(",");
 				if (content.length == 0) {
 					continue;
 				}
 				String[] ignoreInCombinationWith = content.length > 1
 						? parseIgnoreList(content[1])
 						: NO_EXCEPTIONS;
 				// Count only rows that actually produced a filter entry (blank rows do not).
 				if (register(content[0], ignoreInCombinationWith)) {
 					counter++;
 				}
 			}
 			System.out.println("Loaded " + counter + " words to filter out");
 		} catch (IOException e) {
 			e.printStackTrace();
 		}
 	}

 	/**
 	 * Iterates over a String input and checks whether a cuss word was found in a list, then checks if the word should be ignored (e.g. bass contains the word *ss).
 	 *
 	 * @param input the text to scan; may be {@code null}
 	 * @return {@code true} if a flagged word occurs in the normalized input and none of that
 	 *         word's ignore substrings do; {@code false} otherwise, including for {@code null}
 	 */
 	public static boolean badWordsFound(String input) {
 		if (input == null) {
 			return false;
 		}

 		StringBuilder text = BUFFER.get();
 		text.setLength(0);
 		normalizeInto(input, text);

 		int length = text.length();
 		for (int start = 0; start < length; start++) {
 			// Window sizes run up to AND INCLUDING the longest registered word; stopping one
 			// short would make the longest words undetectable.
 			int maxLen = Math.min(length - start, largestWordLength);
 			for (int len = 1; len <= maxLen; len++) {
 				String[] ignoreCheck = words.get(HASH.hashChars(text, start, len));
 				// for example, if you want to say the word bass, that should be possible.
 				if (ignoreCheck != null && !isExcused(text, ignoreCheck)) {
 					return true;
 				}
 			}
 		}

 		return false;
 	}

 	/** Stores the hash of {@code word} (spaces removed); returns false if nothing is left to store. */
 	private static boolean register(String word, String[] ignoreInCombinationWith) {
 		String key = word.replace(" ", "");
 		if (key.isEmpty()) {
 			return false;
 		}
 		// Measure the key that is actually hashed, not the raw word with its spaces.
 		if (key.length() > largestWordLength) {
 			largestWordLength = key.length();
 		}
 		words.put(HASH.hashChars(key), ignoreInCombinationWith);
 		return true;
 	}

 	/** Splits a '_'-separated ignore list, dropping empty entries (an empty entry would excuse every match). */
 	private static String[] parseIgnoreList(String field) {
 		String[] parts = field.split("_");
 		int kept = 0;
 		for (String part : parts) {
 			if (!part.isEmpty()) {
 				parts[kept++] = part;
 			}
 		}
 		return kept == parts.length ? parts : java.util.Arrays.copyOf(parts, kept);
 	}

 	/** Appends the lower-cased letters of {@code input} to {@code out}, mapping known leetspeak symbols to letters and dropping everything else. */
 	private static void normalizeInto(String input, StringBuilder out) {
 		for (int i = 0; i < input.length(); i++) {
 			char c = input.charAt(i);
 			if (Character.isLetter(c)) {
 				out.append(Character.toLowerCase(c));
 				continue;
 			}
 			for (char[] substitution : LEET_SUBSTITUTIONS) {
 				if (c == substitution[1]) {
 					out.append(substitution[0]);
 					break;
 				}
 			}
 		}
 	}

 	/** Returns true if any of {@code exceptions} occurs anywhere in {@code text}. */
 	private static boolean isExcused(StringBuilder text, String[] exceptions) {
 		for (String exception : exceptions) {
 			if (text.indexOf(exception) >= 0) {
 				return true;
 			}
 		}
 		return false;
 	}

 }
	import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
	import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
	import net.openhft.hashing.LongHashFunction;

	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.InputStreamReader;
	import java.net.URL;

	/**
	 * Originally created by Pim De Witte.
	 *
	 * Performance drastically improved by over an order of magnitude by Thomas G. P. Nappo (Jire).
	 * Garbage production has been eliminated as well.
	 *
	 * <p>Substring-based word filter. Flagged words are stored only as 64-bit hashes
	 * ({@code LongHashFunction.xx()}) of the word with its spaces removed, so a lookup is a hash
	 * match rather than a string comparison. Input text is lower-cased, a few leetspeak symbols are
	 * mapped back to letters and every other character is dropped before scanning.
	 *
	 * <p>Registered words are hashed as given (apart from space removal); because the scanned text
	 * only ever contains lower-cased letters, a word holding upper-case letters, digits or
	 * punctuation will never match.
	 */
	public class BadWords {

		/** Hash of each flagged word (spaces removed) mapped to the substrings that excuse a match. */
		static Long2ObjectMap<String[]> words = new Long2ObjectOpenHashMap<>();

		/** Length of the longest registered key; upper bound for the window size scanned per position. */
		static int largestWordLength = 0;

		private static final LongHashFunction HASH = LongHashFunction.xx();

		private static final String[] NO_EXCEPTIONS = new String[0];

		private static final String WORD_LIST_URL = "https://docs.google.com/spreadsheets/d/1hIEi2YG3ydav1E06Bzf2mQbGZ12kh2fe4ISgLg_UBuM/export?format=csv";

		/**
		 * {letter, symbol} pairs used to undo leetspeak. The table is searched top to bottom and the
		 * first matching symbol wins, so for symbols listed twice ('0', '1', '!') only the first
		 * mapping is ever applied.
		 */
		private static final char[][] LEET_SUBSTITUTIONS = {
				{'o', '0'},
				{'i', '1'},
				{'l', '1'},
				{'t', '+'},
				{'e', '3'},
				{'i', '!'},
				{'l', '!'},
				{'s', '$'},
				{'a', '&'},
				{'a', '@'},
				{'c', '('},
				{'d', ')'},
				{'d', '0'},
				{'g', '6'},
				{'t', '7'},
				{'g', '9'},
				{'s', '5'},
				{'a', '4'}
		};

		// Per-thread scratch buffer so that scanning does not allocate a new builder per call.
		private static final ThreadLocal<StringBuilder> BUFFER = ThreadLocal.withInitial(StringBuilder::new); // make this regular if you don't need thread safety.

		/**
		 * Flags a single word with no ignore list.
		 *
		 * @param word the word to flag; spaces are removed before it is stored
		 */
		public static void flag(String word) {
			register(word, NO_EXCEPTIONS);
		}

		/**
		 * Downloads the CSV word list and registers every row. Column one is the word, the optional
		 * column two is a '_'-separated list of substrings that excuse the word when present in the
		 * scanned text. I/O failures are printed and otherwise ignored.
		 */
		public static void loadConfigs() {
			// try-with-resources: the reader (and the connection stream under it) is always closed.
			try (BufferedReader reader = new BufferedReader(new InputStreamReader(
					new URL(WORD_LIST_URL).openConnection().getInputStream(),
					java.nio.charset.StandardCharsets.UTF_8))) {
				int counter = 0;
				String line;
				while ((line = reader.readLine()) != null) {
					String[] content = line.split(",");
					if (content.length == 0) {
						continue;
					}
					String[] ignoreInCombinationWith = content.length > 1
							? parseIgnoreList(content[1])
							: NO_EXCEPTIONS;
					// Count only rows that actually produced a filter entry (blank rows do not).
					if (register(content[0], ignoreInCombinationWith)) {
						counter++;
					}
				}
				System.out.println("Loaded " + counter + " words to filter out");
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

		/**
		 * Iterates over a String input and checks whether a cuss word was found in a list, then checks if the word should be ignored (e.g. bass contains the word *ss).
		 *
		 * @param input the text to scan; may be {@code null}
		 * @return {@code true} if a flagged word occurs in the normalized input and none of that
		 *         word's ignore substrings do; {@code false} otherwise, including for {@code null}
		 */
		public static boolean badWordsFound(String input) {
			if (input == null) {
				return false;
			}

			StringBuilder text = BUFFER.get();
			text.setLength(0);
			normalizeInto(input, text);

			int length = text.length();
			for (int start = 0; start < length; start++) {
				// Window sizes run up to AND INCLUDING the longest registered word; stopping one
				// short would make the longest words undetectable.
				int maxLen = Math.min(length - start, largestWordLength);
				for (int len = 1; len <= maxLen; len++) {
					String[] ignoreCheck = words.get(HASH.hashChars(text, start, len));
					// for example, if you want to say the word bass, that should be possible.
					if (ignoreCheck != null && !isExcused(text, ignoreCheck)) {
						return true;
					}
				}
			}

			return false;
		}

		/** Stores the hash of {@code word} (spaces removed); returns false if nothing is left to store. */
		private static boolean register(String word, String[] ignoreInCombinationWith) {
			String key = word.replace(" ", "");
			if (key.isEmpty()) {
				return false;
			}
			// Measure the key that is actually hashed, not the raw word with its spaces.
			if (key.length() > largestWordLength) {
				largestWordLength = key.length();
			}
			words.put(HASH.hashChars(key), ignoreInCombinationWith);
			return true;
		}

		/** Splits a '_'-separated ignore list, dropping empty entries (an empty entry would excuse every match). */
		private static String[] parseIgnoreList(String field) {
			String[] parts = field.split("_");
			int kept = 0;
			for (String part : parts) {
				if (!part.isEmpty()) {
					parts[kept++] = part;
				}
			}
			return kept == parts.length ? parts : java.util.Arrays.copyOf(parts, kept);
		}

		/** Appends the lower-cased letters of {@code input} to {@code out}, mapping known leetspeak symbols to letters and dropping everything else. */
		private static void normalizeInto(String input, StringBuilder out) {
			for (int i = 0; i < input.length(); i++) {
				char c = input.charAt(i);
				if (Character.isLetter(c)) {
					out.append(Character.toLowerCase(c));
					continue;
				}
				for (char[] substitution : LEET_SUBSTITUTIONS) {
					if (c == substitution[1]) {
						out.append(substitution[0]);
						break;
					}
				}
			}
		}

		/** Returns true if any of {@code exceptions} occurs anywhere in {@code text}. */
		private static boolean isExcused(StringBuilder text, String[] exceptions) {
			for (String exception : exceptions) {
				if (text.indexOf(exception) >= 0) {
					return true;
				}
			}
			return false;
		}

	}