Skip to content

Instantly share code, notes, and snippets.

@charleehu
Created March 27, 2015 08:16
Show Gist options
  • Save charleehu/a538ae8f237a3857e333 to your computer and use it in GitHub Desktop.
Save charleehu/a538ae8f237a3857e333 to your computer and use it in GitHub Desktop.
/**
 * Sensitive-word filter backed by a character trie (DFA).
 * <p>
 * Every dictionary word is stored as a path of characters starting at the
 * root node; the node reached by the last character of a word is flagged as
 * a word end. For the word 五星红旗 the trie looks like this:
 * <pre>
 * 五 = {
 *   end = false
 *   星 = {
 *     end = false
 *     红 = {
 *       end = false
 *       旗 = { end = true }
 *     }
 *   }
 * }
 * </pre>
 * @author <a href="mailto:[email protected]">Xiaowei Hu</a>
 * @version 1.0 Aug 6, 2014 10:29:45 AM
 */
public class SensitivewordFilter {

    /** Root of the trie; it carries no character itself, only children. */
    private static TrieNode root;

    static {
        List<DictKeyWord> listSensitiveword = DictManager.KEY_WORD.getDictList();
        initSensitivewordMap(listSensitiveword);
    }

    /** One trie node: outgoing edges keyed by character plus a word-end flag. */
    private static final class TrieNode {
        private final Map<Character, TrieNode> children = new HashMap<Character, TrieNode>(2);
        private boolean end;
    }

    /**
     * Builds the sensitive-word trie and stores it in {@link #root},
     * replacing any previously built trie.
     * <p>
     * Each keyword is trimmed before insertion. A {@code null} list,
     * {@code null} entries, {@code null} keywords and keywords that are empty
     * after trimming contribute nothing to the trie.
     *
     * @param wordList dictionary entries whose keywords are inserted; may be {@code null}
     */
    private static void initSensitivewordMap(List<DictKeyWord> wordList) {
        TrieNode newRoot = new TrieNode();
        if (wordList != null) {
            for (DictKeyWord kw : wordList) {
                if (kw == null) {
                    continue;
                }
                String word = kw.getKeyword();
                if (word == null) {
                    continue;
                }
                word = word.trim();
                if (word.isEmpty()) {
                    // Never flag the root itself as a word end.
                    continue;
                }
                TrieNode node = newRoot;
                for (int i = 0; i < word.length(); i++) {
                    Character key = word.charAt(i);
                    TrieNode next = node.children.get(key);
                    if (next == null) {
                        next = new TrieNode();
                        node.children.put(key, next);
                    }
                    node = next;
                }
                // The node reached by the last character marks a complete word.
                node.end = true;
            }
        }
        root = newRoot;
    }

    /**
     * Checks whether the given text contains at least one sensitive word.
     * <p>
     * Every start offset of the text is tried independently: matching always
     * begins at the trie root and the offset is abandoned on the first
     * character that has no edge in the trie. The method returns as soon as
     * any word-end node is reached (shortest match).
     *
     * @param txt text to scan; {@code null} or empty text contains no sensitive word
     * @return {@code true} if some dictionary word occurs as a contiguous
     *         substring of {@code txt}, {@code false} otherwise
     */
    public static boolean isContaintSensitiveWord(String txt) {
        if (txt == null || txt.isEmpty()) {
            return false;
        }
        final int length = txt.length();
        for (int start = 0; start < length; start++) {
            // Restart from the root for each offset so that a partial match
            // left over from a previous offset cannot be continued here.
            TrieNode node = root;
            for (int i = start; i < length; i++) {
                node = node.children.get(txt.charAt(i));
                if (node == null) {
                    // No word starting at this offset continues with this character.
                    break;
                }
                if (node.end) {
                    return true;
                }
            }
        }
        return false;
    }

    public static void main(String[] args) {
        System.out.println(isContaintSensitiveWord("小三"));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment