Created
March 27, 2015 08:16
-
-
Save charleehu/a538ae8f237a3857e333 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* DFA敏感词过滤 | |
* <pre> | |
* 五 = { | |
* isEnd = 0 | |
* 星 = { | |
* isEnd = 0 | |
* 红 = { | |
* isEnd = 0 | |
* 旗 = { isEnd = 1 } | |
* } | |
* } | |
* } | |
* </pre> | |
* @author <a href="mailto:[email protected]">Xiaowei Hu</a> | |
* @version 1.0 Aug 6, 2014 10:29:45 AM | |
*/ | |
public class SensitivewordFilter { | |
private static HashMap<Object, Object> sensitivewordMap; | |
static { | |
List<DictKeyWord> listSensitiveword = DictManager.KEY_WORD.getDictList(); | |
initSensitivewordMap(listSensitiveword); | |
} | |
/** | |
* 构建敏感词树 | |
* | |
* @param wordList | |
*/ | |
@SuppressWarnings({ "rawtypes", "unchecked" }) | |
private static void initSensitivewordMap(List<DictKeyWord> wordList) { | |
sensitivewordMap = new HashMap<>(wordList.size() / 2); | |
Map nowMap = null; | |
Map newWorMap = null; | |
for (DictKeyWord kw : wordList) { | |
nowMap = sensitivewordMap; | |
String word = kw.getKeyword(); | |
if (word != null) { | |
word = word.trim(); | |
for (int i = 0; i < word.length(); i++) { | |
char keyChar = word.charAt(i); | |
Object wordMap = nowMap.get(keyChar); | |
if (wordMap != null) { | |
nowMap = (Map) wordMap; | |
} else { | |
newWorMap = new HashMap(2); | |
newWorMap.put("isEnd", "0"); | |
nowMap.put(keyChar, newWorMap); | |
nowMap = newWorMap; | |
} | |
if (i == word.length() - 1) { | |
nowMap.put("isEnd", "1");//置结束标志 | |
} | |
} | |
} | |
} | |
} | |
/** | |
* 判断指定字符是否包含敏感词 | |
* | |
* @param txt | |
* @return | |
*/ | |
@SuppressWarnings("rawtypes") | |
public static boolean isContaintSensitiveWord(String txt) { | |
boolean flag = false; //敏感词结束标识位:用于敏感词只有1位的情况 | |
char word = 0; | |
Map nowMap = sensitivewordMap; | |
a:for (int j = 0; j < txt.length(); j++) { | |
for(int i = j; i < txt.length() ; i++){ | |
word = txt.charAt(i); | |
nowMap = (Map) nowMap.get(word); //获取指定key | |
if(nowMap != null){ //存在,则判断是否为最后一个 | |
if("1".equals(nowMap.get("isEnd"))){ //如果为最后一个匹配规则,结束循环,返回匹配标识数 | |
flag = true; //结束标志位为true | |
break a;//最小匹配 | |
} | |
} | |
else { | |
nowMap = sensitivewordMap; | |
} | |
} | |
} | |
return flag; | |
} | |
public static void main(String[] args) { | |
System.out.println(isContaintSensitiveWord("小三")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment