Skip to content

Instantly share code, notes, and snippets.

@fitzyyf
Last active December 11, 2015 18:19
Show Gist options
  • Save fitzyyf/4640364 to your computer and use it in GitHub Desktop.
Save fitzyyf/4640364 to your computer and use it in GitHub Desktop.
Google Guava BloomFilter
/*
* Copyright (c) 2012-2013 www.iflytek.com. All Rights Reserved.
* This software for customer relationship management system, developed by Ifly@ZY team.
* Software code and design for the team, copy rights reserved.
*/
package com.ifkytek.irime;
import java.util.Set;
import java.util.regex.Pattern;
import com.google.common.base.Strings;
import com.google.common.collect.Sets;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnel;
import com.google.common.hash.PrimitiveSink;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
/**
 * <p>
 * Exercises a Guava {@link BloomFilter} that is driven by a custom {@link Funnel}.
 * </p>
 * <p>
 * The funnel only forwards strings that match the e-mail pattern to the hash sink.
 * The test stores e-mail addresses in the filter and then checks that none of the
 * non-e-mail strings is reported as possibly contained.
 * </p>
 *
 * @author poplar.yfyang
 * @version 1.0 2013-01-26 11:36 AM
 * @since JDK 1.5
 */
public class BlootFilterFunnelTest {
    /** E-mail syntax rule, compiled once so the funnel does not recompile it per element. */
    private static final Pattern EMAIL = Pattern.compile(
            "^[a-zA-Z0-9]+([_.]?[a-zA-Z0-9])*@([a-zA-Z0-9]+\\.)+[a-zA-Z0-9]{2,3}$");

    /** Sample data that is stored in the filter (well-formed e-mail addresses). */
    private final Set<String> mailList = Sets.newHashSet();

    /** Sample data that is never stored in the filter (strings that are not e-mail addresses). */
    private final Set<String> noMailList = Sets.newHashSet();

    /** The Bloom filter under test; created inside the test method. */
    private BloomFilter<String> bloomFilter;

    /**
     * Builds the fixture: for every number from 1 to 10000, odd numbers produce an
     * e-mail address and even numbers produce a plain string without a domain part.
     *
     * @throws Exception on test set-up failure
     */
    @Before
    public void setUp() throws Exception {
        for (int i = 1; i <= 10000; i++) {
            if (i % 2 == 0) {
                noMailList.add("iflytek" + i);
            } else {
                mailList.add("iflytek" + i + "@iflytek.com");
            }
        }
    }

    /**
     * Stores every e-mail address in the filter and verifies that each non-e-mail
     * string is rejected by {@link BloomFilter#mightContain(Object)}.
     *
     * @throws Exception on unexpected test failure
     */
    @Test
    public void test_BlootFilter() throws Exception {
        // The second argument is the expected number of insertions, which sizes the filter.
        bloomFilter = BloomFilter.create(stringFunnel(), mailList.size());
        addStoredStringToBloomFilter();

        // Count the strings the filter definitely does not contain.
        int rejectedCount = 0;
        for (String candidate : noMailList) {
            if (!bloomFilter.mightContain(candidate)) {
                rejectedCount++;
            }
        }

        // JUnit's contract is (message, expected, actual).
        Assert.assertEquals("every non-e-mail string should be rejected by the filter",
                noMailList.size(), rejectedCount);
    }

    /** Adds every stored e-mail address to the Bloom filter. */
    private void addStoredStringToBloomFilter() {
        for (String email : mailList) {
            bloomFilter.put(email);
        }
    }

    /**
     * Returns the funnel that feeds only e-mail-shaped strings into the hash sink.
     *
     * @return the shared {@link StringFunnel} instance
     */
    public static Funnel<String> stringFunnel() {
        return StringFunnel.INSTANCE;
    }

    /** Custom funnel that forwards a string to the sink only when it is an e-mail address. */
    private enum StringFunnel implements Funnel<String> {
        INSTANCE;

        /**
         * Writes {@code from} into the sink when it is an e-mail address; otherwise
         * writes nothing.
         *
         * <p>NOTE(review): because nothing is written for non-matching input, every
         * non-e-mail string presents the same empty input to the sink. This appears
         * to be the intent of the demo; confirm before reusing the funnel elsewhere.
         *
         * @param from the string to funnel
         * @param into the sink receiving the string's characters
         */
        @Override
        public void funnel(String from, PrimitiveSink into) {
            if (isEmail(from)) {
                into.putString(from);
            }
        }

        /**
         * Checks whether a string is an e-mail address according to {@code EMAIL}.
         *
         * <p>The pattern requires a local part that starts with a letter or digit, where
         * each later letter or digit may be preceded by at most one {@code '_'} or
         * {@code '.'}; a single {@code '@'}; one or more dot-terminated labels of letters
         * and digits; and a final label of two or three letters or digits.
         *
         * @param email the string to validate
         * @return {@code true} if the string is non-empty and matches the pattern,
         *         {@code false} otherwise (including for {@code null})
         */
        public boolean isEmail(String email) {
            return !Strings.isNullOrEmpty(email) && EMAIL.matcher(email).matches();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment