Last active
January 12, 2017 03:39
-
-
Save yangl/173a90e3141d5a43394c24efbf11d995 to your computer and use it in GitHub Desktop.
根据ipip.net提供的手机号码归属地生成客户端本地数据文件,供通讯录快速匹配数据,原来数据14.2M压缩后436k
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.uxin.feerate.util; | |
import com.google.common.base.Charsets; | |
import com.google.common.collect.Maps; | |
import com.google.common.collect.Sets; | |
import com.google.common.io.Files; | |
import io.netty.buffer.ByteBuf; | |
import io.netty.buffer.Unpooled; | |
import lombok.extern.slf4j.Slf4j; | |
import org.apache.commons.lang3.StringUtils; | |
import java.io.File; | |
import java.io.IOException; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Set; | |
/** | |
* 根据ipip.net提供的手机号码归属地生成客户端本地数据文件,供通讯录快速匹配数据 | |
* | |
* @author YANGLiiN | |
* @date 2016-09-25 10:25 | |
*/ | |
@Slf4j | |
public class Phones2 { | |
public static Map<Integer, Integer> phoneAreaIds = Maps.newTreeMap(); | |
public static Map<Integer, String> idAreas = Maps.newTreeMap(); | |
// 根据手机号查询归属地 | |
public static String findAreaByPhone(String phone) throws Exception { | |
String area = null; | |
if (StringUtils.isBlank(phone) || StringUtils.length(phone) != 11) { | |
return area; | |
} | |
String p = StringUtils.substring(phone, 0, 7); | |
int searchPhone = Integer.parseInt(p); | |
// 由于相连的数据只保存了最开始的那条数据,固没有就要往上查找 | |
// 归属地id从1开始 | |
Integer areaId = -1; | |
int findDepth = -1; | |
for (int i = 0; i < 1000; i++) { | |
areaId = phoneAreaIds.get(searchPhone - i); | |
findDepth = i; | |
if (areaId != null && areaId.intValue() > 0) { | |
break; | |
} | |
} | |
area = idAreas.get(areaId); | |
log.debug("该号码查询深度:{}该号码[{}]查询归属地为:{}", findDepth, phone, area); | |
return area; | |
} | |
// 建议在系统启动的时候把归属地数据加载至内存 | |
// @PostConstruct | |
public static void loadData(String dataFilePath) { | |
// 以下可在系统启动时写入static变量,后边一直使 | |
final File d = new File(dataFilePath); | |
byte[] data = new byte[0]; | |
try { | |
data = Files.toByteArray(d); | |
} | |
catch (IOException e) { | |
log.error("--加载归属地数据出错了--", e); | |
} | |
ByteBuf buf = Unpooled.copiedBuffer(data); | |
int phoneLength = buf.readInt(); | |
while (buf.isReadable()) { | |
if (buf.readerIndex() < phoneLength + 4) { | |
int phone = buf.readMedium(); | |
int areaId = buf.readShort(); | |
phoneAreaIds.put(phone, areaId); | |
} else { | |
int aredId = buf.readShort(); | |
int length = buf.readByte(); | |
byte[] abs = new byte[length]; | |
buf.readBytes(abs); | |
// ByteBufUtil.getBytes()方法在Netty 4.1才有的 | |
// byte[] aredBytes = ByteBufUtil.getBytes(buf, buf.readerIndex(), length); | |
// buf.readerIndex(buf.readerIndex() + length); | |
String area = new String(abs, Charsets.UTF_8); | |
idAreas.put(aredId, area); | |
} | |
} | |
} | |
public static void convertData(String source, String target) throws IOException { | |
final File f = new File(source); | |
final File d = new File(target); | |
Set<String> areaSet = Sets.newTreeSet(); | |
Map<String, String> phoneAreas = Maps.newTreeMap(); | |
Map<String, Integer> areaIds = Maps.newTreeMap(); | |
Map<Integer, String> idAreas = Maps.newTreeMap(); | |
Map<String, Integer> phoneAreaIds = Maps.newTreeMap(); | |
List<String> lines = Files.readLines(f, Charsets.UTF_8); | |
// 1.手机号-归属地名称、归属地列表 | |
// 注意一定要使用key有序的TreeMap | |
String pre = null; | |
// 最大查询深度,在查询的使用 | |
int findDepth = 0; | |
int sameDepth = 0; | |
for (String s : lines) { | |
String[] ll = StringUtils.split(s); | |
if (ll != null && ll.length >= 3) { | |
// 1300022 上海 上海 中国联通网络 | |
String phone = ll[0]; | |
String province = ll[1]; | |
String city = ll[2]; | |
String area; | |
if (StringUtils.equals(province, city)) { | |
area = city; | |
} else { | |
area = province + city; | |
} | |
// 当前这条数据的归属地和前一条不同才保存 | |
if (!StringUtils.equals(pre, area)) { | |
phoneAreas.put(phone, area); | |
findDepth = Math.max(findDepth, sameDepth); | |
sameDepth = 0; | |
} else { | |
sameDepth += 1; | |
} | |
areaSet.add(area); | |
pre = area; | |
} | |
} | |
System.out.println("最大查询深度:" + findDepth); | |
// 2.归属地编码-归属地名称、归属地名称-归属地编码 | |
String[] areas = areaSet.toArray(new String[areaSet.size()]); | |
for (int i = 0; i < areas.length; i++) { | |
String area = areas[i]; | |
// 归属地编码从1开始 | |
int areaId = i + 1; | |
idAreas.put(areaId, area); | |
areaIds.put(area, areaId); | |
} | |
// 3.手机号-归属地编码 | |
for (Map.Entry<String, String> e : phoneAreas.entrySet()) { | |
String phone = e.getKey(); | |
String area = e.getValue(); | |
int areaId = areaIds.get(area); | |
phoneAreaIds.put(phone, areaId); | |
} | |
// 4.手机号-归属地编码、归属地编码-归属地名称 写入最终数据文件 | |
ByteBuf pBuf = Unpooled.directBuffer(); | |
ByteBuf aBuf = Unpooled.directBuffer(); | |
for (Map.Entry<String, Integer> e : phoneAreaIds.entrySet()) { | |
// 3位号码前缀、2位归属地编号 | |
pBuf.writeMedium(Integer.parseInt(e.getKey())).writeShort(e.getValue()); | |
} | |
for (Map.Entry<Integer, String> e : idAreas.entrySet()) { | |
byte[] v = e.getValue().getBytes(Charsets.UTF_8); | |
// 2位归属地编号、1位归属地编号长度、bytes字节归属地名称 | |
aBuf.writeShort(e.getKey()).writeByte(v.length).writeBytes(v); | |
} | |
// 由于初始固定长度,切掉没有使用的数据 | |
pBuf.capacity(pBuf.writerIndex()); | |
aBuf.capacity(aBuf.writerIndex()); | |
// 前4位保存 手机号-归属地编码 一共的长度 | |
// 然后bytes写入 手机号-归属地编码 数据 | |
// 然后bytes写入 归属地编码-归属地名称 数据 | |
ByteBuf p = Unpooled.buffer(); | |
p.writeInt(pBuf.writerIndex()); | |
p.writeBytes(pBuf); | |
p.writeBytes(aBuf); | |
// 由于初始固定长度,切掉没有使用的数据 | |
p.capacity(p.writerIndex()); | |
Files.write(p.array(), d); | |
} | |
public static void main(String[] args) throws Exception { | |
convertData("/data/conf/phone_number_2016-08-11.txt", "/data/conf/phones.dat"); | |
loadData("/data/conf/phones.dat"); | |
System.out.println(findAreaByPhone("18117008955")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment