import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.BitSet;

/**
 * Removes duplicate mobile phone numbers from a text file.
 *
 * <p>The input is read line by line. A line counts as a phone number when it
 * consists of exactly 11 ASCII digits and its 3-digit prefix lies in one of the
 * supported ranges (130-139, 150-159, 180-189). The first occurrence of every
 * such number is written to the output file, one number per line, in input
 * order; every other line is skipped. Numbers already seen are tracked as bits
 * in {@link BitSet}s instead of being stored as strings.
 */
public class PhoneNumber {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "c:\\abc.txt";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "c:\\def.txt";

    /** Buffer size, in chars, of the buffered reader and the buffered writer. */
    private static final int IO_BUFFER_CHARS = 1024 * 1024 * 10;

    /** Number of bit sets; {@link #offset(int[], int[])} only produces bucket 0 or 1. */
    private static final int BUCKET_COUNT = 2;

    /** Initial size, in bits, of a bit set; the set grows on demand, so this is only a tuning knob. */
    private static final int INITIAL_BITS = 10 * 10000;

    /** Total number of digits in a phone number. */
    private static final int PHONE_LENGTH = 11;

    /** Number of leading digits that form the prefix. */
    private static final int PREFIX_LENGTH = 3;

    /** Number of distinct 8-digit suffixes (00000000-99999999). */
    private static final int SUFFIX_RANGE = 100000000;

    /**
     * Reads the input file and writes its de-duplicated phone numbers to the output file.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the
     *             output path; a missing argument falls back to the built-in default path
     * @throws Exception if a file cannot be opened, read or written
     */
    public static void main(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        BufferedReader r = new BufferedReader(new FileReader(new File(inputPath)), IO_BUFFER_CHARS);
        try {
            PrintWriter w = new PrintWriter(
                    new BufferedWriter(new FileWriter(outputPath), IO_BUFFER_CHARS));
            try {
                writeUnique(r, w);
                // PrintWriter never throws on write failures; checkError() flushes the
                // stream and reports whether any write went wrong.
                if (w.checkError()) {
                    throw new IOException("Failed to write " + outputPath);
                }
            } finally {
                // Nested finally blocks: the writer is closed (and flushed) even if
                // closing the reader fails, and vice versa.
                w.close();
            }
        } finally {
            r.close();
        }
    }

    /** Copies the first occurrence of every supported phone number from {@code in} to {@code out}. */
    private static void writeUnique(BufferedReader in, PrintWriter out) throws IOException {
        BitSet[] seen = new BitSet[BUCKET_COUNT];
        int[] phone = { -1, -1 };
        int[] index = { -1, -1 };

        String line;
        while ((line = in.readLine()) != null) {
            parsePhone(line, phone);
            if (phone[0] < 0) {
                continue; // not an 11-digit number
            }
            offset(phone, index);
            if (index[0] < 0 || index[1] < 0) {
                continue; // prefix outside the supported ranges
            }

            BitSet bucket = seen[index[0]];
            if (bucket == null) {
                bucket = new BitSet(INITIAL_BITS);
                seen[index[0]] = bucket;
            }
            if (!bucket.get(index[1])) {
                bucket.set(index[1]);
                // Print the validated line itself: rebuilding the text from the two
                // ints would drop leading zeros of the suffix (13800001234 -> 1381234).
                out.println(line);
            }
        }
    }

    /**
     * Maps a parsed phone number to a bucket and a bit position that fits in an {@code int}.
     *
     * <p>Prefixes 130-139 map to bucket 0, slots 0-9; prefixes 150-159 map to bucket 0,
     * slots 10-19; prefixes 180-189 map to bucket 1, slots 0-9. The bit position is
     * {@code slot * 100000000 + suffix}, which is at most 1,999,999,999.
     *
     * @param phone input: {@code phone[0]} is the 3-digit prefix, {@code phone[1]} the
     *              8-digit suffix as a number
     * @param index output: {@code index[0]} receives the bucket and {@code index[1]} the
     *              bit position; both are set to -1 when the prefix is not supported or
     *              the suffix is outside 0-99999999
     */
    static void offset(int[] phone, int[] index) {
        int prefix = phone[0];
        int suffix = phone[1];
        int bucket = -1;
        int slot = -1;

        // An out-of-range suffix would spill into a neighbouring slot, so reject it.
        if (suffix >= 0 && suffix < SUFFIX_RANGE) {
            if (130 <= prefix && prefix <= 139) {
                bucket = 0;
                slot = prefix - 130; // bucket 0, slots 0-9
            } else if (150 <= prefix && prefix <= 159) {
                bucket = 0;
                slot = prefix - 140; // bucket 0, slots 10-19
            } else if (180 <= prefix && prefix <= 189) {
                bucket = 1;
                slot = prefix - 180; // bucket 1, slots 0-9
            }
        }

        index[0] = bucket;
        index[1] = (bucket < 0) ? -1 : slot * SUFFIX_RANGE + suffix;
    }

    /**
     * Splits an 11-digit line into prefix and suffix, e.g. {@code 13812345678 => 138, 12345678}.
     *
     * <p>Only the ASCII digits {@code 0-9} are accepted, so sign characters and digits
     * from other scripts make the line invalid. A suffix of {@code 00000000} is valid.
     * A prefix of {@code 000} is treated as invalid.
     *
     * @param l     the line to parse; may be {@code null}
     * @param phone output: on success {@code phone[0]} receives the prefix and
     *              {@code phone[1]} the suffix; on failure {@code phone[0]} is set to -1
     *              and {@code phone[1]} is left untouched
     */
    static void parsePhone(String l, int[] phone) {
        phone[0] = -1;
        if (l == null || l.length() != PHONE_LENGTH) {
            return;
        }

        int prefix = 0;
        int suffix = 0;
        for (int k = 0; k < PHONE_LENGTH; k++) {
            char c = l.charAt(k);
            if (c < '0' || c > '9') {
                return;
            }
            int digit = c - '0';
            if (k < PREFIX_LENGTH) {
                prefix = prefix * 10 + digit;
            } else {
                suffix = suffix * 10 + digit;
            }
        }

        if (prefix == 0) {
            return;
        }
        phone[0] = prefix;
        phone[1] = suffix;
    }

}
// Note: the offset computation could be optimized further by using bit operations instead of '*'; offset can represent a 5-bit binary value, i.e. 0-31.