Skip to content

Instantly share code, notes, and snippets.

@aoking
Last active March 23, 2016 23:07
Show Gist options
  • Save aoking/f5967a65eda71bb7e72c to your computer and use it in GitHub Desktop.
Save aoking/f5967a65eda71bb7e72c to your computer and use it in GitHub Desktop.
package p;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import lombok.Getter;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.RandomUtils;
public class SlowRegexPatternSearcher {
private static final int GENE_LENGTH = 20; // 対象文字列の長さ
private static final int ELITES_SIZE = 1000; // 現世代から次世代に残す遺伝子の数
private static final int MUTATION_SIZE = 1000; // 突然変異として現れる文字列の個数
private static final int POOL_SIZE = 10000; // 対象文字列の個数
private static final Comparator<ScoredGene> SCORE_COMPARATOR = new Comparator<ScoredGene>() {
@Override
public int compare(ScoredGene o1, ScoredGene o2) {
return o2.getTime() - o1.getTime();
}
};
private final Pattern pattern;
public SlowRegexPatternSearcher(Pattern pattern) {
this.pattern = pattern;
}
private Set<String> getRandomGenes() {
Set<String> genes = new HashSet<>();
// 初期化
for (int i = 0; i < POOL_SIZE; i++) {
genes.add(newGene());
}
return genes;
}
public void search() {
Set<String> genes = getRandomGenes(); // 初期遺伝子はランダムなものを使用する
int count = 0;
while (true) {
// 文字列それぞれについて計算時間を取得
List<ScoredGene> scores = calculateScore(genes);
// マッチにかかった時間ごとにソート
Collections.sort(scores, SCORE_COMPARATOR);
// 先頭を表示
System.out.println("generation " + count + ":\t" + scores.get(0));
count++;
// 次世代の文字列を生成
genes = genereateNextGenes(scores);
}
}
private Set<String> genereateNextGenes(List<ScoredGene> genes) {
// 次世代を生成する文字列群
List<String> elites = new ArrayList<>();
// エリートたちを取得
for (ScoredGene gene : genes.subList(0, ELITES_SIZE)) {
elites.add(gene.getGene());
}
// 突然変異としてランダムな文字列も入れる
for (int i = 0; i < MUTATION_SIZE; i++) {
elites.add(newGene());
}
// 次世代の遺伝子群
Set<String> next = new HashSet<>();
next.addAll(elites); // エリート達はそのまま次世代に残る
// 既定のプールサイズになるまで遺伝子を生成し続ける
while (next.size() <= POOL_SIZE) {
String father = elites.get(RandomUtils.nextInt(0, elites.size()));
String mother = elites.get(RandomUtils.nextInt(0, elites.size()));
Set<String> children = breed(father, mother);
next.addAll(children);
}
return next;
}
// 一点交叉法で二つの子を生成
private Set<String> breed(String father, String mother) {
int pivot = RandomUtils.nextInt(0, GENE_LENGTH);
String part1 = father.substring(0, pivot);
String part2 = mother.substring(pivot);
Set<String> children = new HashSet<>();
children.add(part1 + part2);
children.add(part2 + part1);
return children;
}
private List<ScoredGene> calculateScore(Set<String> genes) {
List<ScoredGene> scores = new ArrayList<>();
for (String gene : genes) {
scores.add(calclateScore(gene));
}
return scores;
}
private ScoredGene calclateScore(String gene) {
long start = System.nanoTime();
pattern.matcher(gene).matches();
long end = System.nanoTime();
return new ScoredGene(gene, (int) (end - start));
}
private String newGene() {
return RandomStringUtils.random(GENE_LENGTH);
}
@Getter
class ScoredGene {
private final String gene;
private final int time; // nano sec
public ScoredGene(String str, int score) {
this.gene = str;
this.time = score;
}
@Override
public String toString() {
return gene + "\t" + (time / 1000) + "ms";
}
}
public static void main(String[] args) throws IOException {
Pattern patten = Pattern.compile("(\\w|\\\\@|[-\\%+_!/=.]){1,64}@");
SlowRegexPatternSearcher search = new SlowRegexPatternSearcher(patten);
search.search();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment