Last active
March 23, 2016 23:07
-
-
Save aoking/f5967a65eda71bb7e72c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package p; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.Comparator; | |
import java.util.HashSet; | |
import java.util.List; | |
import java.util.Set; | |
import java.util.regex.Pattern; | |
import lombok.Getter; | |
import org.apache.commons.lang3.RandomStringUtils; | |
import org.apache.commons.lang3.RandomUtils; | |
public class SlowRegexPatternSearcher { | |
private static final int GENE_LENGTH = 20; // 対象文字列の長さ | |
private static final int ELITES_SIZE = 1000; // 現世代から次世代に残す遺伝子の数 | |
private static final int MUTATION_SIZE = 1000; // 突然変異として現れる文字列の個数 | |
private static final int POOL_SIZE = 10000; // 対象文字列の個数 | |
private static final Comparator<ScoredGene> SCORE_COMPARATOR = new Comparator<ScoredGene>() { | |
@Override | |
public int compare(ScoredGene o1, ScoredGene o2) { | |
return o2.getTime() - o1.getTime(); | |
} | |
}; | |
private final Pattern pattern; | |
public SlowRegexPatternSearcher(Pattern pattern) { | |
this.pattern = pattern; | |
} | |
private Set<String> getRandomGenes() { | |
Set<String> genes = new HashSet<>(); | |
// 初期化 | |
for (int i = 0; i < POOL_SIZE; i++) { | |
genes.add(newGene()); | |
} | |
return genes; | |
} | |
public void search() { | |
Set<String> genes = getRandomGenes(); // 初期遺伝子はランダムなものを使用する | |
int count = 0; | |
while (true) { | |
// 文字列それぞれについて計算時間を取得 | |
List<ScoredGene> scores = calculateScore(genes); | |
// マッチにかかった時間ごとにソート | |
Collections.sort(scores, SCORE_COMPARATOR); | |
// 先頭を表示 | |
System.out.println("generation " + count + ":\t" + scores.get(0)); | |
count++; | |
// 次世代の文字列を生成 | |
genes = genereateNextGenes(scores); | |
} | |
} | |
private Set<String> genereateNextGenes(List<ScoredGene> genes) { | |
// 次世代を生成する文字列群 | |
List<String> elites = new ArrayList<>(); | |
// エリートたちを取得 | |
for (ScoredGene gene : genes.subList(0, ELITES_SIZE)) { | |
elites.add(gene.getGene()); | |
} | |
// 突然変異としてランダムな文字列も入れる | |
for (int i = 0; i < MUTATION_SIZE; i++) { | |
elites.add(newGene()); | |
} | |
// 次世代の遺伝子群 | |
Set<String> next = new HashSet<>(); | |
next.addAll(elites); // エリート達はそのまま次世代に残る | |
// 既定のプールサイズになるまで遺伝子を生成し続ける | |
while (next.size() <= POOL_SIZE) { | |
String father = elites.get(RandomUtils.nextInt(0, elites.size())); | |
String mother = elites.get(RandomUtils.nextInt(0, elites.size())); | |
Set<String> children = breed(father, mother); | |
next.addAll(children); | |
} | |
return next; | |
} | |
// 一点交叉法で二つの子を生成 | |
private Set<String> breed(String father, String mother) { | |
int pivot = RandomUtils.nextInt(0, GENE_LENGTH); | |
String part1 = father.substring(0, pivot); | |
String part2 = mother.substring(pivot); | |
Set<String> children = new HashSet<>(); | |
children.add(part1 + part2); | |
children.add(part2 + part1); | |
return children; | |
} | |
private List<ScoredGene> calculateScore(Set<String> genes) { | |
List<ScoredGene> scores = new ArrayList<>(); | |
for (String gene : genes) { | |
scores.add(calclateScore(gene)); | |
} | |
return scores; | |
} | |
private ScoredGene calclateScore(String gene) { | |
long start = System.nanoTime(); | |
pattern.matcher(gene).matches(); | |
long end = System.nanoTime(); | |
return new ScoredGene(gene, (int) (end - start)); | |
} | |
private String newGene() { | |
return RandomStringUtils.random(GENE_LENGTH); | |
} | |
@Getter | |
class ScoredGene { | |
private final String gene; | |
private final int time; // nano sec | |
public ScoredGene(String str, int score) { | |
this.gene = str; | |
this.time = score; | |
} | |
@Override | |
public String toString() { | |
return gene + "\t" + (time / 1000) + "ms"; | |
} | |
} | |
public static void main(String[] args) throws IOException { | |
Pattern patten = Pattern.compile("(\\w|\\\\@|[-\\%+_!/=.]){1,64}@"); | |
SlowRegexPatternSearcher search = new SlowRegexPatternSearcher(patten); | |
search.search(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment