Skip to content

Instantly share code, notes, and snippets.

@rendon
Last active August 3, 2017 03:48
Show Gist options
  • Save rendon/bed43af1ac75430de74922c85a955d8a to your computer and use it in GitHub Desktop.
Save rendon/bed43af1ac75430de74922c85a955d8a to your computer and use it in GitHub Desktop.
A new implementation of the Aho-Corasick algorithm.
/* Copyright 2017 Rafael Rendón Pablo <[email protected]> */
// region Template
#include <bits/stdc++.h>
using namespace std;
typedef long long int64;
typedef unsigned long long uint64;
const double kEps = 10e-8;
const int kMax = 1000;
const int kInf = 1 << 30;
const int kFail = -1;
// endregion
int trie[kMax][26+1];
int fail[kMax];
vector<int> output[kMax];
int newState;
// Helper function to match paper notation
inline int g(int state, int a) {
return trie[state][a];
}
// Helper function to match paper notation
inline int f(int state) {
return fail[state];
}
void add(const string& word, int index) {
int state = 0;
for (int i = 0; i < int(word.length()); i++) {
int c = word[i] - 'a';
if (trie[state][c] == kFail) {
newState++;
trie[state][c] = newState;
state = newState;
} else {
state = trie[state][c];
}
}
output[state].push_back(index);
}
void init() {
memset(trie, kFail, sizeof trie);
memset(fail, kFail, sizeof fail);
for (int i = 0; i < kMax; i++) {
output[i].clear();
}
newState = 0;
}
void buildTrie(const vector<string>& words) {
for (int i = 0; i < int(words.size()); i++) {
add(words[i], i);
}
for (int a = 0; a < 26; a++) {
if (trie[0][a] == kFail) {
trie[0][a] = 0;
}
}
}
void buildFailFunction() {
queue<int> Q;
for (int a = 0; a < 26; a++) {
int s = trie[0][a];
if (s != 0) {
Q.push(s);
fail[s] = 0;
}
}
while (!Q.empty()) {
int r = Q.front();
Q.pop();
for (int a = 0; a < 26; a++) {
int s = trie[r][a];
if (s == kFail) {
continue;
}
Q.push(s);
int state = fail[r];
while (trie[state][a] == kFail) {
state = fail[state];
}
fail[5] = trie[2]['r']; // 8
// Merge output
for (int out : output[fail[s]]) {
output[s].push_back(out);
}
}
}
}
void countOccurrences(int tc, int n, const string& text) {
map<int, int> count;
int state = 0;
cout << "Case " << tc << ":\n";
for (char ch : text) {
int a = ch - 'a';
while (g(state, a) == kFail) {
state = f(state);
}
state = g(state, a);
for (int out : output[state]) {
count[out]++;
}
}
for (int i = 0; i < n; i++) {
cout << count[i] << "\n";
}
}
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
int tests;
cin >> tests;
for (int tc = 1; tc <= tests; tc++) {
int n;
string text;
cin >> n >> text;
vector<string> words(n);
for (string& word : words) {
cin >> word;
}
init();
buildTrie(words);
buildFailFunction();
countOccurrences(tc, n, text);
}
return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment