Created
November 1, 2025 00:39
-
-
Save monyone/a8a5b12a96b219eb6058a38adce37432 to your computer and use it in GitHub Desktop.
ダブル配列のLeftmost-Longest-AC法
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class Trie { | |
| public readonly parent: Trie | null = null; | |
| private goto: Map<number, Trie> = new Map<number, Trie>(); | |
| public constructor(parent?: Trie) { | |
| this.parent = parent ?? null; | |
| } | |
| public has(s: number): boolean { | |
| return this.goto.has(s); | |
| } | |
| public set(s: number, next: Trie) { | |
| return this.goto.set(s, next); | |
| } | |
| public go(s: number) { | |
| return this.goto.get(s); | |
| } | |
| public keys(): Iterable<number> { | |
| return this.goto.keys(); | |
| } | |
| } | |
| class DoubleArray { | |
| private code: Map<string, number>; | |
| private base: number[] = [0]; | |
| private check: number[] = [-1]; | |
| private failure: number[] = [-1]; | |
| private keyword: (string | null)[] = [null]; | |
| public constructor(keywords: string[]) { | |
| const set = new Set<string>(); | |
| for (const keyword of keywords) { | |
| for (let i = 0; i < keyword.length; i++) { | |
| const character = keyword[i]; | |
| set.add(character); | |
| } | |
| } | |
| this.code = new Map<string, number>(Array.from(set.values()).map((v, i) => [v, i + 1])); | |
| const unique = new Set(keywords); | |
| const words = Array.from(unique.values()).map((keyword) => keyword.split('').map((character) => this.code.get(character)!)); | |
| // construct Trie | |
| const root = new Trie(); | |
| for (const word of words) { | |
| let node = root; | |
| for (const character of word) { | |
| if (!node.has(character)) { | |
| node.set(character, new Trie(node)); | |
| } | |
| node = node.go(character)!; | |
| } | |
| } | |
| // construct Double Array | |
| { | |
| let left = 0; | |
| const queue: [number, Trie][] = [[0, root]]; | |
| while (queue.length > 0) { | |
| const [node, trie] = queue.shift()!; | |
| const leafs = Array.from(trie.keys()); | |
| const max_leaf = leafs.reduce((a, b) => Math.max(a, b), 0); | |
| let offset = left; | |
| while (offset < this.check.length && this.check[offset] >= 0) { offset++; } | |
| LOOP: | |
| while (true) { | |
| for (const leaf of leafs) { | |
| const next = offset + leaf; | |
| if (next < this.check.length && this.check[next] >= 0) { | |
| offset = next + 1; | |
| continue LOOP; | |
| } | |
| } | |
| break; | |
| } | |
| this.base[node] = offset - node; | |
| left = offset; | |
| const max = offset + max_leaf; | |
| while (this.base.length <= max) { this.base.push(0); } | |
| while (this.check.length <= max) { this.check.push(-1); } | |
| while (this.keyword.length <= max) { this.keyword.push(null); } | |
| while (this.failure.length <= max) { this.failure.push(-1); } | |
| for (const leaf of leafs) { | |
| const next = offset + leaf; | |
| this.check[next] = node; | |
| queue.push([next, trie.go(leaf)!]); | |
| } | |
| } | |
| } | |
| // Register Keyword | |
| { | |
| for (const keyword of keywords) { | |
| let node = 0; | |
| for (const character of keyword) { | |
| node += this.base[node] + this.code.get(character)! | |
| } | |
| this.keyword[node] = keyword; | |
| } | |
| } | |
| // Build Failure | |
| { | |
| const queue: [number, Trie][] = [[0, root]]; | |
| while (queue.length > 0) { | |
| const [parent, trie] = queue.shift()!; | |
| const leafs = Array.from(trie.keys()); | |
| for (const leaf of leafs) { | |
| const node = parent + this.base[parent] + leaf; | |
| let next = -1, failure = this.failure[parent]; | |
| if (this.keyword[node] == null) { | |
| while (failure >= 0 && this.check[(failure + this.base[failure] + leaf)] !== failure) { | |
| failure = this.failure[failure]; | |
| } | |
| next = failure < 0 ? -1 : (failure + this.base[failure] + leaf); | |
| } | |
| this.failure[node] = next < 0 ? 0 : this.check[next] !== failure ? 0 : next; | |
| if (this.keyword[node] == null) { | |
| this.keyword[node] = this.keyword[this.failure[node]]; | |
| } | |
| queue.push([node, trie.go(leaf)!]); | |
| } | |
| } | |
| } | |
| } | |
| public go(node: number, character: string): number { | |
| const code = this.code.get(character); | |
| if (code == null) { return 0; } | |
| while (node >= 0) { | |
| const next = node + this.base[node] + code; | |
| if (this.check[next] === node) { return next; } | |
| node = this.failure[node]; | |
| } | |
| return Math.max(node, 0); | |
| } | |
| public value(node: number): string | null { | |
| return this.keyword[node]; | |
| } | |
| } | |
| export class AhoCorasick { | |
| private trie: DoubleArray; | |
| constructor(keywords: string[]) { | |
| this.trie = new DoubleArray(keywords); | |
| } | |
| public matchInText(text: string): { begin: number, end: number, keyword: string }[] { | |
| const candidates: { begin: number, end: number, keyword: string }[] = []; | |
| let node = 0; | |
| for (let i = 0; i < text.length; i++) { | |
| if (this.trie.value(node) != null) { | |
| const keyword = this.trie.value(node)!; | |
| const length = keyword.length; | |
| const begin = i - length; | |
| const end = i; | |
| while (true) { | |
| if (candidates.length === 0) { | |
| candidates.push({ begin, end, keyword }) | |
| break; | |
| } | |
| const stack = candidates.length - 1; | |
| if (candidates[candidates.length - 1].end <= begin) { | |
| candidates.push({ begin, end, keyword }); | |
| break; | |
| } else if (begin > candidates[stack].begin) { | |
| break; | |
| } else { | |
| candidates.pop(); | |
| } | |
| } | |
| } | |
| const ch = text[i]; | |
| node = this.trie.go(node, ch); | |
| } | |
| if (this.trie.value(node) != null) { | |
| const keyword = this.trie.value(node)!; | |
| const length = keyword.length; | |
| const begin = text.length - length; | |
| const end = text.length; | |
| while (true) { | |
| if (candidates.length === 0) { | |
| candidates.push({ begin, end, keyword }) | |
| break; | |
| } | |
| const stack = candidates.length - 1; | |
| if (candidates[candidates.length - 1].end <= begin) { | |
| candidates.push({ begin, end, keyword }); | |
| break; | |
| } else if (begin > candidates[stack].begin) { | |
| break; | |
| } else { | |
| candidates.pop(); | |
| } | |
| } | |
| } | |
| return candidates; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment