Created
January 17, 2020 07:41
-
-
Save mythagel/f87ea955f9ae891ac7225a8cf37b4e34 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// (c) 2020 Nicholas Gill GPLv3 | |
#include <cstdio> | |
#include <cstdint> | |
#include <cstring> | |
template <typename CharT, unsigned N> | |
class KMPStream | |
{ | |
public: | |
bool begin(const CharT* search, size_t size) | |
{ | |
reset(); | |
return buildTable(search, size); | |
} | |
// Returns current partial match count, matches at ABSOLUTE stream index from last reset() | |
int push(const CharT* buf, size_t size) | |
{ | |
size_t off = absPos; | |
auto pos = [off, this] { return absPos - off; }; | |
while (pos() != size) | |
{ | |
while (matchIndex > -1 && search[matchIndex] != buf[pos()]) | |
matchIndex = pmTable[matchIndex]; | |
++matchIndex; | |
++absPos; | |
if (matchIndex >= pmTableSize) | |
{ | |
printf("MATCH@%ld\n", absPos - matchIndex); | |
matchIndex = pmTable[matchIndex]; | |
} | |
} | |
return matchIndex; | |
} | |
// Reset position - safe when no current partial match | |
void reset() | |
{ | |
absPos = 0; | |
matchIndex = 0; | |
} | |
// classic KMP (debugging) | |
/* void match(const CharT* buf, size_t size) const | |
{ | |
size_t pos = 0; | |
int matchIndex = 0; | |
while (pos != size) | |
{ | |
while (matchIndex > -1 && search[matchIndex] != buf[pos]) | |
matchIndex = pmTable[matchIndex]; | |
++matchIndex; | |
++pos; | |
if (matchIndex >= pmTableSize) | |
{ | |
printf("MATCH@%lu\n", pos - matchIndex); | |
matchIndex = pmTable[matchIndex]; | |
} | |
} | |
}*/ | |
private: | |
bool buildTable(const CharT* buf, size_t size) | |
{ | |
if (size > N) return false; | |
pmTable[0] = -1; | |
pmTableSize = size; | |
size_t pos = 0; | |
int i = -1; | |
while (pos != pmTableSize) | |
{ | |
search[pos] = buf[pos]; | |
while (i > -1 && buf[pos] != buf[i]) | |
i = pmTable[i]; | |
++pos; | |
++i; | |
pmTable[pos] = (buf[pos] == buf[i]) ? pmTable[i] : i; | |
} | |
return true; | |
} | |
private: | |
int pmTable[N]; | |
size_t pmTableSize; | |
CharT search[N]; | |
// State | |
size_t absPos; | |
int matchIndex; | |
}; | |
int main() | |
{ | |
const char* needle = "ABCDABD"; | |
KMPStream<char, 70> kmp; | |
kmp.begin(needle, strlen(needle)); | |
const char* haystack[] = {"ABC ABCDAB ABCD", "AB", "DABDE"}; | |
for (auto & c : haystack) | |
{ | |
int i = kmp.push(c, strlen(c)); | |
// if (i == 0) kmp.reset(); // Can reset mid stream if no match in progress | |
printf("i: %d\n", i); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment