Last active
July 20, 2018 11:55
-
-
Save anvol/a169bd93a581f942396f406b6065368c to your computer and use it in GitHub Desktop.
Knuth–Morris–Pratt algorithm modified to use with byte streams
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <stdlib.h> | |
typedef struct { | |
int32_t * lps; // longest proper prefix | |
uint16_t lps_size; // pattern size == lps size | |
char * pattern; | |
int32_t step; | |
uint8_t substring_found; // indicates that substring was found | |
} kmp_ctx; | |
int32_t *kmp_init(kmp_ctx *ctx, char *pattern, int psize); | |
uint8_t kmp_step(kmp_ctx *ctx, char c); | |
int32_t *kmp_init(kmp_ctx *ctx, char *pattern, int psize){ | |
if (ctx->lps) free(ctx->lps); | |
ctx->substring_found = 0; | |
ctx->lps_size = 0; | |
ctx->step = -1; | |
ctx->pattern = 0; | |
int step = -1; | |
int i = 1; | |
ctx->lps = malloc(sizeof(int32_t) * psize); | |
if (!ctx->lps) | |
return NULL; | |
ctx->lps_size = psize; | |
ctx->pattern = pattern; | |
ctx->lps[0] = step; | |
for (i = 1; i < psize; i++) { | |
while (step > -1 && pattern[step+1] != pattern[i]) | |
step = ctx->lps[step]; | |
if (pattern[i] == pattern[step+1]) | |
step++; | |
ctx->lps[i] = step; | |
} | |
return ctx->lps; | |
} | |
uint8_t kmp_step(kmp_ctx *ctx, char c){ | |
if (!ctx->lps) return 0; | |
if (ctx->substring_found) return 1; | |
while (ctx->step > -1 && ctx->pattern[ctx->step+1] != c) | |
ctx->step = ctx->lps[ctx->step]; | |
if (c == ctx->pattern[ctx->step+1]) | |
ctx->step++; | |
if (ctx->step == ctx->lps_size - 1) { | |
ctx->substring_found = 1; | |
} | |
return ctx->substring_found; | |
} | |
void main(){ | |
int i; | |
char * p = "test"; | |
char * s = "Hello, this is a test. Should return 1"; | |
kmp_ctx ctx; | |
ctx.lps = NULL; | |
kmp_init(&ctx, p, strlen(p)); | |
for (i = 0; i < strlen(s); i++) { | |
kmp_step(&ctx, s[i]); | |
printf("%c > %u\r\n", s[i], ctx.substring_found); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment