Skip to content

Instantly share code, notes, and snippets.

@cacharle
Created October 26, 2024 07:54
Show Gist options
  • Save cacharle/4b0b4244eff7361ab56147668cbb9100 to your computer and use it in GitHub Desktop.
Save cacharle/4b0b4244eff7361ab56147668cbb9100 to your computer and use it in GitHub Desktop.
Levenshtein distance benchmark
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <time.h>
int min(int a, int b, int c)
{
if (a < b && a < c)
return a;
else if (b < a && b < c)
return b;
else
return c;
}
int lev_naive(char *s1, char *s2)
{
if (*s1 == '\0')
return strlen(s2);
if (*s2 == '\0')
return strlen(s1);
if (*s1 == *s2)
return lev_naive(s1 + 1, s2 + 1);
return 1 + min(
lev_naive(s1, s2 + 1),
lev_naive(s1 + 1, s2),
lev_naive(s1 + 1, s2 + 1)
);
}
int lev(char *s1, char *s2)
{
static int diffs[128][128] = {{0}};
memset(diffs, 0, sizeof diffs);
int s1_len, i;
for (i = 1; s1[i-1] != '\0'; i++)
diffs[i][0] = i;
s1_len = i;
int s2_len, j;
for (j = 1; s2[j-1] != '\0'; j++)
diffs[0][j] = j;
s2_len = j;
for (int j = 1; s2[j-1] != '\0'; j++)
{
for (int i = 1; s1[i-1] != '\0'; i++)
{
int substitution_cost = s1[i] == s2[j] ? 0 : 1;
diffs[i][j] = min(
diffs[i-1][j] + 1,
diffs[i][j-1] + 1,
diffs[i-1][j-1] + substitution_cost
);
}
}
return diffs[s1_len - 1][s2_len - 1];
}
int main(void)
{
struct timespec start, end;
FILE *f = fopen("/usr/share/dict/words", "r");
assert(f != NULL);
char *target = "house";
// int dist = lev(target, "housf");
// printf("%s: %d\n", target, dist);
// return 0;
char *line = NULL;
size_t line_size = 0;
ssize_t read_size;
clock_gettime(CLOCK_MONOTONIC, &start);
while ((read_size = getline(&line, &line_size, f)) != -1)
{
line[read_size - 1] = '\0';
// int dist = lev_naive(target, line);
int dist = lev(target, line);
// printf("%s: %d\n", line, dist);
}
clock_gettime(CLOCK_MONOTONIC, &end);
free(line);
fclose(f);
time_t delta_seconds = end.tv_sec - start.tv_sec;
long long int delta_ns = end.tv_nsec - start.tv_nsec;
if (delta_ns < 0)
{
delta_seconds -= 1;
delta_ns += 1000000000;
}
printf("took: %zus %lldms\n", delta_seconds, delta_ns / 1'000'000);
return 0;
}
~/git/tried master*
❯ gcc -std=c23 -O3 t.c && ./a.out # <-- cache distance matrix
took: 0s 107ms
~/git/tried master*
❯ gcc -std=c23 -O3 t.c && ./a.out # <-- naive
took: 8s 8ms
~/git/tried master* 8s
❯ wc -l /usr/share/dict/words
123985 /usr/share/dict/words
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment