Last active
May 17, 2024 05:24
-
-
Save anthonyquizon/af97674f8ff621e80e02f9ad1fb74e9c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <regex.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Short aliases for the fixed-width types from <stdint.h>. */
typedef int8_t i8;
typedef uint8_t u8;
typedef int16_t i16;
typedef uint16_t u16;
typedef int32_t i32;
typedef uint32_t u32;
typedef int64_t i64;
typedef uint64_t u64;
typedef double f64;
typedef float f32;
typedef u64 ux;

/* Upper bound on the number of strings written to the output buffer. */
#define MAX_MATCHES 1024
/* Size of the regmatch_t array handed to regexec(). */
#define MAX_GROUPS 1024
/* Size in bytes of the caller-provided output buffer. */
#define MAX_OUT 1024

/**
 * Find every match of a POSIX extended regular expression in a string and
 * pack the matched text into a caller-provided buffer.
 *
 * For each successive match the whole match (group 0) is written first,
 * followed by its capture groups in order, stopping at the first group that
 * did not participate in the match. Every string is NUL-terminated and the
 * strings are laid out back to back starting at out[0].
 *
 * Scanning stops when no further match exists, when MAX_MATCHES strings have
 * been written, or when the next string would not fit in the buffer (strings
 * are never truncated and never written past out[MAX_OUT - 1]).
 *
 * @param out        Buffer of at least MAX_OUT bytes; it is zero-filled first.
 * @param source     NUL-terminated text to search.
 * @param regex_str  NUL-terminated POSIX extended regular expression.
 * @return Number of strings written to out; 0 if any argument is NULL or the
 *         expression does not compile (a message is printed in that case).
 */
i32 regex_c(char* out, char* source, char* regex_str) {
    regex_t regex_compiled;
    regmatch_t group_arr[MAX_GROUPS];
    const char *cursor = source;
    size_t out_offs = 0;
    i32 out_n = 0;
    bool out_full = false;

    if (out == NULL) {
        return 0;
    }
    memset(out, '\0', MAX_OUT);
    if (source == NULL || regex_str == NULL) {
        return 0;
    }

    if (regcomp(&regex_compiled, regex_str, REG_EXTENDED) != 0) {
        printf("Could not compile regular expression.\n");
        return 0;
    }

    while (out_n < MAX_MATCHES) {
        /* Once we resume past the start of the input, '^' must not match at
         * the cursor: the cursor is not the beginning of the line. */
        int eflags = (cursor == source) ? 0 : REG_NOTBOL;

        /* Any non-zero result (REG_NOMATCH or a runtime error) ends the
         * scan; group_arr is only meaningful after a successful match. */
        if (regexec(&regex_compiled, cursor, MAX_GROUPS, group_arr, eflags) != 0) {
            break;
        }

        for (size_t g = 0; g < MAX_GROUPS && out_n < MAX_MATCHES; g++) {
            if (group_arr[g].rm_so < 0) {
                break; /* No more groups */
            }

            size_t n = (size_t)(group_arr[g].rm_eo - group_arr[g].rm_so);

            /* The string plus its terminator must fit in what is left. */
            if (n + 1 > MAX_OUT - out_offs) {
                out_full = true;
                break;
            }

            memcpy(out + out_offs, cursor + group_arr[g].rm_so, n);
            out[out_offs + n] = '\0';
            out_offs += n + 1;
            out_n++;
        }
        if (out_full) {
            break;
        }

        regoff_t match_end = group_arr[0].rm_eo;
        if (group_arr[0].rm_so == match_end) {
            /* Zero-length match: step one byte forward so the same empty
             * match is not reported forever; stop at the end of the input. */
            if (cursor[match_end] == '\0') {
                break;
            }
            match_end++;
        }
        cursor += match_end;
    }

    regfree(&regex_compiled);
    return out_n;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the shared library loaded by the BQN script.
# -fPIC emits position-independent code, which shared objects require.
lib.so: lib.c
	gcc -shared -fPIC lib.c -o lib.so
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bind regex_c from lib.so. The result type is i32 to match the C return type;
# the first argument is a mutable byte buffer that is returned after the call.
regex_c ← "./lib.so" •FFI ⟨"i32","regex_c", "&u8:i8", "*u8:c8", "*u8:c8"⟩
# 𝕨 Regex 𝕩: 𝕨 is the pattern, 𝕩 is the text to search.
Regex←{
  # Pass a 1024-byte zeroed output buffer plus both strings with a trailing NUL.
  ⟨n, ch⟩← Regex_C ⟨1024⥊0, 𝕩∾@+0, 𝕨∾@+0⟩
  # Split the returned bytes at zeros and convert each group to characters.
  @⊸+¨(ch=0)(⊢-˜(+`׬))⊸⊔ch
}
•Show "[a-z]+" Regex "hello abc world!"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment