Created
November 7, 2025 01:57
-
-
Save matrixcascade/89f9f4dae47b2cf6400aadffb26409de to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "PainterEngine.h" | |
| #include <stdio.h> | |
| #include <string.h> | |
| #include <stdlib.h> | |
| #include <immintrin.h> | |
| #define ALPHABET_SIZE 256 | |
| #define MAX_PATTERN_LEN 1024 | |
| px_dword NativeElapsed; | |
| px_dword opNativeElapsed; | |
| px_dword KMPElapsed; | |
| px_dword BMElapsed; | |
| px_dword BMHElapsed; | |
| px_dword SundayElapsed; | |
| px_dword RabinKarpElapsed; | |
| px_dword TwoWayElapsed; | |
| px_dword dummy; | |
| const px_char* content; | |
| px_int content_length, c1, c2; | |
| px_char abstract_string[1024] = { 0 }; | |
| px_int abstract_string_length = 0; | |
| /******************************** | |
| * 1. Naive Search(朴素匹配) | |
| ********************************/ | |
| static inline int ctz32(unsigned x) { | |
| unsigned long idx; | |
| _BitScanForward(&idx, x); | |
| return (int)idx; | |
| } | |
| int find_op_naive(const char src[], const char pattern[]) { | |
| if (!src || !pattern) return -1; | |
| size_t n = content_length; | |
| size_t m = abstract_string_length; | |
| if (m == 0 || n < m) return -1; | |
| __m256i first = _mm256_set1_epi8(pattern[0]); | |
| for (size_t i = 0; i + 32 <= n; i += 32) { | |
| __m256i block = _mm256_loadu_si256((__m256i*)(src + i)); | |
| __m256i cmp = _mm256_cmpeq_epi8(block, first); | |
| unsigned mask = _mm256_movemask_epi8(cmp); | |
| while (mask != 0) { | |
| int bit = ctz32(mask); | |
| size_t pos = i + bit; | |
| if (pos + m <= n && memcmp(src + pos, pattern, m) == 0) | |
| return (int)pos; | |
| mask &= mask - 1; | |
| } | |
| } | |
| for (size_t i = n - (n % 32); i < n; ++i) { | |
| if (src[i] == pattern[0]) { | |
| if (i + m <= n && memcmp(src + i, pattern, m) == 0) | |
| return (int)i; | |
| } | |
| } | |
| return -1; | |
| } | |
| int find_naive(const char src[], const char pattern[]) { | |
| int i, j; | |
| for (i = 0; src[i]; i++) | |
| { | |
| for (j = 0; j < abstract_string_length; j++) | |
| { | |
| if (src[i + j] != pattern[j]) | |
| break; | |
| } | |
| if (j == abstract_string_length) | |
| return i; | |
| } | |
| return -1; | |
| } | |
| /******************************** | |
| * 2. KMP | |
| ********************************/ | |
| static int kmp_next[MAX_PATTERN_LEN]; | |
| static int kmp_m = 0; | |
| static char kmp_pat[MAX_PATTERN_LEN + 1]; | |
| void init_kmp(const char pattern[]) { | |
| kmp_m = strlen(pattern); | |
| if (kmp_m < 1) return; | |
| PX_strcpy(kmp_pat, pattern, MAX_PATTERN_LEN); | |
| kmp_pat[MAX_PATTERN_LEN] = '\0'; | |
| kmp_next[0] = 0; | |
| for (int i = 1, j = 0; i < kmp_m; i++) { | |
| while (j > 0 && pattern[i] != pattern[j]) j = kmp_next[j - 1]; | |
| if (pattern[i] == pattern[j]) j++; | |
| kmp_next[i] = j; | |
| } | |
| } | |
| int find_kmp(const char src[]) { | |
| int n = content_length; | |
| for (int i = 0, j = 0; i < n; i++) { | |
| while (j > 0 && src[i] != kmp_pat[j]) j = kmp_next[j - 1]; | |
| if (src[i] == kmp_pat[j]) j++; | |
| if (j == kmp_m) return i - kmp_m + 1; | |
| } | |
| return -1; | |
| } | |
| /******************************** | |
| * 3. Boyer–Moore(BM) | |
| ********************************/ | |
| static int bm_bad[ALPHABET_SIZE]; | |
| static int bm_m = 0; | |
| static char bm_pat[MAX_PATTERN_LEN + 1]; | |
| void init_bm(const char pattern[]) { | |
| bm_m = strlen(pattern); | |
| if (bm_m < 1) return; | |
| PX_strcpy(bm_pat, pattern, MAX_PATTERN_LEN); | |
| bm_pat[MAX_PATTERN_LEN] = '\0'; | |
| for (int i = 0; i < ALPHABET_SIZE; i++) bm_bad[i] = bm_m; | |
| for (int i = 0; i < bm_m - 1; i++) | |
| bm_bad[(unsigned char)pattern[i]] = bm_m - 1 - i; | |
| } | |
| int find_bm(const char src[]) { | |
| int n =content_length; | |
| int i = 0; | |
| while (i <= n - bm_m) { | |
| int j = bm_m - 1; | |
| while (j >= 0 && bm_pat[j] == src[i + j]) j--; | |
| if (j < 0) return i; | |
| i += bm_bad[(unsigned char)src[i + bm_m - 1]]; | |
| } | |
| return -1; | |
| } | |
| /******************************** | |
| * 4. Boyer–Moore–Horspool(BMH) | |
| ********************************/ | |
| static int bmh_shift[ALPHABET_SIZE]; | |
| static int bmh_m = 0; | |
| static char bmh_pat[MAX_PATTERN_LEN + 1]; | |
| void init_bmh(const char pattern[]) { | |
| bmh_m = strlen(pattern); | |
| if (bmh_m < 1) return; | |
| PX_strcpy(bmh_pat, pattern, MAX_PATTERN_LEN); | |
| bmh_pat[MAX_PATTERN_LEN] = '\0'; | |
| for (int i = 0; i < ALPHABET_SIZE; i++) bmh_shift[i] = bmh_m; | |
| for (int i = 0; i < bmh_m - 1; i++) | |
| bmh_shift[(unsigned char)pattern[i]] = bmh_m - 1 - i; | |
| } | |
| int find_bmh(const char src[]) { | |
| int n =content_length; | |
| int i = 0; | |
| while (i <= n - bmh_m) { | |
| int j = bmh_m - 1; | |
| while (j >= 0 && bmh_pat[j] == src[i + j]) j--; | |
| if (j < 0) return i; | |
| i += bmh_shift[(unsigned char)src[i + bmh_m - 1]]; | |
| } | |
| return -1; | |
| } | |
| /******************************** | |
| * 5. Sunday | |
| ********************************/ | |
| static int sunday_shift[ALPHABET_SIZE]; | |
| static int sunday_m = 0; | |
| static char sunday_pat[MAX_PATTERN_LEN + 1]; | |
| void init_sunday(const char pattern[]) { | |
| sunday_m = strlen(pattern); | |
| if (sunday_m < 1) return; | |
| PX_strcpy(sunday_pat, pattern, MAX_PATTERN_LEN); | |
| sunday_pat[MAX_PATTERN_LEN] = '\0'; | |
| for (int i = 0; i < ALPHABET_SIZE; i++) sunday_shift[i] = sunday_m + 1; | |
| for (int i = 0; i < sunday_m; i++) | |
| sunday_shift[(unsigned char)pattern[i]] = sunday_m - i; | |
| } | |
| int find_sunday(const char src[]) { | |
| int n = content_length; | |
| int i = 0; | |
| while (i <= n - sunday_m) { | |
| int j = 0; | |
| while (j < sunday_m && src[i + j] == sunday_pat[j]) j++; | |
| if (j == sunday_m) return i; | |
| if (i + sunday_m >= n) break; | |
| i += sunday_shift[(unsigned char)src[i + sunday_m]]; | |
| } | |
| return -1; | |
| } | |
| /******************************** | |
| * 6. Rabin–Karp | |
| ********************************/ | |
| #define BASE 256 | |
| #define MOD 101 | |
| static int rk_m = 0; | |
| static char rk_pat[MAX_PATTERN_LEN + 1]; | |
| static int rk_hash_pat = 0; | |
| static int rk_h = 1; | |
| void init_rabin_karp(const char pattern[]) { | |
| rk_m = strlen(pattern); | |
| if (rk_m < 1) return; | |
| PX_strcpy(rk_pat, pattern, MAX_PATTERN_LEN); | |
| rk_pat[MAX_PATTERN_LEN] = '\0'; | |
| rk_h = 1; | |
| for (int i = 0; i < rk_m - 1; i++) rk_h = (rk_h * BASE) % MOD; | |
| rk_hash_pat = 0; | |
| for (int i = 0; i < rk_m; i++) | |
| rk_hash_pat = (BASE * rk_hash_pat + pattern[i]) % MOD; | |
| } | |
| int find_rabin_karp(const char src[]) { | |
| int n = content_length; | |
| if (n < rk_m) return -1; | |
| int th = 0; | |
| for (int i = 0; i < rk_m; i++) | |
| th = (BASE * th + src[i]) % MOD; | |
| for (int i = 0; i <= n - rk_m; i++) { | |
| if (rk_hash_pat == th) { | |
| int j = 0; | |
| while (j < rk_m && src[i + j] == rk_pat[j]) j++; | |
| if (j == rk_m) return i; | |
| } | |
| if (i < n - rk_m) | |
| th = (BASE * (th - src[i] * rk_h) + src[i + rk_m]) % MOD; | |
| if (th < 0) th += MOD; | |
| } | |
| return -1; | |
| } | |
| /******************************** | |
| * 7. Two-Way | |
| ********************************/ | |
| static char tw_pat[MAX_PATTERN_LEN + 1]; | |
| static int tw_m = 0; | |
| static int tw_per = 0; | |
| static int tw_k = 0; | |
| void init_two_way(const char pattern[]) { | |
| tw_m = strlen(pattern); | |
| if (tw_m < 1) return; | |
| if (tw_m > MAX_PATTERN_LEN) tw_m = MAX_PATTERN_LEN; | |
| memcpy(tw_pat, pattern, tw_m); | |
| tw_pat[tw_m] = '\0'; | |
| // Booth’s algorithm to find minimal rotation | |
| int i = 0, j = 1, k = 0; | |
| while (i + k < tw_m && j + k < tw_m) { | |
| if (tw_pat[i + k] == tw_pat[j + k]) k++; | |
| else if (tw_pat[i + k] > tw_pat[j + k]) { | |
| i = i + k + 1; | |
| if (i <= j) i = j + 1; | |
| k = 0; | |
| } | |
| else { | |
| j = j + k + 1; | |
| if (j <= i) j = i + 1; | |
| k = 0; | |
| } | |
| } | |
| tw_k = (i < j) ? i : j; | |
| tw_per = tw_m - tw_k; | |
| } | |
| int find_two_way(const char src[]) { | |
| int n = content_length; | |
| int pos = 0; | |
| while (pos <= n - tw_m) { | |
| int i = 0; | |
| while (i < tw_m && src[pos + i] == tw_pat[i]) i++; | |
| if (i == tw_m) return pos; | |
| pos += (i <= tw_k) ? tw_per : (i - tw_k); | |
| if (tw_per <= 0) pos++; // avoid infinite loop | |
| } | |
| return -1; | |
| } | |
| px_void get_random_abstract_string( px_int length) | |
| { | |
| px_int offset; | |
| while (PX_TRUE) | |
| { | |
| offset = (px_int)PX_randRange(0, content_length - length); | |
| while (offset) | |
| { | |
| if (content[offset] != '.') | |
| { | |
| offset--; | |
| } | |
| else | |
| { | |
| offset++; | |
| break; | |
| } | |
| } | |
| while ((content[offset] == ' '|| content[offset] == '\n' || content[offset] == '\r') && content[offset]!= '\0') | |
| { | |
| offset++; | |
| } | |
| if (content_length - offset < length) | |
| { | |
| continue; | |
| } | |
| break; | |
| } | |
| PX_memcpy(abstract_string, content + offset, length); | |
| abstract_string_length = length; | |
| abstract_string[length] = 0; | |
| } | |
| px_int main() | |
| { | |
| PX_IO_Data data; | |
| PainterEngine_Initialize(800, 600); | |
| data=PX_LoadFileToIOData("harrypotter.txt"); | |
| content = (const px_char *)data.buffer; | |
| content_length = data.size-1; | |
| for (px_int i = 0; i < 1000; i++) | |
| { | |
| px_int time; | |
| get_random_abstract_string(PX_APO(PX_randRange(1,256))); | |
| time = PX_TimeGetTime(); | |
| dummy +=find_naive(content, abstract_string); | |
| NativeElapsed += PX_TimeGetTime() - time; | |
| time = PX_TimeGetTime(); | |
| dummy += find_op_naive(content, abstract_string); | |
| opNativeElapsed += PX_TimeGetTime() - time; | |
| time = PX_TimeGetTime(); | |
| init_kmp(abstract_string); | |
| dummy += find_kmp(content); | |
| KMPElapsed += PX_TimeGetTime() - time; | |
| time = PX_TimeGetTime(); | |
| init_bm(abstract_string); | |
| dummy += find_bm(content); | |
| BMElapsed += PX_TimeGetTime() - time; | |
| time = PX_TimeGetTime(); | |
| init_bmh(abstract_string); | |
| dummy += find_bmh(content); | |
| BMHElapsed += PX_TimeGetTime() - time; | |
| time = PX_TimeGetTime(); | |
| init_sunday(abstract_string); | |
| dummy += find_sunday(content); | |
| SundayElapsed += PX_TimeGetTime() - time; | |
| time = PX_TimeGetTime(); | |
| init_rabin_karp(abstract_string); | |
| dummy += find_rabin_karp(content); | |
| RabinKarpElapsed += PX_TimeGetTime() - time; | |
| time = PX_TimeGetTime(); | |
| init_two_way(abstract_string); | |
| dummy += find_two_way(content); | |
| TwoWayElapsed += PX_TimeGetTime() - time; | |
| } | |
| printf(" Naive Average Elapsed: %u\n", NativeElapsed); | |
| printf(" Optimized Naive Average Elapsed: %u\n", opNativeElapsed); | |
| printf(" KMP Average Elapsed: %u\n", KMPElapsed); | |
| printf(" BM Average Elapsed: %u\n", BMElapsed); | |
| printf(" BMH Average Elapsed: %u\n", BMHElapsed); | |
| printf(" Sunday Average Elapsed: %u\n", SundayElapsed); | |
| printf(" Rabin-Karp Average Elapsed: %u\n", RabinKarpElapsed); | |
| printf(" Two-Way Average Elapsed: %u\n", TwoWayElapsed); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment