Last active
May 29, 2023 16:29
-
-
Save B-Y-P/781bdbb0b145690768a22f7d2400c6d2 to your computer and use it in GitHub Desktop.
char class structure
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// 256 length bit-vector | |
struct CharClass{ | |
u64 b[4]; | |
bool lo_empty(){ return (b[0] | b[1]) == 0; } | |
bool hi_empty(){ return (b[2] | b[3]) == 0; } | |
u64 lo_count(){ return PopCnt(b[0]) + PopCnt(b[1]); } | |
u64 hi_count(){ return PopCnt(b[2]) + PopCnt(b[3]); } | |
bool empty(){ return (b[0] | b[1] | b[2] | b[3]) == 0; } | |
u64 count(){ return lo_count() + hi_count(); } | |
void clear(){ b[0] = b[1] = b[2] = b[3] = 0llu; } | |
void fill(){ b[0] = b[1] = b[2] = b[3] = ~0llu; } | |
void invert(){ | |
b[0] ^= ~0llu; | |
b[1] ^= ~0llu; | |
b[2] ^= ~0llu; | |
b[3] ^= ~0llu; | |
} | |
bool contains(u8 c){ | |
return (b[c>>6] & (1llu << (c & 0x3F))) != 0; | |
} | |
void flip(u8 c){ b[c>>6] ^= (1llu << (c & 0x3F)); } | |
void insert(u8 c){ b[c>>6] |= (1llu << (c & 0x3F)); } | |
void remove(u8 c){ b[c>>6] &= ~(1llu << (c & 0x3F)); } | |
void insert(u8 c, u8 d){ | |
if(c > d){ return; } | |
if(c == d){ insert(c); return; } | |
u64 idx_c = c>>6; | |
u64 idx_d = d>>6; | |
u64 bit_c = (1llu << (c & 0x3F)); | |
u64 bit_d = (1llu << (d & 0x3F)); | |
u64 top = (bit_d - 1)|bit_d; | |
u64 bot = ~(bit_c - 1); | |
if(idx_c == idx_d){ | |
b[idx_c] |= top & bot; | |
}else{ | |
b[idx_d] |= top; | |
b[idx_c] |= bot; | |
if(idx_c < 1 && 1 < idx_d){ b[1] = ~0llu; }; | |
if(idx_c < 2 && 2 < idx_d){ b[2] = ~0llu; }; | |
} | |
} | |
u8 pop_i(u64 i){ | |
u64 block = b[i]; | |
u64 bit = FirstBitSet(block); | |
b[i] = (block & (block-1)); | |
return u8(i*64 + bit); | |
} | |
i32 pop(){ | |
return (b[0] ? pop_i(0) : | |
b[1] ? pop_i(1) : | |
b[2] ? pop_i(2) : | |
b[3] ? pop_i(3) : -1); | |
} | |
bool pack(i64 *out){ | |
/// Could pack to literal, tuple, or remain a CharClass | |
u64 lo = lo_count(); | |
u64 hi = hi_count(); | |
u64 count = lo + hi; | |
if(count == 1){ | |
*out = i64(pop()); | |
return true; | |
}else if(lo <= 9 && hi == 0){ | |
i64 tuple = 0; | |
while(!empty()){ | |
tuple = (tuple<<7) | i64(cc.pop()); | |
} | |
tuple |= 0x8000000000000000; | |
*out = tuple; | |
return true; | |
} | |
return false; | |
} | |
}; | |
bool operator==(CharClass x, CharClass y){ | |
return (x.b[0] == y.b[0] && | |
x.b[1] == y.b[1] && | |
x.b[2] == y.b[2] && | |
x.b[3] == y.b[3]); | |
} | |
bool operator!=(CharClass x, CharClass y){ return !(x==y); } | |
/// Include whatever bitwise binary-ops you want | |
/// c is assumed to never be '\0' (tuple from CharClass::pack() will false-positive) | |
bool TupleContains(i64 tuple, u8 c){ | |
u64 f = u64(c); | |
tuple ^= (f<<56)|(f<<49)|(f<<42)|(f<<35)|(f<<28)|(f<<21)|(f<<14)|(f<<7)|(f); | |
u64 lo_bits = 0x0102040810204081; | |
u64 hi_bits = 0x4081020408102040; | |
return ((tuple - lo_bits) & (~tuple & hi_bits)); | |
#if 0 | |
return ((((tuple>> 0) & 0x7F) == c) || | |
(((tuple>> 7) & 0x7F) == c) || | |
(((tuple>>14) & 0x7F) == c) || | |
(((tuple>>21) & 0x7F) == c) || | |
(((tuple>>28) & 0x7F) == c) || | |
(((tuple>>35) & 0x7F) == c) || | |
(((tuple>>42) & 0x7F) == c) || | |
(((tuple>>49) & 0x7F) == c) || | |
(((tuple>>56) & 0x7F) == c)); | |
#endif | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment