Skip to content

Instantly share code, notes, and snippets.

@ljmccarthy
Last active September 14, 2018 07:40
Show Gist options
  • Save ljmccarthy/be535523269f1b7f50c88beda121746e to your computer and use it in GitHub Desktop.
Save ljmccarthy/be535523269f1b7f50c88beda121746e to your computer and use it in GitHub Desktop.
import re
def regex_to_bitset(r):
    '''Convert a character-class regex into an ASCII membership bitset.

    Every ASCII code point (0 <= ch <= 127) is tested against ``r``. When the
    single-character string is accepted, bit ``ch % 32`` of word ``ch // 32``
    is set.

    Args:
        r: A compiled ``str`` pattern (as returned by ``re.compile``),
            normally a single character class such as ``[A-Za-z]``.

    Returns:
        A list of four ints, each carrying 32 membership bits with the
        lowest code point of the word in the least significant bit.
    '''
    # fullmatch rather than match: a pattern that can match the empty string
    # (e.g. ``a?``) would otherwise "match" at position 0 of every character
    # and mark the whole ASCII range as members.
    return [
        sum(1 << bit for bit in range(32) if r.fullmatch(chr(base + bit)))
        for base in range(0, 128, 32)
    ]
def format_bitset(name, bitset):
    '''Render a bitset as a C++ ``static constexpr uint32_t`` array definition.

    Args:
        name: Identifier to give the emitted array.
        bitset: Sequence of ints; each is written as a hexadecimal literal.

    Returns:
        A single line of C++ source declaring the array with its initializer.
    '''
    initializer = ', '.join(hex(word) for word in bitset)
    # Doubled braces are literal braces in an f-string; the innermost pair
    # interpolates the initializer list.
    return f'static constexpr uint32_t {name}[{len(bitset)}] = {{{initializer}}};'
def format_bitset_func(name, bitset_name):
    '''Render a C++ ``constexpr`` predicate that tests a bit in a bitset array.

    Args:
        name: Identifier to give the emitted function.
        bitset_name: Identifier of the ``uint32_t`` array the function reads.

    Returns:
        A single line of C++ source defining ``bool name(int ch)``.

    Note:
        The emitted word index is masked with ``0x3``, so it always stays
        within a four-word table; a ``ch`` outside 0..127 therefore aliases
        onto the entry of an ASCII character instead of being rejected.
    '''
    # Word selection: upper bits of ch pick one of the four 32-bit words.
    word = f'{bitset_name}[ch >> 5 & 0x3]'
    # Bit selection: the low five bits of ch pick the bit inside that word.
    mask = '1 << (ch & 0x1f)'
    body = f'return ({word} & {mask}) != 0;'
    return f'static constexpr bool {name}(int ch) {{ {body} }}'
if __name__ == '__main__':
    # Character classes to emit, keyed by the name of the generated table.
    # (ident0 / identn presumably describe the first and the following
    # characters of an identifier -- confirm against the consuming C++ code.)
    char_classes = {
        'ident0': r'[A-Za-z!$%^&*\-_+=@~#<>?/\\|]',
        'identn': r'[A-Za-z0-9!$%^&*\-_+=@~#<>?/\\|]',
        'special': r'[()\[\]{}"\'.,;:]',
    }

    # Build every table before printing anything, so a bad pattern fails
    # before any partial output is written.
    tables = {
        table_name: regex_to_bitset(re.compile(pattern))
        for table_name, pattern in char_classes.items()
    }

    # All array definitions come first, followed by the predicates using them.
    for table_name, words in tables.items():
        print(format_bitset(table_name, words))
    for table_name in tables:
        print(format_bitset_func(f'is_{table_name}', table_name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment