Skip to content

Instantly share code, notes, and snippets.

@leveryd
Created April 12, 2024 15:06
Show Gist options
  • Save leveryd/1e4a0f231a54cd96f033de44279da2a7 to your computer and use it in GitHub Desktop.
Save leveryd/1e4a0f231a54cd96f033de44279da2a7 to your computer and use it in GitHub Desktop.
关键字编码,包括overlong、hex、unicode
import os
class Encoder(object):
    r"""Base class that renders byte values as textual escape sequences.

    Subclasses implement :meth:`one_byte_rule`, which maps one byte value
    (an ``int``) to a string fragment; :meth:`encode_bytes` concatenates the
    fragments for a whole ``bytes`` keyword.
    """

    @staticmethod
    def chr_to_hex(i):
        r"""Return the four-character ``\xHH`` escape for one byte value.

        Example: ``49`` -> backslash, ``x``, ``3``, ``1`` (upper-case hex).

        :param i: byte value, ``0 <= i <= 255``
        :return: the text ``\xHH``
        :raises ValueError: if ``i`` is not a valid byte value
        """
        # 256 used to be accepted and produced the malformed "\x100".
        if not 0 <= i <= 255:
            raise ValueError("byte value out of range(256): %r" % (i,))
        return "\\x{:02X}".format(i)

    def chrs_to_hex(self, chars):
        r"""Return the concatenated ``\xHH`` escapes for an iterable of byte values.

        :param chars: iterable of ints, each a valid byte value
        :return: one string with an escape per input value, in order
        """
        return "".join(self.chr_to_hex(char) for char in chars)

    @staticmethod
    def chr_to_unicode(i):
        r"""Return two backslashes followed by ``uXXXX`` (lower-case hex).

        :param i: code point to render
        :return: e.g. ``97`` -> ``\\u0061`` (both backslashes are literal)
        """
        # chr() rejects values that are not valid code points; ord() maps the
        # character straight back to the same integer.
        return '\\\\u{:04x}'.format(ord(chr(i)))

    def one_byte_rule(self, one_byte):
        """Map one byte value to its encoded fragment; subclasses override.

        :param one_byte: byte value as an int
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            "%s must implement one_byte_rule()" % type(self).__name__
        )

    def encode_bytes(self, byte_s):
        """Encode a whole keyword, keeping every ``.*`` pair verbatim.

        Each byte is passed through :meth:`one_byte_rule`, except that the
        two-byte sequence ``.*`` is copied to the output unchanged. A lone
        ``.`` (including one at the very end) is encoded like any other byte.

        :param byte_s: the keyword as ``bytes``
        :return: the concatenated fragments as a ``str``
        """
        dot, star = b".*"  # iterating bytes yields ints
        length = len(byte_s)
        parts = []
        index = 0
        while index < length:
            current = byte_s[index]
            followed_by_star = index + 1 < length and byte_s[index + 1] == star
            if current == dot and followed_by_star:
                parts.append(".*")
                index += 2
            else:
                parts.append(self.one_byte_rule(current))
                index += 1
        return "".join(parts)
class FastJsonHexEncoder(Encoder):
    r"""Encoder whose per-byte fragment is a backslash plus the ``\xHH`` escape."""

    def one_byte_rule(self, one_byte):
        r"""Return ``\\xHH`` (two literal backslashes) for one byte value.

        NOTE(review): presumably the doubled backslash is there so that the
        fragment, used as a regex, matches the literal text ``\xHH`` -- confirm
        against the consumer of these rules.

        :param one_byte: byte value as an int
        """
        hex_escape = self.chr_to_hex(one_byte)
        return "\\" + hex_escape
class OverLongEncoder(Encoder):
    """Encoder that offers the raw byte plus 2- and 3-byte UTF-8 style forms."""

    @staticmethod
    def encode_one_byte(one_byte, size):
        """Spread one byte value over a multi-byte UTF-8 style sequence.

        :param one_byte: byte value as an int
        :param size: ``2`` selects the two-byte layout ``110xxxxx 10xxxxxx``;
            every other value selects the three-byte layout
            ``11100000 10xxxxxx 10xxxxxx``
        :return: list of ints, one per output byte
        """
        continuation_tag = 0b10000000
        six_bits = 0b111111
        upper = one_byte >> 6
        trailing = (one_byte & six_bits) | continuation_tag
        if size != 2:
            return [0b11100000, (upper & six_bits) | continuation_tag, trailing]
        return [(upper & 0b11111) | 0b11000000, trailing]

    def one_byte_rule(self, one_byte):
        """Return ``(raw|two-byte|three-byte)`` as hex escapes for one byte.

        :param one_byte: byte value as an int
        :return: a parenthesised alternation of the three spellings
        """
        alternatives = [self.chr_to_hex(one_byte)]
        for width in (2, 3):
            alternatives.append(
                self.chrs_to_hex(self.encode_one_byte(one_byte, width))
            )
        return "(%s)" % "|".join(alternatives)
class FastJsonUnicodeEncoder(Encoder):
    """Encoder whose per-byte fragment is the value's ``uXXXX`` escape form."""

    def one_byte_rule(self, one_byte):
        """Delegate to ``chr_to_unicode`` for one byte value.

        :param one_byte: byte value as an int
        :return: whatever ``chr_to_unicode`` produces for that value
        """
        unicode_escape = self.chr_to_unicode(one_byte)
        return unicode_escape
class RawEncoder(Encoder):
    """Encoder whose per-byte fragment is the plain hex escape of the byte."""

    def one_byte_rule(self, one_byte):
        """Delegate to ``chr_to_hex`` for one byte value.

        :param one_byte: byte value as an int
        :return: whatever ``chr_to_hex`` produces for that value
        """
        hex_escape = self.chr_to_hex(one_byte)
        return hex_escape
class RuleGenerate(object):
    """Build one alternation rule from a file holding one keyword per line.

    Configure with the chainable setters, then call :meth:`run`::

        RuleGenerate().set_encoder("overlong").set_file_name(path).run()
    """

    def __init__(self):
        # Comma-separated encoder names; each must be a key of encoder_map.
        self.encoder = ""
        # Path of the keyword file read by run().
        self.file_name = ""
        # Encoder name -> encoder class (instantiated per encoded line).
        self.encoder_map = {
            "fastjsonHex": FastJsonHexEncoder,
            "overlong": OverLongEncoder,
            "fastjsonUnicode": FastJsonUnicodeEncoder,
            "raw": RawEncoder
        }

    def set_encoder(self, name):
        """Set the comma-separated encoder name list and return ``self``."""
        self.encoder = name
        return self

    def set_file_name(self, file_name):
        """Set the keyword file path and return ``self``."""
        self.file_name = file_name
        return self

    def mixed_encoder_line(self, line):
        """Encode one keyword with every configured encoder.

        For each byte of ``line`` the fragments of all configured encoders are
        joined with ``|`` and wrapped in parentheses; the per-byte groups are
        then concatenated. Every byte goes through ``one_byte_rule``, so a
        ``.*`` pair gets no special treatment here.

        :param line: the keyword as ``bytes``
        :return: the concatenated per-byte groups as a ``str``
        :raises KeyError: if a configured encoder name is not in
            ``encoder_map``
        """
        encoders = [self.encoder_map[name]() for name in self.encoder.split(",")]
        return "".join(
            "(" + "|".join(enc.one_byte_rule(byte) for enc in encoders) + ")"
            for byte in line
        )

    def run(self):
        """Read the keyword file and return the combined rule.

        Lines that are empty after stripping whitespace are skipped. Each
        remaining keyword becomes one parenthesised alternative; alternatives
        are joined with ``|``. A file with no keywords yields ``"()"``.

        :return: the combined rule as a ``str``
        :raises SystemExit: (status 0, after printing a message) if the file
            does not exist
        """
        if not os.path.exists(self.file_name):
            print("file '%s' do not exist" % self.file_name)
            # Same effect as the site-provided exit(0), without depending on
            # the `site` module having installed that helper.
            raise SystemExit(0)

        rules = []
        with open(self.file_name, "rb") as f:
            for line in f:
                keyword = line.strip()
                # The file is read as bytes, so comparing against the str ""
                # never matched; test emptiness directly so blank lines do not
                # add an empty "()" alternative.
                if not keyword:
                    continue
                rules.append(self.mixed_encoder_line(keyword))
        return "(" + ")|(".join(rules) + ")"
def test_overlong():
    """Check OverLongEncoder's hex escape and two full keyword encodings.

    Covers a keyword containing ``.*`` (copied through verbatim) and one with
    a lone ``.`` (encoded like any other byte).
    """
    assert OverLongEncoder().chr_to_hex(49) == r"\x31"

    cases = (
        (b"a.*b", r"(\x61|\xC1\xA1|\xE0\x81\xA1).*(\x62|\xC1\xA2|\xE0\x81\xA2)"),
        (b"a.b", r"(\x61|\xC1\xA1|\xE0\x81\xA1)(\x2E|\xC0\xAE|\xE0\x80\xAE)(\x62|\xC1\xA2|\xE0\x81\xA2)"),
    )
    for keyword, expected in cases:
        actual = OverLongEncoder().encode_bytes(keyword)
        assert actual == expected
def test_fastjson():
    """Placeholder for the fastjson encoder checks; currently does nothing."""
# Run the built-in self-check first; an AssertionError stops the script here.
test_overlong()

# Print the combined rule for the keyword file at /tmp/1, using the
# "overlong" encoder for every keyword.
print(
    RuleGenerate()
    .set_encoder("overlong")
    .set_file_name("/tmp/1")
    .run()
)

# Alternative invocations kept for reference:
# print(RuleGenerate().set_encoder("fastjsonHex,fastjsonUnicode,raw").set_file_name("/tmp/1").run())
# print(FastJsonHexEncoder().encode_bytes(b"abcd"))
@leveryd
Copy link
Author

leveryd commented Apr 12, 2024

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment