Created
April 12, 2024 15:06
-
-
Save leveryd/1e4a0f231a54cd96f033de44279da2a7 to your computer and use it in GitHub Desktop.
关键字编码,包括overlong、hex、unicode
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
class Encoder(object):
    r"""Base class that renders byte values as textual pattern fragments.

    Subclasses override :meth:`one_byte_rule`; :meth:`encode_bytes` applies
    that rule to every byte of its input and joins the results, passing the
    two-byte sequence ``.*`` through unchanged.
    """

    @staticmethod
    def chr_to_hex(i):
        r"""Return the ``\xNN`` text (upper-case hex digits) for byte value *i*.

        49 -> ``\x31`` (a backslash, ``x``, then two hex digits).

        :param i: integer byte value, 0..255 inclusive
        :return: the four-character escape text
        :raises ValueError: if *i* is outside 0..255
        """
        # 256 is not a byte: it would format as the three-digit "\x100".
        # An explicit raise is used because asserts are stripped under -O.
        if not 0 <= i <= 255:
            raise ValueError("byte value out of range 0..255: %r" % (i,))
        return "\\x{:02X}".format(i)

    def chrs_to_hex(self, chars):
        r"""Concatenate :meth:`chr_to_hex` of every value in *chars*.

        :param chars: iterable of integer byte values
        :return: e.g. ``[0xC1, 0xA1]`` -> ``\xC1\xA1``
        """
        return "".join(self.chr_to_hex(char) for char in chars)

    @staticmethod
    def chr_to_unicode(i):
        r"""Return two backslashes followed by ``uNNNN`` (lower-case hex).

        97 -> ``\\u0061``.

        :param i: integer code point
        :return: the escape text, zero-padded to at least four hex digits
        """
        # The chr/ord round-trip rejects values chr() does not accept
        # (negative numbers, values above the Unicode range, non-integers).
        return '\\\\u{:04x}'.format(ord(chr(i)))

    def one_byte_rule(self, one_byte):
        """Return the fragment for a single byte value; subclasses override.

        :param one_byte: integer byte value
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            "%s must implement one_byte_rule" % type(self).__name__
        )

    def encode_bytes(self, byte_s):
        """Encode *byte_s* byte by byte with :meth:`one_byte_rule`.

        A ``.`` immediately followed by ``*`` is emitted verbatim as ``.*``
        instead of being encoded; a lone ``.`` or ``*`` is encoded like any
        other byte.

        :param byte_s: ``bytes`` to encode (indexing must yield integers)
        :return: the concatenated fragments as one string
        """
        dot, star = b"."[0], b"*"[0]
        length = len(byte_s)
        parts = []
        index = 0
        while index < length:
            current = byte_s[index]
            followed_by_star = index + 1 < length and byte_s[index + 1] == star
            if current == dot and followed_by_star:
                parts.append(".*")
                index += 2
            else:
                parts.append(self.one_byte_rule(current))
                index += 1
        return "".join(parts)
class FastJsonHexEncoder(Encoder):
    r"""Encoder whose per-byte fragment is a backslash plus the ``\xNN`` text.

    Byte 0x61 becomes ``\\x61`` (two backslashes, then ``x61``).
    NOTE(review): presumably the doubled backslash makes the fragment, when
    used as a regex, match the literal text ``\x61`` -- confirm with the
    consumer of these rules.
    """

    def one_byte_rule(self, one_byte):
        """Return ``chr_to_hex(one_byte)`` prefixed with one extra backslash."""
        return "\\" + self.chr_to_hex(one_byte)
class OverLongEncoder(Encoder):
    r"""Encoder that offers three spellings of every byte.

    Each byte becomes the group ``(raw|two-byte|three-byte)``, where the
    multi-byte forms use UTF-8 style lead/continuation bit layouts.
    """

    @staticmethod
    def encode_one_byte(one_byte, size):
        """Spread *one_byte* over a two- or three-byte sequence.

        :param one_byte: integer byte value
        :param size: ``2`` selects the two-byte form; any other value
            selects the three-byte form
        :return: list of integer byte values
        """
        # Low six bits go into the final continuation byte (10xxxxxx).
        tail = (one_byte & 0x3F) | 0x80
        if size == 2:
            # Lead byte 110xxxxx carries the bits above the low six.
            head = ((one_byte >> 6) & 0x1F) | 0xC0
            return [head, tail]
        # Three-byte form: fixed lead 11100000, then two continuation bytes.
        middle = ((one_byte >> 6) & 0x3F) | 0x80
        return [0xE0, middle, tail]

    def one_byte_rule(self, one_byte):
        r"""Return ``(raw|two|three)`` for *one_byte*, each part as ``\xNN`` text."""
        alternatives = [self.chr_to_hex(one_byte)]
        for width in (2, 3):
            alternatives.append(
                self.chrs_to_hex(self.encode_one_byte(one_byte, width))
            )
        return "(" + "|".join(alternatives) + ")"
class FastJsonUnicodeEncoder(Encoder):
    r"""Encoder whose per-byte fragment is the ``\\uNNNN`` text.

    Byte 0x61 becomes two backslashes followed by ``u0061``.
    """

    def one_byte_rule(self, one_byte):
        """Return ``chr_to_unicode(one_byte)`` unchanged."""
        fragment = self.chr_to_unicode(one_byte)
        return fragment
class RawEncoder(Encoder):
    r"""Encoder whose per-byte fragment is the plain ``\xNN`` text."""

    def one_byte_rule(self, one_byte):
        """Return ``chr_to_hex(one_byte)`` unchanged."""
        fragment = self.chr_to_hex(one_byte)
        return fragment
class RuleGenerate(object):
    """Turn a file of keywords (one per line) into a single pattern string.

    Configure with the chainable :meth:`set_encoder` and
    :meth:`set_file_name`, then call :meth:`run`.
    """

    def __init__(self):
        # Comma-separated encoder names; each must be a key of encoder_map.
        self.encoder = ""
        # Path of the keyword file read by run().
        self.file_name = ""
        self.encoder_map = {
            "fastjsonHex": FastJsonHexEncoder,
            "overlong": OverLongEncoder,
            "fastjsonUnicode": FastJsonUnicodeEncoder,
            "raw": RawEncoder,
        }

    def set_encoder(self, name):
        """Store the comma-separated encoder name list and return ``self``."""
        self.encoder = name
        return self

    def set_file_name(self, file_name):
        """Store the keyword file path and return ``self``."""
        self.file_name = file_name
        return self

    def mixed_encoder_line(self, line):
        """Encode one keyword with every configured encoder.

        Each byte of *line* becomes one group ``(e1|e2|...)`` holding that
        byte's fragment from each encoder, in the order the names were given.

        :param line: ``bytes`` keyword (iteration must yield integers)
        :return: the concatenated groups as one string
        :raises KeyError: if a configured name is not in ``encoder_map``
        """
        encoders = [self.encoder_map[name]() for name in self.encoder.split(",")]
        return "".join(
            "(" + "|".join(enc.one_byte_rule(b) for enc in encoders) + ")"
            for b in line
        )

    def run(self):
        """Read the keyword file and return ``(kw1)|(kw2)|...``.

        Lines that are empty after stripping whitespace are skipped. If the
        file does not exist, a message is printed and the process exits with
        status 0.

        :return: the combined pattern string
        """
        if not os.path.exists(self.file_name):
            print("file '%s' do not exist" % self.file_name)
            # Same effect as the site-provided exit(0), without needing `site`.
            raise SystemExit(0)
        rules = []
        with open(self.file_name, "rb") as f:
            for line in f:
                keyword = line.strip()
                # The file is read as bytes, so comparing against the str ""
                # would never match; test for emptiness instead. A blank line
                # would otherwise add an empty "()" alternative to the result.
                if not keyword:
                    continue
                rules.append(self.mixed_encoder_line(keyword))
        return "(" + ")|(".join(rules) + ")"
def test_overlong():
    r"""Check OverLongEncoder output against known fragments.

    Covers the plain ``\xNN`` helper, the ``.*`` pass-through, and a lone
    ``.`` that must be encoded like any other byte.
    """
    assert OverLongEncoder().chr_to_hex(49) == r"\x31"
    expectations = (
        (b"a.*b", r"(\x61|\xC1\xA1|\xE0\x81\xA1).*(\x62|\xC1\xA2|\xE0\x81\xA2)"),
        (b"a.b", r"(\x61|\xC1\xA1|\xE0\x81\xA1)(\x2E|\xC0\xAE|\xE0\x80\xAE)(\x62|\xC1\xA2|\xE0\x81\xA2)"),
    )
    for raw, wanted in expectations:
        produced = OverLongEncoder().encode_bytes(raw)
        assert produced == wanted
def test_fastjson():
    r"""Check the two fastjson encoders against hand-computed fragments.

    The hex encoder emits two backslashes before ``xNN``; the unicode encoder
    emits two backslashes before ``uNNNN``. Both leave ``.*`` untouched.
    """
    assert FastJsonHexEncoder().encode_bytes(b"ab") == r"\\x61\\x62"
    assert FastJsonHexEncoder().encode_bytes(b"a.*b") == r"\\x61.*\\x62"
    assert FastJsonUnicodeEncoder().encode_bytes(b"ab") == r"\\u0061\\u0062"
    assert FastJsonUnicodeEncoder().encode_bytes(b"a.*b") == r"\\u0061.*\\u0062"
# Run the built-in self-check before generating anything.
test_overlong()

# Generate and print the rule for the keywords listed in /tmp/1.
print(
    RuleGenerate()
    .set_encoder("overlong")
    .set_file_name("/tmp/1")
    .run()
)

# Other usages, kept for reference:
# print(RuleGenerate().set_encoder("fastjsonHex,fastjsonUnicode,raw").set_file_name("/tmp/1").run())
# print(FastJsonHexEncoder().encode_bytes(b"abcd"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
参考 https://github.com/phith0n/zkar