Last active
December 6, 2024 15:33
-
-
Save vndee/2c3a02a717f8c467b1e5d5129b8b30bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class RDBParser:
    """Parser for a Redis RDB dump file.

    Parsed keys are collected in ``data`` (database index -> key -> value)
    and auxiliary header fields in ``aux_fields``.

    The low-level readers ``_read_length`` and ``_read_string`` and the
    ``_process_key_value_pair`` method are expected to be provided
    elsewhere in this class; they are not defined in this section.
    """

    def __init__(self, file_path: str):
        """Remember *file_path*; the file is not opened until ``parse()``."""
        self.file_path = file_path
        self.data: Dict[int, Dict[str, Any]] = {}  # database -> key -> value
        self.aux_fields: Dict[str, str] = {}  # Store auxiliary fields
        self.current_db = 0

    @staticmethod
    def _read_fixed(f, size: int, what: str) -> bytes:
        """Read exactly *size* bytes from *f* or raise ``ValueError``.

        *what* names the field being read and is only used in the error
        message.
        """
        chunk = f.read(size)
        if len(chunk) != size:
            raise ValueError(f"Truncated RDB file while reading {what}")
        return chunk

    def parse(self) -> Dict[int, Dict[str, Any]]:
        """Parse the RDB file and return the data structure.

        Returns:
            ``self.data``, the mapping of database index -> key -> value.

        Raises:
            ValueError: if the magic string is not ``REDIS``, the 4-byte
                version field is incomplete or does not start with ``00``,
                or an expiry timestamp is cut short.
        """
        with open(self.file_path, "rb") as f:
            magic = f.read(5)
            if magic != b"REDIS":
                raise ValueError("Invalid RDB file format")

            version = f.read(4)
            if len(version) != 4 or not version.startswith(b"00"):
                # errors="replace" keeps this message intact even when the
                # version bytes are not valid UTF-8.
                raise ValueError(
                    f"Unsupported RDB version: {version.decode(errors='replace')}"
                )

            while True:
                type_byte = f.read(1)
                if not type_byte:
                    # Physical end of file without an explicit EOF opcode.
                    break

                op_type = type_byte[0]
                if op_type == RDBOperationType.EOF:
                    break
                elif op_type == RDBOperationType.SELECTDB:
                    self.current_db = self._read_length(f)
                    # setdefault keeps an already-populated database intact.
                    self.data.setdefault(self.current_db, {})
                elif op_type == RDBOperationType.EXPIRETIME:
                    # 4-byte expiry in seconds. "<" forces little-endian,
                    # standard-size decoding instead of the host's native
                    # byte order.
                    raw = self._read_fixed(f, 4, "EXPIRETIME")
                    expire_time = struct.unpack("<I", raw)[0]
                    # Converted to milliseconds to match EXPIRETIME_MS.
                    self._process_key_value_pair(f, expire_time * 1000)
                elif op_type == RDBOperationType.EXPIRETIME_MS:
                    # 8-byte expiry, already in milliseconds.
                    raw = self._read_fixed(f, 8, "EXPIRETIME_MS")
                    expire_time = struct.unpack("<Q", raw)[0]
                    self._process_key_value_pair(f, expire_time)
                elif op_type == RDBOperationType.AUX:
                    aux_key = self._read_string(f)
                    aux_value = self._read_string(f)
                    self.aux_fields[aux_key] = aux_value
                elif op_type == RDBOperationType.RESIZEDB:
                    # Two length fields (hash table size, expire table
                    # size). They must be consumed to keep the stream
                    # aligned, but their values are not stored.
                    self._read_length(f)
                    self._read_length(f)
                else:
                    # Any other byte is passed on as the value type of a
                    # key/value pair that has no expiry.
                    self._process_key_value_pair(f, None, op_type)

            # Consume the trailing CRC64 (8 bytes).
            # TODO: Implement CRC verification
            f.read(8)

        return self.data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment