Created
March 23, 2024 23:57
-
-
Save dcondrey/7fb4ee9d84cd41c7a745de5949ed4c1b to your computer and use it in GitHub Desktop.
Identify repeating byte patterns in a .dat file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
import re | |
from collections import Counter | |
class DatPatterns:
    """Find and inspect byte sequences that repeat inside a binary file.

    Typical use is ``DatPatterns(path).run()``, which reads the file, counts
    every fixed-size window of bytes, and prints the surroundings of each
    window that occurs often enough to hint at some structure.
    """

    def __init__(self, filepath):
        """Remember the file to analyse; nothing is read yet.

        Args:
            filepath: Path of the binary file to inspect.
        """
        self.filepath = filepath
        # Raw bytes of the file; stays None until read_file() has run.
        self.content = None

    def read_file(self):
        """Load the whole file at ``self.filepath`` into ``self.content``."""
        with open(self.filepath, 'rb') as file:
            self.content = file.read()

    def _ensure_loaded(self):
        """Read the file on first use so the analysis methods work standalone."""
        if self.content is None:
            self.read_file()

    def find_repeating_patterns(self, pattern_size=4, threshold=3):
        """Count every ``pattern_size``-byte window and keep the frequent ones.

        Windows overlap (the scan advances one byte at a time). Windows made
        up entirely of zero bytes are ignored because they are padding, not
        structure.

        Args:
            pattern_size: Length in bytes of each window; must be at least 1.
            threshold: A window is reported only if it occurs more than this
                many times.

        Returns:
            A dict mapping each reported window (``bytes``) to its number of
            occurrences, in order of first appearance.

        Raises:
            ValueError: If ``pattern_size`` is smaller than 1.
        """
        if pattern_size < 1:
            raise ValueError("pattern_size must be at least 1")
        self._ensure_loaded()
        data = self.content

        pattern_counts = Counter()
        # "+ 1" so the final window, ending on the last byte, is counted too.
        # When the data is shorter than one window the range is empty.
        for start in range(len(data) - pattern_size + 1):
            window = data[start:start + pattern_size]
            # any() over bytes is False only when every byte is zero; windows
            # that merely begin with a zero byte are still real candidates.
            if not any(window):
                continue
            pattern_counts[window] += 1

        # Only patterns repeating more than the threshold suggest structure.
        return {
            pattern: count
            for pattern, count in pattern_counts.items()
            if count > threshold
        }

    def analyze_patterns(self, patterns, context_size=10):
        """Print every occurrence of each pattern with its surrounding bytes.

        Args:
            patterns: Mapping of pattern bytes to occurrence count, as returned
                by ``find_repeating_patterns``.
            context_size: Number of bytes shown before and after each hit.
        """
        self._ensure_loaded()
        data = self.content

        for pattern, count in patterns.items():
            print(f"\nAnalyzing Pattern {pattern.hex()} (occurrences: {count}):")
            pattern_pos = data.find(pattern)
            while pattern_pos != -1:
                pattern_end = pattern_pos + len(pattern)
                # max() keeps the slice start from going negative, which would
                # otherwise wrap around to the end of the data.
                context_before = data[max(0, pattern_pos - context_size):pattern_pos]
                context_after = data[pattern_end:pattern_end + context_size]
                print(f"At offset {pattern_pos}: {context_before.hex()} [{pattern.hex()}] {context_after.hex()}")
                # Advance by one byte so overlapping occurrences are found,
                # matching how the windows were counted.
                pattern_pos = data.find(pattern, pattern_pos + 1)

    def run(self):
        """Read the file, find its repeating patterns and print the analysis."""
        self.read_file()
        patterns = self.find_repeating_patterns()
        self.analyze_patterns(patterns)
if __name__ == "__main__":
    import sys

    # Analyse the file named by the first command-line argument; with no
    # argument, fall back to the macOS CoreEmoji data file this script was
    # originally written for.
    default_path = "/System/Library/PrivateFrameworks/CoreEmoji.framework/Versions/A/Resources/emoji.dat"
    target_path = sys.argv[1] if len(sys.argv) > 1 else default_path
    decoder = DatPatterns(target_path)
    decoder.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment