Here I'm parsing some test data with Kaitai to see what it can extract.
This is the dumper that I'm using:
import binascii
import tabulate
PRIMATIVE_TYPES = (str, bytes, int)| /** | |
| * @brief Enumeration of options for curl_easy_setopt() based on libcurl. | |
| * | |
| * Note: The explicit integer values match those used by libcurl. | |
| * Options without explicit values increment from the previous one. | |
| */ | |
| typedef enum { | |
| /* Options expecting a long */ | |
| CURLOPT_PORT = 3, | |
| CURLOPT_TIMEOUT = 13, | 
| eval (ssh-agent -c) | |
| set -Ux SSH_AUTH_SOCK $SSH_AUTH_SOCK | |
| set -Ux SSH_AGENT_PID $SSH_AGENT_PID | 
| import "dotnet" | |
| rule DotnetStartupHook { | |
| meta: | |
| description = "might be a .NET startup hook module" | |
| author = "William Ballenthin <[email protected]>" | |
| strings: | |
| $a1 = "StartupHook" | |
| $a2 = "Initialize" | |
| condition: | 
| # decoding_routines.py | |
| # | |
| # An example of using FLOSS as a library to identify potential decoding routines. | |
| # It will print an ordered list of function addresses and their "score", | |
| # ranked from most likely to least likely to be a decoding routine. | |
| # | |
| # Usage: | |
| # | |
| # $ python decoding_routines.py /path/to/input.exe | |
| # 0x401000: 0.99 | 
| #!/usr/bin/env python3 | |
| ''' | |
| compare vivisect analysis across versions. | |
| pip install devtools[pygments] pydantic viv-utils termcolor | |
| ''' | |
| import sys | |
| import time | |
| import os.path | |
| import logging | 
| #NoEnv | |
| #Warn | |
| SendMode Input | |
| SetWorkingDir %A_ScriptDir% | |
| SetCapsLockState AlwaysOff | |
| CapsLock::Send {esc} | |
| CapsLock & j::Send {Down} | |
| CapsLock & k::Send {Up} | |
| CapsLock & h::Send {Left} | 
| import mmap | |
| def lines(m): | |
| line = m.readline() | |
| while line: | |
| yield line.decode("utf-8").rstrip("\n") | |
| line = m.readline() | |
| def filelines(path): | |
| with open(path, "rb") as f: | 
| """ | |
| sort the given jsonl document (distinct json documents separated by newline) | |
| by the given key, writing the output to STDOUT. | |
| example: | |
| python sort-jsonl-by-key.py log.jsonl "timestamp" | |
| this does require reading the entire document into memory, first. | |
| a future revision could maybe use a mmap to avoid keeping things in memory. | 
| 2c4a910a1299cdae2a4e55988a2f102e ./APT_Backdoor_SUNBURST/019085a76ba7126fff22770d71bd901c325fc68ac55aa743327984e89f4b0134 | |
| b91ce2fa41029f6955bff20079468448 ./APT_Backdoor_SUNBURST/32519b85c0b422e4656de6e6c41878e95fd95026267daab4215ee59c107d6c77 | |
| e18a6a21eb44e77ca8d739a72209c370 ./APT_Backdoor_SUNBURST/a25cadd48d70f6ea0c4a241d99c5241269e6faccb4054e62d16784640f8e53bc | |
| 9ac181cb35f27e860d59e8d2a6309d35 ./APT_Backdoor_SUNBURST/ad2fbf4add71f61173975989d1a18395afb8538ed889012b9d2e21c19e98bbd1 | |
| 846e27a652a5e1bfbd0ddd38a16dc865 ./APT_Backdoor_SUNBURST/ce77d116a074dab7a22a0fd4f2c1ab475f16eec42e1ded3c0b0aa8211fe858d6 | |
| 3e329a4c9030b26ba152fb602a1d5893 ./APT_Backdoor_SUNBURST/d3c6785e18fba3749fb785bc313cf8346182f532c59172b69adfb31b96a5d0af | |
| 393702fab1c5d09d9f94e8a63114746d ./APT_Dropper_Win64_TEARDROP/6e4050c6a2d2e5e49606d96dd2922da480f2e0c70082cc7e54449a7dc0d20f8d | |
| 56ceb6d0011d87b6e4d7023d7ef85676 ./APT_Webshell_SUPERNOVA/c15abaf51e78ca56c0376522d699c978217bf041a3bd3c71d09193efa5717c71 | |
| b32892d699c39949e9b648d6b72fe5cf | 
Here I'm parsing some test data with Kaitai to see what it can extract.
This is the dumper that I'm using:
import binascii
import tabulate
PRIMATIVE_TYPES = (str, bytes, int)