Last active
July 23, 2020 22:13
-
-
Save 3lpsy/ee9bad21d7cfc8c2c5a6a2190f0f24a7 to your computer and use it in GitHub Desktop.
Parse output from Find-InterestingDomainShareFile to filter out only paths. This makes the data greppable. Also includes options to filter by size and extension.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
from pathlib import Path | |
import argparse | |
# Size units in bytes. Both are binary multiples so they agree with each
# other (KB is 1024, so MB is 1024 * KB rather than 1000 * KB).
KB = 1024
MB = 1024 * KB

# Default upper bound on file length: 2 MB.
DEFAULT_MAX = 2 * MB

# Extensions selected by the --use-common-ext option.
COMMON_EXTS = [
    ".doc",
    ".docx",
    ".docs",
    ".xls",
    ".xlsx",
    ".xml",
    ".csv",
    ".txt",
    ".config",
    ".conf",
    ".ini",
    ".db",
    ".sql",
    ".sqlite",
    ".pdf",
]
class Item:
    """A single file record: its path, its length, and its owner."""

    def __init__(self):
        # Placeholder values that stay in effect for any field the parser
        # never encounters in the input.
        self.path, self.length, self.owner = "", 1, ""
# Field labels recognised at the start of a line; a line beginning with one
# of these ends a path value that wrapped onto several lines.
KEYS = (
    "CreationTime",
    "LastAccessTime",
    "Length",
    "Path",
    "LastWriteTime",
)
def parse_item(f, owner_line):
    """Parse one record, starting from its already-read "Owner" line.

    Further lines are read from *f* until a blank line or end of file, and
    the path and length found there are stored on a new ``Item``.

    Args:
        f: Open text file positioned on the line after ``owner_line``.
        owner_line: The "Owner ..." line that introduced the record.

    Returns:
        The tuple ``(f, item)``.

    Raises:
        IndexError: The owner, Path or Length line has no ":" separator.
        ValueError: The Length value is not an integer.
    """
    item = Item()
    # Split on the first colon only so a value that itself contains ":" is
    # kept whole; the [1:] drops the single space following the colon.
    item.owner = owner_line.split(":", 1)[1][1:].rstrip("\n")
    # Assumes owner is always first
    line = f.readline()
    while line.strip():
        if line.startswith("Path"):
            item.path = line.split(":", 1)[1][1:].rstrip("\n")
            line = f.readline()
            # A long path wraps onto following lines. Every non-blank line
            # that does not start with a known field label is treated as a
            # continuation; its first 17 characters are skipped (presumably
            # the padding under the "Label : " column - confirm against
            # real output).
            while line.strip() and not line.startswith(tuple(KEYS)):
                item.path += line[17:].rstrip("\n")
                line = f.readline()
            # `line` is now the still-unprocessed line that ended the path
            # (a field line, a blank line, or EOF). Re-enter the loop with
            # it instead of reading past it; otherwise the field right
            # after Path is lost, or the blank record separator is
            # swallowed and the next record is merged into this one.
            continue
        if line.startswith("Length"):
            length_str = line.split(":", 1)[1][1:].strip()
            # An empty Length value counts as zero bytes.
            item.length = int(length_str) if length_str else 0
        line = f.readline()
    return f, item
def parse(target, min=2, max=DEFAULT_MAX, exts=None):
    """Print the path of every record in *target* that passes the filters.

    A record begins at a line starting with "Owner" and is read by
    ``parse_item``. Its path is printed to stdout when its length lies in
    ``[min, max]`` and, if *exts* is non-empty, when the text after the last
    "." of the path (lower-cased) is one of *exts*.

    Any exception raised while handling a record is collected rather than
    propagated; the count and the messages are reported on stderr at the
    end so that stdout contains nothing but paths.

    Args:
        target: Path of the text file to read.
        min: Smallest length in bytes to include (inclusive). The name
            shadows the builtin but is part of the call interface.
        max: Largest length in bytes to include (inclusive).
        exts: Optional collection of lower-case extensions without the
            leading dot; empty or ``None`` accepts every extension.
    """
    failures = []
    with Path(target).open() as f:
        line = f.readline()
        while line:
            if line.startswith("Owner"):
                try:
                    f, item = parse_item(f, line)
                    # Both bounds are inclusive, matching the "Min length"
                    # / "Max length" wording of the command-line help.
                    if min <= item.length <= max:
                        path_ext = item.path.split(".")[-1].lower()
                        if not exts or path_ext in exts:
                            print(item.path)
                except Exception as e:
                    # Best effort: a malformed record must not abort the
                    # whole run; remember it and carry on.
                    failures.append(e)
            line = f.readline()
    if failures:
        print(f"Error Count: {len(failures)}", file=sys.stderr)
        for e in failures:
            print(e, file=sys.stderr)
def _normalize_exts(raw_exts):
    """Return *raw_exts* lower-cased and with leading dots removed."""
    return [ext.lower().lstrip(".") for ext in raw_exts]


def main(argv=None):
    """Parse the command line and print the matching paths.

    Exits with status 1 when the input file does not exist.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--min", type=int, default=2, help="Min length (bytes)")
    parser.add_argument(
        "-M",
        "--max",
        type=int,
        default=DEFAULT_MAX,
        help=f"Max length (bytes). Default: {DEFAULT_MAX} (2MB)",
    )
    parser.add_argument(
        "-e",
        "--ext",
        type=str,
        action="append",
        help="Extension to include. Option can appear multiple times. Defaults: any.",
    )
    parser.add_argument(
        "-E",
        "--use-common-ext",
        action="store_true",
        help=f"Use common extensions: {COMMON_EXTS}",
    )
    parser.add_argument("file", help="File to parse")
    args = parser.parse_args(argv)

    targ = args.file
    if not Path(targ).exists():
        # f-string so the offending file name is actually shown.
        print(f"[!] File {targ} does not exist", file=sys.stderr)
        sys.exit(1)

    # -E and -e may be combined: explicit extensions are added to the
    # common set instead of being silently ignored.
    exts = []
    if args.use_common_ext:
        exts.extend(_normalize_exts(COMMON_EXTS))
    if args.ext:
        exts.extend(_normalize_exts(args.ext))

    parse(targ, min=args.min, max=args.max, exts=exts)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you run Find-InterestingDomainShareFile, the output is often spread over multiple lines, which makes grepping annoying. Instead of using bash-fu, you can just parse out the paths in Python. This parser does that. It assumes that Find-InterestingDomainShareFile was run without any extra modifications with regard to formatting (no passing to Format-Table, and no filtering params via select or Properties).