Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save davidbegin/661b1775dead155c7953554dcb5c792c to your computer and use it in GitHub Desktop.
Save davidbegin/661b1775dead155c7953554dcb5c792c to your computer and use it in GitHub Desktop.
from __future__ import print_function
# import os
import json
# import sys
from pathlib import Path
# Why did you not use Glob on Path???
import glob
def findfiles():
# HARDCODED MAGIC STRING
# WHERE DID THIS MAGIC STRING COME FROM
logpath = './json-data/json-formatted/'
logs = []
hashes = []
hashset = {}
# Why not use pathlib???
logfiles = [log for log in glob.glob(logpath + '**/*.json', recursive=True)]
for log in logfiles:
with open(log) as logfile:
# X?????
# WHAT KIND OF VARIABLE NAME IS THAT???
# WHAT IS THIS C? GO?
x = json.load(logfile)
# WHY DO WE GRAB THE FIRST ELEMET?
# WhAT IF X is Len 0
# WHATS UP WITH THIS MAGIC cowrie String?????
if x[0]['eventid'] == 'cowrie.session.file_download':
outfile = x['outfile']
# THIS MAKES ME UNCOMFORTABLE
print_function(outfile)
logs.append(outfile)
# WHAT IS A VARIABLE FOR ANTS:???
for i in logs:
# AGAIN WITH THE XXXXXXX??
x = Path(i).stem
hashes.append(x)
# This is just to enforce uniquenesss????
hashset = list(set(hashes))
# filehandle.......ugh
with open('hashes.txt', 'w') as filehandle:
# WHAT ARE YOU DOING
# HASH IS KEYWORD
# ITS HIGHLIGHTED
# STOP USING HASH AS A TEMP VARIABLE
filehandle.writelines("%s\n" % hash for hash in hashset)
# WHAT THE HECK IS VT
# Virgiina Tech resource
# Sig ->
def vtfiles_resource_sig():
# A ./ in a MAGIC STRING!!!!!!!!!!!!!
vt_report_path = './vt/'
files = []
# AGAIN WITH THE GLOBBING
allfiles = [f for f in glob.glob(vt_report_path + '**/*.json', recursive=True)]
# WHAT ARE YOU DOING, STOP USING KEYWORDS AS TEMP VARIABLES
# PEOPLE COULD GET HURT
for file in allfiles:
with open(file) as f:
# What is this file isn't VALID JSON???
j = json.load(f)
# WHAT is J is len(0)???
# This really depends on the structure of theese files
# Why check again '1' and 1,
# why not convert to int, then compare????
if j[0]['response_code'] == '1' or j[0]['response_code'] == 1:
# This is reaching twice into a datastructure,
# That we don't know will match this
resources = j[0]['resource']
files.append(resources)
with open('resources.txt', 'w') as filehandle:
filehandle.writelines("%s\n" % resource for resource in files)
def compare_res_sig_to_hash():
matches = {}
# A, B??!?!?
# VARIABLE SHADOWING
a = open('resources.txt', 'r').read().splitlines()
a = set(a)
b = open('hashes.txt', 'r').read().splitlines()
b = set(b)
matches = set(a).intersection(b)
with open('matches.txt', 'w') as filehandle:
filehandle.writelines("%s\n" % match for match in matches)
if __name__ == '__main__':
findfiles()
# vtfiles_resource_sig()
# compare_res_sig_to_hash()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment