Last active
March 11, 2019 14:11
-
-
Save fryguy04/51487ce7b138162d49ad5453e9b875b1 to your computer and use it in GitHub Desktop.
Given a VirusTotal API key and a malware hash, summarize the keywords that antivirus companies used to describe it. Example output for the PWdump hash: d1337b9e8bac0ee285492b89f895cadb pwdump trojan pswtool hacktool malicious riskware virus malware pwdump7 pwcrack 001b9ce61 hktl orsam unsafe genericpmf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# File: vti_summarize.py | |
# Description: Given VirusTotal API Key & Malware Hash, summarize key words that AntiVirus companies used to describe it | |
# Author: Fred Frey | |
# Date: 3/10/2019 | |
import requests | |
import json | |
import re | |
from collections import OrderedDict, Counter | |
# Tokens that appear in many AntiVirus descriptions but carry little context;
# clean_tokenize_frequency() drops them before counting.
tokens_to_remove = [
    'win32', 'w32', 'gen', 'confidence', 'cloud', 'samples',
    'static', 'generic', 'agent', 'score', 'tool',
]
def get_vti_report(vti_api_key, hash):
    '''Call VirusTotal API and get the malware report for given hash.

    vti_api_key -- API key sent as the 'apikey' query parameter
    hash        -- file hash sent as the 'resource' query parameter

    Returns the decoded JSON body of the response. Raises whatever
    requests raises on connection failure or timeout, and whatever
    response.json() raises when the body is not valid JSON.
    '''
    # Use the key passed by the caller; the script-level VTI_API_KEY global
    # only exists when this file is run directly, not when it is imported.
    params = {'apikey': vti_api_key, 'resource': hash}
    headers = {
        "Accept-Encoding": "gzip, deflate",
        "User-Agent" : "gzip, My Python requests library example client or username"
    }
    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.get('https://www.virustotal.com/vtapi/v2/file/report',
                            params=params, headers=headers, timeout=30)
    return response.json()
def consolidate_vti_result(vti_report):
    '''Given VTi file report, just return AntiVirus descriptions.

    Reads the mapping stored under the 'scans' key of vti_report and collects
    the 'result' value of every entry whose result is not None.

    Returns a list of those result values, in the report's iteration order.
    If the report has no 'scans' mapping or an entry is malformed, an error
    line is printed and whatever was collected so far is returned.
    '''
    vti_result_descriptions = []
    try:
        scans = vti_report.get('scans')
        for scan in scans.values():
            result = scan.get('result')
            if result is not None:
                vti_result_descriptions.append(result)
    except (AttributeError, TypeError):
        # Only swallow the errors a malformed report produces; a bare
        # except would also hide KeyboardInterrupt and SystemExit.
        print('ERROR Parsing json results')
    return vti_result_descriptions
def remove_from_list(orig_list, remove_me):
    '''Return a new list holding every item of orig_list that is not equal to remove_me.

    orig_list itself is left untouched.
    '''
    kept = []
    for item in orig_list:
        if item != remove_me:
            kept.append(item)
    return kept
def clean_tokenize_frequency(vti_string):
    '''Given string of all VTi Descriptions, clean, tokenize and count it.

    The string is lowercased, every character outside a-z / 0-9 is replaced
    by a space, and the result is split on single spaces. Tokens of three
    characters or fewer (including the empty strings produced by consecutive
    spaces) and tokens listed in tokens_to_remove are discarded.

    Returns whatever token_freq() returns for the remaining tokens.
    '''
    # Lowercase everything
    vti_string = vti_string.lower()

    # Replace junk chars with spaces so we tokenize more atomically
    vti_string = re.sub(r'[^a-z0-9]', ' ', vti_string)

    # Filter in a single pass instead of rebuilding the list once per token;
    # a set makes the stop-word membership test constant time.
    ignored = set(tokens_to_remove)
    tokens = [
        token for token in vti_string.split(' ')
        if len(token) > 3 and token not in ignored
    ]

    return token_freq(tokens)
def token_freq(tokens):
    '''Given array of tokens, return OrderedDict mapping token -> occurrence count.

    Entries are ordered from the highest count to the lowest; tokens with the
    same count keep the order in which they were first seen.
    '''
    # most_common() with no argument yields every (token, count) pair,
    # sorted by count in descending order.
    ranked_pairs = Counter(tokens).most_common()
    return OrderedDict(ranked_pairs)
def top_most(vti_tokens, top_count=0):
    '''Join the first top_count keys of vti_tokens into one space-separated string.

    Keys are taken in the mapping's own iteration order and converted with
    str(). A top_count of 0 means "no limit": every key is included.
    Surrounding whitespace is stripped from the final string.
    '''
    words = []
    for key, _count in vti_tokens.items():
        limited = top_count != 0
        if limited and len(words) >= top_count:
            break
        words.append(str(key))
    return ' '.join(words).strip()
def vti_summarize(vti_api_key, hash, max_strings=0):
    '''Fetch the VTi report for hash and summarize it as a string of key words.

    vti_api_key -- passed through to get_vti_report()
    hash        -- passed through to get_vti_report()
    max_strings -- passed to top_most() as the number of words to keep
                   (0, the default, keeps all of them)

    Returns the string produced by top_most().
    '''
    # Fetch the report, then pull out every AntiVirus description it holds
    report = get_vti_report(vti_api_key, hash)
    descriptions = consolidate_vti_result(report)

    # Merge the descriptions into one string, then clean, tokenize and count it
    ranked_tokens = clean_tokenize_frequency(' '.join(descriptions))

    return top_most(ranked_tokens, max_strings)
if __name__ == "__main__":
    # Placeholder value; supply a real VirusTotal API key before running.
    VTI_API_KEY = 'USER_SUPPLIED'

    # Example hash for PWdump
    sample_hash = 'd1337b9e8bac0ee285492b89f895cadb'

    # Print the hash followed by its 15 most frequent description words
    print(sample_hash, vti_summarize(VTI_API_KEY, sample_hash, 15))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment