Skip to content

Instantly share code, notes, and snippets.

Last active March 11, 2019 14:11
Show Gist options
  • Save fryguy04/51487ce7b138162d49ad5453e9b875b1 to your computer and use it in GitHub Desktop.
Save fryguy04/51487ce7b138162d49ad5453e9b875b1 to your computer and use it in GitHub Desktop.
Given a VirusTotal API key and a malware hash, summarize the keywords that antivirus vendors used to describe it. Example output for the PWdump hash: d1337b9e8bac0ee285492b89f895cadb pwdump trojan pswtool hacktool malicious riskware virus malware pwdump7 pwcrack 001b9ce61 hktl orsam unsafe genericpmf
# File:
# Description: Given VirusTotal API Key & Malware Hash, summarize key words that AntiVirus companies used to describe it
# Author: Fred Frey
# Date: 3/10/2019
import requests
import json
import re
from collections import OrderedDict, Counter
# Common vendor boilerplate tokens that say nothing about the malware itself.
# Each token is listed once; the list is only used for membership tests.
tokens_to_remove = [
    'win32', 'w32', 'gen', 'confidence', 'cloud', 'samples',
    'static', 'generic', 'agent', 'score', 'tool',
]
def get_vti_report(vti_api_key, hash):
    '''Call VirusTotal API and get the malware report for given hash.

    vti_api_key -- API key, sent as the ``apikey`` query parameter.
    hash        -- file hash, sent as the ``resource`` query parameter.

    Returns the decoded JSON body of the response.

    Raises requests.HTTPError on a 4xx/5xx status and ValueError when the
    body is not valid JSON.
    '''
    # NOTE(review): the URL literal was empty in this copy of the script. The
    # ``apikey``/``resource`` parameters match the VirusTotal v2 file-report
    # endpoint, so that endpoint is used here -- confirm before relying on it.
    report_url = 'https://www.virustotal.com/vtapi/v2/file/report'

    # Use the key passed by the caller rather than a module-level global.
    params = {'apikey': vti_api_key, 'resource': hash}
    headers = {
        "Accept-Encoding": "gzip, deflate",
        "User-Agent" : "gzip, My Python requests library example client or username"
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(report_url, params=params, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()
def consolidate_vti_result(vti_report):
    '''Given VTi file report, just return AntiVirus descriptions.

    vti_report -- mapping with a 'scans' entry that maps each scanner name to
                  a mapping holding a 'result' value.

    Returns a list of the 'result' values that are not None, in the order the
    scanners appear. If the report has no usable 'scans' mapping, an error
    line is printed and whatever was collected so far (possibly an empty
    list) is returned.
    '''
    vti_result_descriptions = []
    try:
        for scan in vti_report.get('scans').values():
            result = scan.get('result')
            # Scanners that found nothing report a null result; skip those.
            if result is not None:
                vti_result_descriptions.append(result)
    except (AttributeError, TypeError):
        # Raised when the report, its 'scans' entry or one scan is not a mapping.
        print('ERROR Parsing json results')
    return vti_result_descriptions
def remove_from_list(orig_list, remove_me):
    '''Utility function to remove all occurrences of remove_me from orig_list.

    orig_list -- list to filter; it is not modified.
    remove_me -- value to drop (compared with ``!=``).

    Returns a new list with the remaining items in their original order.
    '''
    return [item for item in orig_list if item != remove_me]
def clean_tokenize_frequency(vti_string):
    '''Clean a string of VTi descriptions and return its token frequencies.

    vti_string -- all AntiVirus descriptions joined into one string.

    The string is lowercased, every character outside a-z and 0-9 becomes a
    separator, and tokens of three characters or fewer or listed in
    tokens_to_remove are dropped.

    Returns whatever token_freq returns for the surviving tokens.
    '''
    # Lowercase everything
    vti_string = vti_string.lower()

    # Replace junk chars with spaces so we tokenize more atomically
    vti_string = re.sub(r'[^a-z0-9]', ' ', vti_string)

    # Filter in a single pass instead of rebuilding the list once per token.
    # Short strings are usually not useful, and the ignored tokens are common
    # VTi descriptions that don't provide much context.
    ignored = set(tokens_to_remove)
    tokens = [
        token for token in vti_string.split()
        if len(token) > 3 and token not in ignored
    ]

    return token_freq(tokens)
def token_freq(tokens):
    '''Given array of tokens, return OrderedDict with frequency count.

    tokens -- iterable of hashable tokens.

    Returns an OrderedDict mapping each token to its number of occurrences,
    ordered from the highest count to the lowest. Tokens with equal counts
    keep the order in which they were first seen.
    '''
    # most_common() already sorts by count, highest first, and keeps ties in
    # first-seen order.
    return OrderedDict(Counter(tokens).most_common())
def top_most(vti_tokens, top_count=0):
    '''Create a string from the first top_count keys in vti_tokens.

    vti_tokens -- mapping whose keys are already in the desired order.
    top_count  -- maximum number of keys to include; 0 means all keys. A
                  negative value yields an empty string.

    Returns a string in which every included key is preceded by one space,
    so a non-empty result starts with a space.
    '''
    keys = list(vti_tokens)
    if top_count != 0:
        keys = keys[:max(top_count, 0)]
    return ''.join(' ' + str(key) for key in keys)
def vti_summarize(vti_api_key, hash, max_strings=0):
    '''Summarize the AntiVirus descriptions VirusTotal holds for a hash.

    vti_api_key -- API key, passed through to get_vti_report.
    hash        -- malware hash to look up.
    max_strings -- passed to top_most as the number of tokens to keep
                   (default 0).

    Returns the string produced by top_most.
    '''
    # Call VTi, get malware report by Hash
    vti_report = get_vti_report(vti_api_key, hash)

    # Create an array with all VTi AntiVirus report descriptions
    vti_result_descriptions = consolidate_vti_result(vti_report)
    vti_result_string = ' '.join(vti_result_descriptions)

    # Tokenize by spaces, clean up junk words and return ordered dict with frequency
    vti_tokens = clean_tokenize_frequency(vti_result_string)

    return top_most(vti_tokens, max_strings)
if __name__ == "__main__":
    import os
    import sys

    # VTI_API_KEY is not defined in the visible part of this script. Keep a
    # value defined earlier in the module if there is one; otherwise read it
    # from the environment so the key need not be stored in the source.
    VTI_API_KEY = globals().get('VTI_API_KEY') or os.environ.get('VTI_API_KEY')
    if not VTI_API_KEY:
        sys.exit('Set the VTI_API_KEY environment variable to your VirusTotal API key')

    # Named sample_hash so the builtin hash() is not shadowed at module level.
    sample_hash = 'd1337b9e8bac0ee285492b89f895cadb'  # Example hash for PWdump
    summary = vti_summarize(VTI_API_KEY, sample_hash, 15)
    print(sample_hash, summary)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment