Skip to content

Instantly share code, notes, and snippets.

@juan-fdz-hawa
Created June 28, 2022 01:16
Show Gist options
  • Save juan-fdz-hawa/a3eb1cf33f149f7473a37469ecb9feda to your computer and use it in GitHub Desktop.
Save juan-fdz-hawa/a3eb1cf33f149f7473a37469ecb9feda to your computer and use it in GitHub Desktop.
import json
import csv
from collections import defaultdict
from difflib import SequenceMatcher
from functools import lru_cache
# Load the NVD extract. The rest of the script tests `name in nvd`, indexes it
# as nvd[vendor] and iterates that value as product names -- presumably a
# {vendor: [product, ...]} mapping; confirm against the file that produces it.
#
# The file is opened in binary mode so that json.load detects the encoding
# (UTF-8/16/32) on its own instead of relying on the platform's locale
# encoding, which is not UTF-8 everywhere.
with open('extract.json', 'rb') as f_handler:
    nvd = json.load(f_handler)
# Group the installed applications by the vendor prefix of their bundle id:
# bundles[vendor] is a set of (exec_name, bundle_name) tuples.
bundles = defaultdict(set)
# newline='' is what the csv module requires of the file object so that quoted
# fields containing line breaks are parsed correctly; the encoding is pinned
# so the result does not depend on the platform's locale.
with open('bundles.csv', 'r', newline='', encoding='utf-8') as f_handler:
    reader = csv.reader(f_handler)
    # Skip header
    next(reader, None)
    for row in reader:
        # csv.reader yields [] for blank lines; ignore any row that lacks the
        # three columns read below instead of failing with IndexError.
        if len(row) < 3:
            continue
        # Columns are read as: executable name, bundle identifier, bundle name.
        exec_name, bundle_id, bundle_name = row[:3]
        # Take everything up to the last part, this is how iOS 7 computes the 'vendor id'
        # https://developer.apple.com/documentation/uikit/uidevice/1620059-identifierforvendor
        # (rpartition yields '' when the identifier contains no dot.)
        vendor = bundle_id.rpartition('.')[0]
        bundles[vendor].add((exec_name, bundle_name))
# Report lines collected by the vendor pass (hits and misses respectively).
software_found, software_not_found = [], []
# Bundle vendor prefix -> the NVD vendor key it was matched to.
matched_vendors = {}
# Bundle-id segments that are never accepted as a vendor name.
# TODO: this should eventually cover all top-level domains.
vendor_terms_blacklist = {
    'app',
    'com',
    'id',
    'net',
    'ru',
    'ui',
    'us',
}
# Bundle-id segment -> replacement term to look up as the NVD vendor instead.
# Keys are matched against the raw (case-sensitive) segment.
vendor_terms_overrides = {
    'Cisco-Systems': 'cisco',
    'getdropbox': 'dropbox',
    'keepcoder': 'telegram',
    'kovidgoyal': 'calibre-ebook',
    'postmanlabs': 'getpostman',
    'tinyspeck': 'slack',
    'virtualbox': 'oracle',
}
# Vendor pass: for each bundle vendor prefix, walk its dot-separated segments
# in order and stop at the first one that is a key of the NVD data.
for vendor_prefix in bundles:
    for segment in vendor_prefix.split('.'):
        # The override is looked up on the raw segment; the result is then
        # lower-cased before the blacklist and NVD checks.
        candidate = vendor_terms_overrides.get(segment, segment).lower()
        if candidate not in vendor_terms_blacklist and candidate in nvd:
            software_found.append(f"Found: {candidate} - {vendor_prefix}")
            matched_vendors[vendor_prefix] = candidate
            break
    else:
        # The loop finished without a break: no segment matched an NVD vendor.
        software_not_found.append(f"Not found: {vendor_prefix}")
# Vendor report: banner, every hit, every miss, then "<hits> <total vendors>".
print(
    "----------------------------",
    "VENDORS",
    "----------------------------",
    sep="\n",
)
for line in software_found + software_not_found:
    print(line)
print(len(software_found), len(bundles))
# Banner introducing the per-application section that follows.
print(
    "\n\n----------------------------",
    "Apps",
    "----------------------------",
    sep="\n",
)
# Start the application pass with fresh (rebound, not cleared) report lists.
software_found, software_not_found = [], []

# Per NVD vendor: installed executable/bundle name -> name to compare against
# that vendor's NVD entries. Names without an entry are used unchanged.
apps_override = {
    'agilebits': {
        '1Password 7': '1password',
    },
    'microsoft': {
        'Code': 'visual_studio_code',
        'Microsoft AutoUpdate': 'autoupdate',
        'Microsoft Edge': 'edge',
    },
    'oracle': {
        'VirtualBox': 'vm_virtualbox',
    },
    'osquery': {
        'osqueryd': 'osquery',
    },
    'zoom': {
        'zoom.us': 'zoom',
    },
}
def _normalize_app_name(name):
    """Lower-case *name* and replace spaces and hyphens with underscores."""
    return name.lower().replace(' ', '_').replace('-', '_')


# Application pass: for every matched vendor, compare each installed app's
# executable name and bundle name against that vendor's NVD entries.
for bundle_vendor, nvd_vendor in matched_vendors.items():
    name_overrides = apps_override.get(nvd_vendor, {})
    vendor_entries = nvd[nvd_vendor]
    for exec_name, bundle_name in bundles[bundle_vendor]:
        # Apply the per-vendor override (if any) before normalising.
        exec_key = _normalize_app_name(name_overrides.get(exec_name, exec_name))
        bundle_key = _normalize_app_name(name_overrides.get(bundle_name, bundle_name))
        # Exact match only; the first NVD entry equal to either key wins, with
        # the executable name checked before the bundle name for each entry.
        for entry in vendor_entries:
            if entry == exec_key:
                software_found.append(f"Vendor: {nvd_vendor}, Exec name: {exec_name}, NVD entry: {entry}")
                break
            if entry == bundle_key:
                software_found.append(f"Vendor: {nvd_vendor}, Bundle name: {bundle_name}, NVD entry: {entry}")
                break
        else:
            # NOTE: an approximate fallback (difflib.SequenceMatcher ratio
            # above 0.8 against the raw names) was prototyped at this point
            # and is currently disabled.
            software_not_found.append(f"Vendor: {nvd_vendor}, Exec name: {exec_name}, Bundle name: {bundle_name}")
# Application report: sort each list in place, print it with its label, then
# print the totals.
for label, entries in (("Found", software_found), ("Not found", software_not_found)):
    entries.sort()
    for entry in entries:
        print(f"{label}: {entry}")
print(f"Found: {len(software_found)} Not found: {len(software_not_found)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment