Skip to content

Instantly share code, notes, and snippets.

@kohnakagawa
Created November 25, 2018 06:38
Show Gist options
  • Save kohnakagawa/7ed2c1bb61b523bf50e455e79054a2e3 to your computer and use it in GitHub Desktop.
Save kohnakagawa/7ed2c1bb61b523bf50e455e79054a2e3 to your computer and use it in GitHub Desktop.
pehash imphashを計算するためのツール
import pandas as pd
import numpy as np
import sys
from hashlib import sha256
from pefile import PE
from pehash import totalhash_hex, anymaster_hex, anymaster_v1_0_1_hex
from pehash import endgame_hex, crits_hex
from sklearn.metrics import homogeneity_score
def imphash_hex(pe):
    """Return the import hash of an already-parsed PE file.

    Thin adapter around ``pe.get_imphash()`` so that the import hash can be
    invoked with the same ``pe=`` keyword argument as the other entries of
    ``HASH_FUNCTIONS``.

    Args:
        pe: Parsed PE object exposing ``get_imphash()``.

    Returns:
        Whatever ``pe.get_imphash()`` returns.
    """
    return pe.get_imphash()


# Hash functions applied to every sample, position-for-position with
# HASH_NAMES. ``imphash_hex`` is defined above because this list is evaluated
# at import time and would otherwise raise NameError.
# NOTE(review): ``ssdeep_hex`` and ``impfuzzy_hex`` are neither defined nor
# imported in the visible part of this file. They must be supplied (accepting
# a ``pe`` keyword argument) before this module can be imported.
HASH_FUNCTIONS = [
    anymaster_hex, anymaster_v1_0_1_hex,
    endgame_hex, crits_hex, imphash_hex,
    ssdeep_hex, impfuzzy_hex,
]

# Result keys, one per entry of HASH_FUNCTIONS and in the same order.
# Every element needs its own trailing comma: adjacent string literals are
# silently concatenated, which would shift every following name by one.
HASH_NAMES = [
    "anymaster", "anymaster_v1_0_1",
    "endgame", "crits", "imphash",
    "ssdeep", "impfuzzy",
]
def show_clustering_result(result):
    """Print one homogeneity score per hash type.

    For every name in ``HASH_NAMES`` a line ``"<name> <score>"`` is written
    to standard output, where the score is
    ``homogeneity_score(result["class"].values, result[name].values)``.

    Args:
        result: Table indexable by column name that holds a ``"class"``
            column and one column per entry of ``HASH_NAMES``; each column
            must expose ``.values``.
    """
    for column in HASH_NAMES:
        truth = result["class"].values
        clusters = result[column].values
        score = homogeneity_score(truth, clusters)
        print("{} {}".format(column, score))
def calc_hashes(row):
    """Compute every configured hash plus the SHA-256 digest of one sample.

    Args:
        row: Record indexable by key (for example a DataFrame row) whose
            ``"path"`` entry is the path of the PE file to hash.

    Returns:
        A ``pandas.Series`` keyed by the names in ``HASH_NAMES`` (paired
        positionally with ``HASH_FUNCTIONS``) plus a ``"sha256"`` entry
        holding the hex digest of the file contents.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    fname = row["path"]

    # Read the file once and share the bytes between the digest and the
    # parser. Parsing from memory also avoids leaving a PE object that was
    # opened by file name unclosed for every row.
    with open(fname, "rb") as f:
        data = f.read()

    exe = PE(data=data)
    result = {
        hash_name: hash_func(pe=exe)
        for hash_name, hash_func in zip(HASH_NAMES, HASH_FUNCTIONS)
    }
    result["sha256"] = sha256(data).hexdigest()
    return pd.Series(result)
def main():
    """Command-line entry point: hash every listed sample and print scores.

    Expects exactly one argument, the path of a CSV file. Each row is passed
    to ``calc_hashes`` (which reads its ``"path"`` entry), the resulting hash
    columns are appended to the table, and the combined table is handed to
    ``show_clustering_result`` (which reads its ``"class"`` column).

    With any other number of arguments a usage line is printed and the
    process exits with status 1.
    """
    argv = sys.argv
    if len(argv) != 2:
        print("Usage: {} label.csv".format(argv[0]))
        sys.exit(1)

    labels = pd.read_csv(argv[1])
    hashes = labels.apply(calc_hashes, axis=1)
    combined = pd.concat([labels, hashes], axis=1)
    show_clustering_result(combined)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment