Created
November 25, 2018 06:38
-
-
Save kohnakagawa/7ed2c1bb61b523bf50e455e79054a2e3 to your computer and use it in GitHub Desktop.
pehash imphashを計算するためのツール
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| import sys | |
| from hashlib import sha256 | |
| from pefile import PE | |
| from pehash import totalhash_hex, anymaster_hex, anymaster_v1_0_1_hex | |
| from pehash import endgame_hex, crits_hex | |
| from sklearn.metrics import homogeneity_score | |
# Hash callables evaluated for every sample; each one is invoked as
# ``func(pe=<PE object>)``. The order must line up one-to-one with HASH_NAMES,
# because the two lists are zipped together in calc_hashes().
#
# NOTE(review): ssdeep_hex and impfuzzy_hex are neither imported nor defined in
# the visible part of this file; they have to be provided before this module
# can be imported.
HASH_FUNCTIONS = [
    anymaster_hex,
    anymaster_v1_0_1_hex,
    endgame_hex,
    crits_hex,
    # imphash_hex is defined further down in this file, so naming it directly
    # here raises NameError while the module is being loaded. The lambda
    # postpones the lookup until the hash is actually computed.
    lambda pe: imphash_hex(pe),
    ssdeep_hex,
    impfuzzy_hex,
]
# Labels for the computed hashes, in the same order as HASH_FUNCTIONS. They are
# used as keys of the per-sample result and as the columns that get scored.
HASH_NAMES = [
    "anymaster",
    "anymaster_v1_0_1",
    "endgame",
    "crits",
    # The trailing comma matters: without it Python fuses this literal with
    # the next one into "imphashssdeep", leaving one name too few.
    "imphash",
    "ssdeep",
    "impfuzzy",
]
def imphash_hex(pe):
    """Return whatever ``pe.get_imphash()`` reports for the given PE object.

    Thin adapter so the import hash can be requested through the same
    ``pe`` keyword argument that calc_hashes() passes to every hash function.
    """
    digest = pe.get_imphash()
    return digest
def show_clustering_result(result):
    """Print one ``<hash name> <score>`` line for every name in HASH_NAMES.

    For each hash, the values of the "class" column and of that hash's column
    are handed to ``homogeneity_score`` (in that order) and the returned score
    is printed next to the hash name.
    """
    for name in HASH_NAMES:
        reference = result["class"].values
        assigned = result[name].values
        score = homogeneity_score(reference, assigned)
        print("{} {}".format(name, score))
def calc_hashes(row):
    """Compute every configured hash plus the SHA-256 digest for one sample.

    Args:
        row: Record whose "path" entry names the file to hash (a DataFrame
            row when this function is used with ``DataFrame.apply(axis=1)``).

    Returns:
        pandas.Series keyed by the names in HASH_NAMES plus "sha256".
    """
    fname = row["path"]
    exe = PE(fname)
    try:
        result = {
            hash_name: hash_func(pe=exe)
            for hash_name, hash_func in zip(HASH_NAMES, HASH_FUNCTIONS)
        }
    finally:
        # A PE object opened from a path holds on to the underlying file data
        # until close() is called; release it per row so a long label list
        # does not pile up open handles, even when a hash function raises.
        exe.close()
    # The SHA-256 is taken over the raw file bytes, independent of PE parsing.
    with open(fname, "rb") as f:
        result["sha256"] = sha256(f.read()).hexdigest()
    return pd.Series(result)
def main():
    """Command-line entry point: ``<script> label.csv``.

    Reads the CSV named by the single command-line argument, appends the
    columns produced by calc_hashes() for every row, and prints the score of
    each hash via show_clustering_result(). Prints a usage line and exits
    with status 1 when the argument count is wrong.
    """
    args = sys.argv
    if len(args) != 2:
        print("Usage: {} label.csv".format(args[0]))
        sys.exit(1)

    labels = pd.read_csv(args[1])
    hashes = labels.apply(calc_hashes, axis=1)
    combined = pd.concat([labels, hashes], axis=1)
    show_clustering_result(combined)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment