Last active
June 6, 2017 07:26
-
-
Save cestella/8dd83031b8898a732b6a5a60fce1b616 to your computer and use it in GitHub Desktop.
REST API for the DGA model from https://github.com/ClickSecurity/data_hacking/tree/master/dga_detection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sklearn | |
import numpy as np | |
import math | |
import pickle | |
import collections | |
class DGA:
    """Wrapper around a pickled domain classifier and its feature helpers.

    On construction, five pickled artifacts are loaded from the current
    working directory into ``self.model`` under the keys ``'clf'``,
    ``'alexa_vc'``, ``'alexa_counts'``, ``'dict_vc'`` and ``'dict_counts'``.
    """

    # (key in ``self.model``, pickle file path), in load order.
    _MODEL_FILES = (
        ('clf', './dga_model_random_forest.model'),
        ('alexa_vc', './dga_model_alexa_vectorizor.model'),
        ('alexa_counts', './dga_model_alexa_counts.model'),
        ('dict_vc', './dga_model_dict_vectorizor.model'),
        ('dict_counts', './dga_model_dict_counts.model'),
    )

    def __init__(self):
        """Load every model artifact listed in ``_MODEL_FILES``.

        Raises:
            IOError/OSError: if one of the model files cannot be opened.
        """
        self.model = dict(
            (key, self._load_pickle(path)) for key, path in self._MODEL_FILES
        )

    @staticmethod
    def _load_pickle(path):
        """Return the object unpickled from ``path``, closing the file after."""
        # NOTE: unpickling can execute arbitrary code; these files must come
        # from a trusted source.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def evaluate_domain(self, domain):
        """Classify a single domain string and return the predicted label.

        Args:
            domain: the domain text to score.

        Returns:
            The first element of the classifier's ``predict`` output for the
            one-row feature matrix built from ``domain``.
        """
        alexa_match = self.model['alexa_counts'] * self.model['alexa_vc'].transform([domain]).T
        dict_match = self.model['dict_counts'] * self.model['dict_vc'].transform([domain]).T
        # Assemble feature matrix (for just one domain):
        # length, character entropy, and the two vectorizer match scores.
        # NOTE(review): alexa_match / dict_match are the raw product results,
        # presumably one-element arrays rather than scalars -- confirm that
        # the installed numpy/scikit-learn versions accept this row as-is.
        X = [len(domain), self.entropy(domain), alexa_match, dict_match]
        y_pred = self.model['clf'].predict([X])[0]
        return y_pred

    def entropy(self, s):
        """Return the Shannon entropy (in bits) of the characters in ``s``.

        An empty string yields 0 because the sum runs over no characters.
        """
        # ``lns`` is a float so the division below is true division even
        # under Python 2.
        p, lns = collections.Counter(s), float(len(s))
        return -sum(count / lns * math.log(count / lns, 2) for count in p.values())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import model | |
from flask import Flask | |
from flask import request,jsonify | |
import socket | |
app = Flask(__name__) | |
@app.route("/apply", methods=['GET'])
def predict():
    """Score the hostname given in the ``host`` query parameter.

    Returns a JSON map with one field, ``is_malicious``, holding the model's
    output for that host. If ``host`` is absent, a JSON error body with HTTP
    status 400 is returned instead.

    The module-level name ``model`` must already be bound to an object with
    an ``evaluate_domain`` method; the ``__main__`` section of this script
    does that before starting the server.
    """
    # We expect one argument, the hostname without TLD.
    h = request.args.get('host')
    if h is None:
        # Without this guard a missing parameter reaches the model as None
        # and surfaces as an opaque 500 error.
        error = jsonify({'error': "missing required query parameter 'host'"})
        error.status_code = 400
        return error
    # We will return a JSON map with one field, 'is_malicious' which will be
    # 'legit' or 'dga', the two possible outputs of our model.
    r = {'is_malicious': model.evaluate_domain(h)}
    return jsonify(r)
if __name__ == "__main__":
    # Replace the imported ``model`` module with the classifier instance that
    # the request handler calls into.
    model = model.DGA()

    # Registering with the discovery service requires a concrete port.
    # Binding to port 0 lets the OS choose a free one; the socket is closed
    # again right away so that Flask can bind the same port below.
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    probe.bind(('localhost', 0))
    port = probe.getsockname()[1]
    probe.close()

    # The discovery service reads a small JSON map from endpoint.dat that
    # gives the full URL of this endpoint.
    endpoint_json = '{"url" : "http://0.0.0.0:%d"}' % port
    with open("endpoint.dat", "w") as endpoint_file:
        endpoint_file.write(endpoint_json)

    # Serve on the port that was just reserved.
    app.run(threaded=True, host="0.0.0.0", port=port)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Start the REST service with the current directory appended to the Python
# module search path, so that rest.py can import the local model module.
export PYTHONPATH="${PYTHONPATH}:."
/opt/anaconda/bin/python rest.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The model pickle files referenced by the constructor in model.py are the ones output by https://github.com/ClickSecurity/data_hacking/blob/master/dga_detection/dga_model_gen.py