-
-
Save mwiewior/3b2b15ed4f095bac56de0c9cbb2c4278 to your computer and use it in GitHub Desktop.
REST API for the DGA (domain generation algorithm) detection model from https://github.com/ClickSecurity/data_hacking/tree/master/dga_detection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sklearn | |
import numpy as np | |
import math | |
import pickle | |
import collections | |
class DGA:
    """Wrapper around the pickled DGA-detection model artifacts.

    Constructing an instance loads five pickled objects from the current
    working directory into ``self.model``:

    * ``clf``          -- the classifier used for the final prediction
    * ``alexa_vc``     -- vectorizer applied to the domain (Alexa n-grams)
    * ``alexa_counts`` -- counts multiplied with the Alexa vectorizer output
    * ``dict_vc``      -- vectorizer applied to the domain (dictionary n-grams)
    * ``dict_counts``  -- counts multiplied with the dictionary vectorizer output
    """

    # (model key, pickle path) pairs, in load order.  Paths are relative, so
    # the process must be started from the directory holding the files.
    _MODEL_FILES = (
        ('clf', './dga_model_random_forest.model'),
        ('alexa_vc', './dga_model_alexa_vectorizor.model'),
        ('alexa_counts', './dga_model_alexa_counts.model'),
        ('dict_vc', './dga_model_dict_vectorizor.model'),
        ('dict_counts', './dga_model_dict_counts.model'),
    )

    def __init__(self):
        """Load every model artifact from disk.

        Raises:
            IOError/OSError: if one of the model files cannot be opened.
        """
        self.model = {}
        for key, path in self._MODEL_FILES:
            self.model[key] = self._load_pickle(path)

    def _load_pickle(self, path):
        """Return the object unpickled from ``path``, closing the file afterwards."""
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file.  Only ship model files that come from a trusted source.
        with open(path, 'rb') as handle:
            return pickle.loads(handle.read())

    def evaluate_domain(self, domain):
        """Classify a single domain name.

        Args:
            domain: the host name to score, as a string.

        Returns:
            The first element of the classifier's ``predict`` output for this
            domain (presumably the label 'legit' or 'dga' -- confirm against
            the trained model).
        """
        alexa_match = self.model['alexa_counts'] * self.model['alexa_vc'].transform([domain]).T
        dict_match = self.model['dict_counts'] * self.model['dict_vc'].transform([domain]).T

        # Assemble feature matrix (for just one domain)
        # NOTE(review): alexa_match / dict_match are whatever the product
        # above yields, not necessarily plain Python scalars -- confirm the
        # installed numpy/scikit-learn versions accept this mixed row.
        X = [len(domain), self.entropy(domain), alexa_match, dict_match]
        y_pred = self.model['clf'].predict([X])[0]
        return y_pred

    def entropy(self, s):
        """Return the Shannon entropy, in bits, of the characters of ``s``.

        An empty string yields 0 because there are no characters to sum over.
        """
        p, lns = collections.Counter(s), float(len(s))
        return -sum(count / lns * math.log(count / lns, 2) for count in p.values())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import model | |
from flask import Flask | |
from flask import request,jsonify | |
import socket | |
app = Flask(__name__) | |
@app.route("/apply", methods=['GET'])
def predict():
    """Score the host name passed in the ``host`` query parameter.

    Returns:
        A JSON map with one field, 'is_malicious', holding the model output.
        If ``host`` is absent, a JSON map with an 'error' field and HTTP
        status 400 is returned instead.

    Note:
        ``model`` must be the ``DGA`` instance that the ``__main__`` section
        binds over the imported module of the same name; the view only works
        when this file is run as a script.
    """
    # We expect one argument, the hostname without TLD.
    h = request.args.get('host')
    if h is None:
        # Without this guard None reaches the model and surfaces as an
        # opaque 500; tell the caller what is wrong instead.
        return jsonify({'error': "missing required query parameter 'host'"}), 400

    r = {}
    r['is_malicious'] = model.evaluate_domain(h)
    # We will return a JSON map with one field, 'is_malicious' which will be
    # 'legit' or 'dga', the two possible outputs of our model.
    return jsonify(r)
if __name__ == "__main__":
    # Create my model object that I want to expose.
    # NOTE: this deliberately rebinds the module-level name ``model`` from the
    # imported module to a DGA instance; the request handler relies on it.
    model = model.DGA()

    # Address Flask will listen on.  The free-port probe below uses the same
    # address, so a port reported free is free on the interface we serve.
    bind_host = "0.0.0.0"

    # In order to register with model as a service, we need to bind to a port
    # and inform the discovery service of the endpoint. Therefore,
    # we will bind to a port and close the socket to reserve it.
    # (Port 0 asks the OS for any free port.  Another process could still
    # grab the port between close() and app.run(); that window is inherent
    # to this reserve-then-release approach.)
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind((bind_host, 0))
        port = sock.getsockname()[1]
    finally:
        # Always release the probe socket, even if bind() fails.
        sock.close()

    with open("endpoint.dat", "w") as text_file:
        # To inform the discovery service, we need to write a file with a simple
        # JSON Map indicating the full URL that we've bound to.
        text_file.write("{\"url\" : \"http://0.0.0.0:%d\"}" % port)

    # Make sure flask uses the port we reserved
    app.run(threaded=True, host=bind_host, port=port)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Start the REST service with the current directory appended to the Python
# module search path, so modules that live next to rest.py can be imported.
# Run this from the directory that contains rest.py.
export PYTHONPATH="${PYTHONPATH}:."
/opt/anaconda/bin/python rest.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment