Last active
August 17, 2020 10:33
-
-
Save LowriWilliams/9c6ad6f98603561d5a9724564948684a to your computer and use it in GitHub Desktop.
sms_adversarial/adversarial_generation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def mlp_model(input_shape, input_ph=None, logits=False):\n", | |
| " # \"\"\"Generate a MultiLayer Perceptron model\"\"\"\n", | |
| " model = Sequential()\n", | |
| "\n", | |
| " layers = [ \n", | |
| " Dense(256, activation='relu', input_shape=input_shape),\n", | |
| " Dropout(0.4),\n", | |
| " Dense(256, activation='relu'),\n", | |
| " Dropout(0.4),\n", | |
| " Dense(FLAGS.nb_classes),\n", | |
| " ]\n", | |
| " \n", | |
| " for l in layers:\n", | |
| " model.add(l)\n", | |
| " \n", | |
| " if logits:\n", | |
| " logit_tensor = model(input_ph)\n", | |
| "\n", | |
| " model.add(Activation(\"softmax\"))\n", | |
| "\n", | |
| " model.compile(loss='categorical_crossentropy',\n", | |
| " optimizer='adam',\n", | |
| " metrics =['accuracy'])\n", | |
| " model.summary()\n", | |
| "\n", | |
| " if logits:\n", | |
| " return model, logit_tensor\n", | |
| " return model" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def evaluate():\n", | |
| " \"\"\"Model evaluation function\"\"\"\n", | |
| " eval_params = {'batch_size': FLAGS.batch_size}\n", | |
| " train_acc = model_eval(sess, x, y, predictions , X_train , y_train , args=eval_params)\n", | |
| " test_acc = model_eval(sess, x, y, predictions , X_test , y_test , args=eval_params)\n", | |
| " print('Train acc: {:.2f} Test acc: {:.2f} '.format(train_acc, test_acc))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Instructions for updating:\n", | |
| "Colocations handled automatically by placer.\n", | |
| "Instructions for updating:\n", | |
| "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n", | |
| "_________________________________________________________________\n", | |
| "Layer (type) Output Shape Param # \n", | |
| "=================================================================\n", | |
| "dense_1 (Dense) (None, None, 256) 256256 \n", | |
| "_________________________________________________________________\n", | |
| "dropout_1 (Dropout) (None, None, 256) 0 \n", | |
| "_________________________________________________________________\n", | |
| "dense_2 (Dense) (None, None, 256) 65792 \n", | |
| "_________________________________________________________________\n", | |
| "dropout_2 (Dropout) (None, None, 256) 0 \n", | |
| "_________________________________________________________________\n", | |
| "dense_3 (Dense) (None, None, 2) 514 \n", | |
| "_________________________________________________________________\n", | |
| "activation_1 (Activation) (None, None, 2) 0 \n", | |
| "=================================================================\n", | |
| "Total params: 322,562\n", | |
| "Trainable params: 322,562\n", | |
| "Non-trainable params: 0\n", | |
| "_________________________________________________________________\n", | |
| "Instructions for updating:\n", | |
| "dim is deprecated, use axis instead\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Train acc: 0.86 Test acc: 0.87 \n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "True" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Sequential = keras.models.Sequential\n", | |
| "Dense = keras.layers.Dense\n", | |
| "Dropout = keras.layers.Dropout\n", | |
| "Activation = keras.layers.Activation\n", | |
| "\n", | |
| "plt.style.use('bmh')\n", | |
| "flags = tf.app.flags\n", | |
| "FLAGS = flags.FLAGS\n", | |
| "flags.DEFINE_integer('nb_epochs', 1, 'Number of epochs to train model') # was 20\n", | |
| "flags.DEFINE_integer('batch_size', 256, 'Size of training batches ') # was 32\n", | |
| "flags.DEFINE_float('learning_rate', 0.1, 'Learning rate for training ')\n", | |
| "flags.DEFINE_integer('nb_classes', y_train.shape[1], 'Number of classification classes ')\n", | |
| "flags.DEFINE_integer('source_samples', X_train.shape[1], 'Nb of test set examples to attack ')\n", | |
| "\n", | |
| "FLAGS = flags.FLAGS\n", | |
| "\n", | |
| "# Tensorflow placeholder variables (the dummy 'f' flag below presumably absorbs the Jupyter kernel's -f argument -- TODO confirm)\n", | |
| "tf.compat.v1.flags.DEFINE_string('f','','')\n", | |
| "\n", | |
| "x = tf.compat.v1.placeholder(tf.float32, shape=(None, X_train.shape[1]))\n", | |
| "y = tf.compat.v1.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))\n", | |
| "tf.compat.v1.set_random_seed(42)\n", | |
| "model = mlp_model((None, X_train.shape[1]))\n", | |
| "\n", | |
| "sess = tf.Session()\n", | |
| "keras.backend.set_session(sess)\n", | |
| "\n", | |
| "predictions = model(x)\n", | |
| "init = tf.global_variables_initializer()\n", | |
| "sess.run(init)\n", | |
| "\n", | |
| "# Train the model\n", | |
| "train_params = {\n", | |
| " 'nb_epochs': FLAGS.nb_epochs,\n", | |
| " 'batch_size': FLAGS.batch_size,\n", | |
| " 'learning_rate': FLAGS.learning_rate,\n", | |
| " 'verbose': 0}\n", | |
| "\n", | |
| "model_train(sess, x, y, predictions, X_train, y_train, evaluate=evaluate, args=train_params)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'keras.models.Sequential'>\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Generate adversarial samples for all test datapoints\n", | |
| "source_samples = X_test.shape[0]\n", | |
| "# Jacobian-based Saliency Map\n", | |
| "results = np.zeros((FLAGS.nb_classes, source_samples), dtype ='i')\n", | |
| "perturbations = np.zeros((FLAGS.nb_classes, source_samples), dtype ='f')\n", | |
| "grads = jacobian_graph(predictions, x, FLAGS.nb_classes)\n", | |
| "\n", | |
| "X_adv = np.zeros((source_samples, X_test.shape[1]))\n", | |
| "\n", | |
| "wrap = KerasModelWrapper(model)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Select the samples we want to perturb into adversarial examples\n", | |
| "samples_to_perturb = np.where(y_test[:,1] == 1)[0] # only malicious\n", | |
| "nb_classes = 2 # malicious or benign \n", | |
| "\n", | |
| "def model_pred(sess, x, predictions, samples):\n", | |
| " feed_dict = {x: samples}\n", | |
| " probabilities = sess.run(predictions, feed_dict)\n", | |
| "\n", | |
| " print(probabilities, \"************\")\n", | |
| "\n", | |
| " if samples.shape[0] == 1:\n", | |
| " return np.argmax(probabilities)\n", | |
| " else:\n", | |
| " return np.argmax(probabilities, axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def generate_adv_samples(samples_to_perturb, jsma_params):\n", | |
| " adversarial_samples = []\n", | |
| " samples_perturbed_idxs = []\n", | |
| "\n", | |
| " for i, sample_ind in enumerate(samples_to_perturb):\n", | |
| " sample = X_test[sample_ind: sample_ind+1]\n", | |
| "\n", | |
| " # We want to find an adversarial example for each possible target class\n", | |
| " # (i.e. all classes that differ from the label given in the dataset)\n", | |
| " current_class = int(np.argmax(y_test[sample_ind]))\n", | |
| " target = 1 - current_class\n", | |
| "\n", | |
| " # This call runs the Jacobian-based saliency map approach\n", | |
| " one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)\n", | |
| " one_hot_target[0, target] = 1\n", | |
| " jsma_params['y_target'] = one_hot_target\n", | |
| "\n", | |
| " adv_x = jsma.generate_np(sample, **jsma_params) # adversarial sample generated = adv_x\n", | |
| " adversarial_samples.append(adv_x)\n", | |
| " samples_perturbed_idxs.append(sample_ind)\n", | |
| "\n", | |
| " # Check if success was achieved\n", | |
| " adv_tgt = np.zeros((1, FLAGS.nb_classes)) # adversarial target = adv_tgt\n", | |
| " adv_tgt[:,target] = 1\n", | |
| " res = int(model_eval(sess, x, y, predictions, adv_x, adv_tgt, args={'batch_size': 1}))\n", | |
| "\n", | |
| " # Compute number of modified features\n", | |
| " adv_x_reshape = adv_x.reshape(-1)\n", | |
| " test_in_reshape = X_test[sample_ind].reshape(-1)\n", | |
| " nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]\n", | |
| " percent_perturb = float(nb_changed) / adv_x.reshape(-1).shape[0]\n", | |
| "\n", | |
| " # Update the arrays for later analysis\n", | |
| " results[target, sample_ind] = res\n", | |
| " perturbations[target, sample_ind] = percent_perturb\n", | |
| "\n", | |
| " malicious_targets = np.zeros((len(adversarial_samples), 2))\n", | |
| " malicious_targets[:, 1] = 1\n", | |
| "\n", | |
| " adversarial_samples = np.stack(adversarial_samples).squeeze()\n", | |
| " original_samples = X_test[np.array(samples_perturbed_idxs)]\n", | |
| " \n", | |
| " return adversarial_samples" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "source": [ | |
| "gamma = []\n", | |
| "theta = []\n", | |
| "\n", | |
| "import itertools\n", | |
| "\n", | |
| "for i in range(1, 10):\n", | |
| " gamma.append(i/10)\n", | |
| " theta.append(i/10)\n", | |
| "\n", | |
| "combinations = list(itertools.product(gamma, theta))\n", | |
| "\n", | |
| "jsma = SaliencyMapMethod(wrap, sess=sess)\n", | |
| "\n", | |
| "final_results = []\n", | |
| "\n", | |
| "for i in combinations:\n", | |
| " jsma_params = {'theta': i[1], 'gamma': i[0], 'clip_min': 0., 'clip_max': 1., 'y_target': None}\n", | |
| " adversarial_samples = generate_adv_samples(samples_to_perturb, jsma_params)\n", | |
| " adv_test = pd.DataFrame(adversarial_samples, columns = headers)\n", | |
| "\n", | |
| " adv_test['label_not_spam'] = 0\n", | |
| " adv_test['label_spam'] = 1\n", | |
| "\n", | |
| " test = pd.DataFrame(X_test, columns = headers)\n", | |
| " test['label_not_spam'] = y_test[:, 0]\n", | |
| " test['label_spam'] = y_test[:, 1]\n", | |
| "\n", | |
| " not_spam = test[test['label_not_spam'] == 1]\n", | |
| "\n", | |
| " joined = not_spam.append(adv_test, ignore_index=True)\n", | |
| "\n", | |
| " X_test_adv = np.array(joined[headers])\n", | |
| " y_test_adv = np.array(joined[['label_not_spam', 'label_spam']])\n", | |
| " \n", | |
| " final_results.append(f1_score(y_test, classify(X_train, y_train, X_test_adv, y_test_adv), average='weighted'))" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment