LowriWilliams · August 17, 2020 10:33
diff --git a/adversarial_generation.ipynb b/adversarial_generation.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mlp_model(input_shape, input_ph=None, logits=False):\n",
    "    \"\"\"Generate a MultiLayer Perceptron model.\n",
    "\n",
    "    Two 256-unit ReLU hidden layers, each followed by Dropout(0.4), feed a\n",
    "    linear layer of FLAGS.nb_classes units and a final softmax activation.\n",
    "    The model is compiled and its summary printed before returning.\n",
    "\n",
    "    Args:\n",
    "        input_shape: forwarded to the first Dense layer as input_shape.\n",
    "        input_ph: tensor the pre-softmax stack is applied to; required when\n",
    "            logits is True, unused otherwise.\n",
    "        logits: when True, also return the pre-softmax output tensor.\n",
    "\n",
    "    Returns:\n",
    "        The compiled model, or (model, logit_tensor) when logits is True.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if logits is True but input_ph is None.\n",
    "    \"\"\"\n",
    "    if logits and input_ph is None:\n",
    "        raise ValueError('input_ph must be provided when logits=True')\n",
    "\n",
    "    model = Sequential()\n",
    "\n",
    "    layers = [\n",
    "        Dense(256, activation='relu', input_shape=input_shape),\n",
    "        Dropout(0.4),\n",
    "        Dense(256, activation='relu'),\n",
    "        Dropout(0.4),\n",
    "        Dense(FLAGS.nb_classes),\n",
    "    ]\n",
    "    for layer in layers:\n",
    "        model.add(layer)\n",
    "\n",
    "    # The logits must be captured before the softmax layer is appended.\n",
    "    if logits:\n",
    "        logit_tensor = model(input_ph)\n",
    "\n",
    "    model.add(Activation(\"softmax\"))\n",
    "\n",
    "    model.compile(loss='categorical_crossentropy',\n",
    "                  optimizer='adam',\n",
    "                  metrics=['accuracy'])\n",
    "    model.summary()\n",
    "\n",
    "    if logits:\n",
    "        return model, logit_tensor\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate():\n",
    "    \"\"\"Print the model accuracy on the training set and on the test set.\n",
    "\n",
    "    Reads the notebook globals sess, x, y, predictions, X_train, y_train,\n",
    "    X_test, y_test and FLAGS.batch_size; returns nothing.\n",
    "    \"\"\"\n",
    "    eval_args = {'batch_size': FLAGS.batch_size}\n",
    "    # Training split first, then test split, matching the order of the message.\n",
    "    accuracies = [\n",
    "        model_eval(sess, x, y, predictions, features, labels, args=eval_args)\n",
    "        for features, labels in ((X_train, y_train), (X_test, y_test))\n",
    "    ]\n",
    "    print('Train acc: {:.2f} Test  acc: {:.2f} '.format(*accuracies))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n",
      "Instructions for updating:\n",
      "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "dense_1 (Dense)              (None, None, 256)         256256    \n",
      "_________________________________________________________________\n",
      "dropout_1 (Dropout)          (None, None, 256)         0         \n",
      "_________________________________________________________________\n",
      "dense_2 (Dense)              (None, None, 256)         65792     \n",
      "_________________________________________________________________\n",
      "dropout_2 (Dropout)          (None, None, 256)         0         \n",
      "_________________________________________________________________\n",
      "dense_3 (Dense)              (None, None, 2)           514       \n",
      "_________________________________________________________________\n",
      "activation_1 (Activation)    (None, None, 2)           0         \n",
      "=================================================================\n",
      "Total params: 322,562\n",
      "Trainable params: 322,562\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "Instructions for updating:\n",
      "dim is deprecated, use axis instead\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train acc: 0.86 Test  acc: 0.87 \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Short aliases for the Keras classes used by mlp_model.\n",
    "Sequential = keras.models.Sequential\n",
    "Dense = keras.layers.Dense\n",
    "Dropout = keras.layers.Dropout\n",
    "Activation = keras.layers.Activation\n",
    "\n",
    "plt.style.use('bmh')\n",
    "\n",
    "# Experiment configuration, exposed as TensorFlow flags.\n",
    "# NOTE(review): re-running this cell in the same kernel presumably fails on the\n",
    "# already-defined flags; restart the kernel first - confirm.\n",
    "flags = tf.app.flags\n",
    "FLAGS = flags.FLAGS\n",
    "flags.DEFINE_integer('nb_epochs', 1, 'Number  of  epochs  to train  model') # was 20\n",
    "flags.DEFINE_integer('batch_size', 256, 'Size of  training  batches ') # was 32\n",
    "flags.DEFINE_float('learning_rate', 0.1, 'Learning  rate  for  training ')\n",
    "flags.DEFINE_integer('nb_classes', y_train.shape[1], 'Number  of  classification  classes ')\n",
    "# Counts test examples, so the default is the number of rows in X_test.\n",
    "flags.DEFINE_integer('source_samples', X_test.shape[0], 'Nb of test  set  examples  to  attack ')\n",
    "\n",
    "# NOTE(review): dummy 'f' flag, presumably absorbing the -f argument the\n",
    "# Jupyter kernel is started with - confirm.\n",
    "tf.compat.v1.flags.DEFINE_string('f','','')\n",
    "\n",
    "# Tensorflow  placeholder  variables\n",
    "x = tf.compat.v1.placeholder(tf.float32, shape=(None, X_train.shape[1]))\n",
    "y = tf.compat.v1.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))\n",
    "tf.compat.v1.set_random_seed(42)\n",
    "\n",
    "# Keras input_shape excludes the batch axis, so only the feature count is given.\n",
    "model = mlp_model((X_train.shape[1],))\n",
    "\n",
    "sess = tf.Session()\n",
    "keras.backend.set_session(sess)\n",
    "\n",
    "predictions = model(x)\n",
    "init = tf.global_variables_initializer()\n",
    "sess.run(init)\n",
    "\n",
    "# Train  the  model\n",
    "train_params = {\n",
    "    'nb_epochs': FLAGS.nb_epochs,\n",
    "    'batch_size': FLAGS.batch_size,\n",
    "    'learning_rate': FLAGS.learning_rate,\n",
    "    'verbose': 0}\n",
    "\n",
    "model_train(sess, x, y, predictions, X_train, y_train, evaluate=evaluate, args=train_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'keras.models.Sequential'>\n"
     ]
    }
   ],
   "source": [
    "# Every test datapoint is a candidate for the attack.\n",
    "source_samples = X_test.shape[0]\n",
    "\n",
    "# Model wrapper handed to the attack later on.\n",
    "wrap = KerasModelWrapper(model)\n",
    "\n",
    "# Jacobian graph built from the predictions and the input placeholder.\n",
    "grads = jacobian_graph(predictions, x, FLAGS.nb_classes)\n",
    "\n",
    "# Per-attack bookkeeping, indexed as [target class, test sample]:\n",
    "# integer results and float perturbation rates.\n",
    "results = np.zeros((FLAGS.nb_classes, source_samples), dtype='i')\n",
    "perturbations = np.zeros(results.shape, dtype='f')\n",
    "\n",
    "# Zero-filled buffer with one row per test sample and one column per feature.\n",
    "X_adv = np.zeros((source_samples, X_test.shape[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Indices of the test samples we want to perturb into adversarial examples\n",
    "samples_to_perturb = np.where(y_test[:,1] == 1)[0] # only malicious\n",
    "nb_classes = 2 # malicious or benign \n",
    "\n",
    "def model_pred(sess, x, predictions, samples):\n",
    "    \"\"\"Return the predicted class index for each row of samples.\n",
    "\n",
    "    Args:\n",
    "        sess: session used to run the predictions tensor.\n",
    "        x: input placeholder that is fed with samples.\n",
    "        predictions: tensor evaluated to obtain the per-class scores.\n",
    "        samples: array of input rows.\n",
    "\n",
    "    Returns:\n",
    "        A single index when samples has exactly one row, otherwise an array\n",
    "        with one index per row.\n",
    "    \"\"\"\n",
    "    probabilities = sess.run(predictions, {x: samples})\n",
    "\n",
    "    if samples.shape[0] == 1:\n",
    "        return np.argmax(probabilities)\n",
    "    return np.argmax(probabilities, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_adv_samples(samples_to_perturb, jsma_params):\n",
    "    \"\"\"Craft one JSMA adversarial example per selected test sample.\n",
    "\n",
    "    Each selected row of X_test is attacked towards the opposite class\n",
    "    (binary labels are assumed: target = 1 - current class). As a side\n",
    "    effect the global results and perturbations arrays are updated at\n",
    "    [target, sample index] with the attack outcome and the fraction of\n",
    "    features that changed.\n",
    "\n",
    "    Args:\n",
    "        samples_to_perturb: iterable of row indices into X_test / y_test.\n",
    "        jsma_params: keyword arguments for jsma.generate_np; its 'y_target'\n",
    "            entry is overridden per sample. The dict itself is not modified.\n",
    "\n",
    "    Returns:\n",
    "        2-D array with one adversarial sample per row, in the order of\n",
    "        samples_to_perturb (zero rows when nothing was selected).\n",
    "    \"\"\"\n",
    "    adversarial_samples = []\n",
    "\n",
    "    for sample_ind in samples_to_perturb:\n",
    "        # Slice rather than index so the sample keeps its leading batch axis.\n",
    "        sample = X_test[sample_ind: sample_ind+1]\n",
    "\n",
    "        # The attack targets the class that differs from the dataset label.\n",
    "        current_class = int(np.argmax(y_test[sample_ind]))\n",
    "        target = 1 - current_class\n",
    "\n",
    "        # This call runs the Jacobian-based saliency map approach\n",
    "        one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)\n",
    "        one_hot_target[0, target] = 1\n",
    "        # Work on a copy so the caller's parameter dict is left untouched.\n",
    "        attack_params = dict(jsma_params, y_target=one_hot_target)\n",
    "\n",
    "        adv_x = jsma.generate_np(sample, **attack_params) # adversarial sample generated = adv_x\n",
    "        adversarial_samples.append(adv_x)\n",
    "\n",
    "        # Check if success was achieved\n",
    "        adv_tgt = np.zeros((1, FLAGS.nb_classes)) # adversarial target = adv_tgt\n",
    "        adv_tgt[:, target] = 1\n",
    "        res = int(model_eval(sess, x, y, predictions, adv_x, adv_tgt, args={'batch_size': 1}))\n",
    "\n",
    "        # Compute the fraction of modified features\n",
    "        adv_flat = adv_x.reshape(-1)\n",
    "        nb_changed = np.count_nonzero(adv_flat != X_test[sample_ind].reshape(-1))\n",
    "        percent_perturb = float(nb_changed) / adv_flat.shape[0]\n",
    "\n",
    "        # Update the arrays for later analysis\n",
    "        results[target, sample_ind] = res\n",
    "        perturbations[target, sample_ind] = percent_perturb\n",
    "\n",
    "    if not adversarial_samples:\n",
    "        return np.empty((0, X_test.shape[1]))\n",
    "\n",
    "    # reshape (not squeeze) keeps the result 2-D even when a single sample was attacked.\n",
    "    return np.stack(adversarial_samples).reshape(len(adversarial_samples), -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "import itertools\n",
    "\n",
    "# Grid of JSMA hyper-parameters: every (gamma, theta) pair with values 0.1 ... 0.9.\n",
    "gamma = [step / 10 for step in range(1, 10)]\n",
    "theta = [step / 10 for step in range(1, 10)]\n",
    "combinations = list(itertools.product(gamma, theta))\n",
    "\n",
    "jsma = SaliencyMapMethod(wrap, sess=sess)\n",
    "\n",
    "# The untouched not-spam test rows are identical for every combination, so build them once.\n",
    "test = pd.DataFrame(X_test, columns=headers)\n",
    "test['label_not_spam'] = y_test[:, 0]\n",
    "test['label_spam'] = y_test[:, 1]\n",
    "not_spam = test[test['label_not_spam'] == 1]\n",
    "\n",
    "# One weighted F1 score per entry of combinations, in the same order.\n",
    "final_results = []\n",
    "\n",
    "for gamma_value, theta_value in combinations:\n",
    "    jsma_params = {'theta': theta_value, 'gamma': gamma_value, 'clip_min': 0., 'clip_max': 1., 'y_target': None}\n",
    "    adversarial_samples = generate_adv_samples(samples_to_perturb, jsma_params)\n",
    "\n",
    "    # Adversarial rows keep their true label: spam.\n",
    "    adv_test = pd.DataFrame(adversarial_samples, columns=headers)\n",
    "    adv_test['label_not_spam'] = 0\n",
    "    adv_test['label_spam'] = 1\n",
    "\n",
    "    # pd.concat replaces DataFrame.append, which newer pandas no longer provides.\n",
    "    joined = pd.concat([not_spam, adv_test], ignore_index=True)\n",
    "\n",
    "    X_test_adv = np.array(joined[headers])\n",
    "    y_test_adv = np.array(joined[['label_not_spam', 'label_spam']])\n",
    "\n",
    "    # Score against y_test_adv: its rows are aligned with X_test_adv, whereas\n",
    "    # y_test is in the original test order.\n",
    "    # NOTE(review): assumes classify returns predictions for X_test_adv row by row - confirm.\n",
    "    final_results.append(f1_score(y_test_adv, classify(X_train, y_train, X_test_adv, y_test_adv), average='weighted'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"def mlp_model(input_shape, input_ph=None, logits=False):\n",
	"    \"\"\"Generate a MultiLayer Perceptron model.\n",
	"\n",
	"    Two 256-unit ReLU hidden layers, each followed by Dropout(0.4), feed a\n",
	"    linear layer of FLAGS.nb_classes units and a final softmax activation.\n",
	"    The model is compiled and its summary printed before returning.\n",
	"\n",
	"    Args:\n",
	"        input_shape: forwarded to the first Dense layer as input_shape.\n",
	"        input_ph: tensor the pre-softmax stack is applied to; required when\n",
	"            logits is True, unused otherwise.\n",
	"        logits: when True, also return the pre-softmax output tensor.\n",
	"\n",
	"    Returns:\n",
	"        The compiled model, or (model, logit_tensor) when logits is True.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: if logits is True but input_ph is None.\n",
	"    \"\"\"\n",
	"    if logits and input_ph is None:\n",
	"        raise ValueError('input_ph must be provided when logits=True')\n",
	"\n",
	"    model = Sequential()\n",
	"\n",
	"    layers = [\n",
	"        Dense(256, activation='relu', input_shape=input_shape),\n",
	"        Dropout(0.4),\n",
	"        Dense(256, activation='relu'),\n",
	"        Dropout(0.4),\n",
	"        Dense(FLAGS.nb_classes),\n",
	"    ]\n",
	"    for layer in layers:\n",
	"        model.add(layer)\n",
	"\n",
	"    # The logits must be captured before the softmax layer is appended.\n",
	"    if logits:\n",
	"        logit_tensor = model(input_ph)\n",
	"\n",
	"    model.add(Activation(\"softmax\"))\n",
	"\n",
	"    model.compile(loss='categorical_crossentropy',\n",
	"                  optimizer='adam',\n",
	"                  metrics=['accuracy'])\n",
	"    model.summary()\n",
	"\n",
	"    if logits:\n",
	"        return model, logit_tensor\n",
	"    return model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [],
	"source": [
	"def evaluate():\n",
	"    \"\"\"Print the model accuracy on the training set and on the test set.\n",
	"\n",
	"    Reads the notebook globals sess, x, y, predictions, X_train, y_train,\n",
	"    X_test, y_test and FLAGS.batch_size; returns nothing.\n",
	"    \"\"\"\n",
	"    eval_params = {'batch_size': FLAGS.batch_size}\n",
	"    train_acc = model_eval(sess, x, y, predictions, X_train, y_train, args=eval_params)\n",
	"    test_acc = model_eval(sess, x, y, predictions, X_test, y_test, args=eval_params)\n",
	"    print('Train acc: {:.2f} Test acc: {:.2f} '.format(train_acc, test_acc))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Instructions for updating:\n",
	"Colocations handled automatically by placer.\n",
	"Instructions for updating:\n",
	"Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n",
	"_________________________________________________________________\n",
	"Layer (type) Output Shape Param # \n",
	"=================================================================\n",
	"dense_1 (Dense) (None, None, 256) 256256 \n",
	"_________________________________________________________________\n",
	"dropout_1 (Dropout) (None, None, 256) 0 \n",
	"_________________________________________________________________\n",
	"dense_2 (Dense) (None, None, 256) 65792 \n",
	"_________________________________________________________________\n",
	"dropout_2 (Dropout) (None, None, 256) 0 \n",
	"_________________________________________________________________\n",
	"dense_3 (Dense) (None, None, 2) 514 \n",
	"_________________________________________________________________\n",
	"activation_1 (Activation) (None, None, 2) 0 \n",
	"=================================================================\n",
	"Total params: 322,562\n",
	"Trainable params: 322,562\n",
	"Non-trainable params: 0\n",
	"_________________________________________________________________\n",
	"Instructions for updating:\n",
	"dim is deprecated, use axis instead\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Train acc: 0.86 Test acc: 0.87 \n"
	]
	},
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Short aliases for the Keras classes used by mlp_model.\n",
	"Sequential = keras.models.Sequential\n",
	"Dense = keras.layers.Dense\n",
	"Dropout = keras.layers.Dropout\n",
	"Activation = keras.layers.Activation\n",
	"\n",
	"plt.style.use('bmh')\n",
	"\n",
	"# Experiment configuration, exposed as TensorFlow flags.\n",
	"# NOTE(review): re-running this cell in the same kernel presumably fails on the\n",
	"# already-defined flags; restart the kernel first - confirm.\n",
	"flags = tf.app.flags\n",
	"FLAGS = flags.FLAGS\n",
	"flags.DEFINE_integer('nb_epochs', 1, 'Number of epochs to train model') # was 20\n",
	"flags.DEFINE_integer('batch_size', 256, 'Size of training batches ') # was 32\n",
	"flags.DEFINE_float('learning_rate', 0.1, 'Learning rate for training ')\n",
	"flags.DEFINE_integer('nb_classes', y_train.shape[1], 'Number of classification classes ')\n",
	"# Counts test examples, so the default is the number of rows in X_test.\n",
	"flags.DEFINE_integer('source_samples', X_test.shape[0], 'Nb of test set examples to attack ')\n",
	"\n",
	"# NOTE(review): dummy 'f' flag, presumably absorbing the -f argument the\n",
	"# Jupyter kernel is started with - confirm.\n",
	"tf.compat.v1.flags.DEFINE_string('f','','')\n",
	"\n",
	"# Tensorflow placeholder variables\n",
	"x = tf.compat.v1.placeholder(tf.float32, shape=(None, X_train.shape[1]))\n",
	"y = tf.compat.v1.placeholder(tf.float32, shape=(None, FLAGS.nb_classes))\n",
	"tf.compat.v1.set_random_seed(42)\n",
	"\n",
	"# Keras input_shape excludes the batch axis, so only the feature count is given.\n",
	"model = mlp_model((X_train.shape[1],))\n",
	"\n",
	"sess = tf.Session()\n",
	"keras.backend.set_session(sess)\n",
	"\n",
	"predictions = model(x)\n",
	"init = tf.global_variables_initializer()\n",
	"sess.run(init)\n",
	"\n",
	"# Train the model\n",
	"train_params = {\n",
	"    'nb_epochs': FLAGS.nb_epochs,\n",
	"    'batch_size': FLAGS.batch_size,\n",
	"    'learning_rate': FLAGS.learning_rate,\n",
	"    'verbose': 0}\n",
	"\n",
	"model_train(sess, x, y, predictions, X_train, y_train, evaluate=evaluate, args=train_params)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"<class 'keras.models.Sequential'>\n"
	]
	}
	],
	"source": [
	"# Every test datapoint is a candidate for the attack.\n",
	"source_samples = X_test.shape[0]\n",
	"\n",
	"# Model wrapper handed to the attack later on.\n",
	"wrap = KerasModelWrapper(model)\n",
	"\n",
	"# Jacobian graph built from the predictions and the input placeholder.\n",
	"grads = jacobian_graph(predictions, x, FLAGS.nb_classes)\n",
	"\n",
	"# Per-attack bookkeeping, indexed as [target class, test sample]:\n",
	"# integer results and float perturbation rates.\n",
	"results = np.zeros((FLAGS.nb_classes, source_samples), dtype='i')\n",
	"perturbations = np.zeros(results.shape, dtype='f')\n",
	"\n",
	"# Zero-filled buffer with one row per test sample and one column per feature.\n",
	"X_adv = np.zeros((source_samples, X_test.shape[1]))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Indices of the test samples we want to perturb into adversarial examples\n",
	"samples_to_perturb = np.where(y_test[:,1] == 1)[0] # only malicious\n",
	"nb_classes = 2 # malicious or benign \n",
	"\n",
	"def model_pred(sess, x, predictions, samples):\n",
	"    \"\"\"Return the predicted class index for each row of samples.\n",
	"\n",
	"    Args:\n",
	"        sess: session used to run the predictions tensor.\n",
	"        x: input placeholder that is fed with samples.\n",
	"        predictions: tensor evaluated to obtain the per-class scores.\n",
	"        samples: array of input rows.\n",
	"\n",
	"    Returns:\n",
	"        A single index when samples has exactly one row, otherwise an array\n",
	"        with one index per row.\n",
	"    \"\"\"\n",
	"    probabilities = sess.run(predictions, {x: samples})\n",
	"\n",
	"    if samples.shape[0] == 1:\n",
	"        return np.argmax(probabilities)\n",
	"    return np.argmax(probabilities, axis=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"def generate_adv_samples(samples_to_perturb, jsma_params):\n",
	"    \"\"\"Craft one JSMA adversarial example per selected test sample.\n",
	"\n",
	"    Each selected row of X_test is attacked towards the opposite class\n",
	"    (binary labels are assumed: target = 1 - current class). As a side\n",
	"    effect the global results and perturbations arrays are updated at\n",
	"    [target, sample index] with the attack outcome and the fraction of\n",
	"    features that changed.\n",
	"\n",
	"    Args:\n",
	"        samples_to_perturb: iterable of row indices into X_test / y_test.\n",
	"        jsma_params: keyword arguments for jsma.generate_np; its 'y_target'\n",
	"            entry is overridden per sample. The dict itself is not modified.\n",
	"\n",
	"    Returns:\n",
	"        2-D array with one adversarial sample per row, in the order of\n",
	"        samples_to_perturb (zero rows when nothing was selected).\n",
	"    \"\"\"\n",
	"    adversarial_samples = []\n",
	"\n",
	"    for sample_ind in samples_to_perturb:\n",
	"        # Slice rather than index so the sample keeps its leading batch axis.\n",
	"        sample = X_test[sample_ind: sample_ind+1]\n",
	"\n",
	"        # The attack targets the class that differs from the dataset label.\n",
	"        current_class = int(np.argmax(y_test[sample_ind]))\n",
	"        target = 1 - current_class\n",
	"\n",
	"        # This call runs the Jacobian-based saliency map approach\n",
	"        one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)\n",
	"        one_hot_target[0, target] = 1\n",
	"        # Work on a copy so the caller's parameter dict is left untouched.\n",
	"        attack_params = dict(jsma_params, y_target=one_hot_target)\n",
	"\n",
	"        adv_x = jsma.generate_np(sample, **attack_params) # adversarial sample generated = adv_x\n",
	"        adversarial_samples.append(adv_x)\n",
	"\n",
	"        # Check if success was achieved\n",
	"        adv_tgt = np.zeros((1, FLAGS.nb_classes)) # adversarial target = adv_tgt\n",
	"        adv_tgt[:, target] = 1\n",
	"        res = int(model_eval(sess, x, y, predictions, adv_x, adv_tgt, args={'batch_size': 1}))\n",
	"\n",
	"        # Compute the fraction of modified features\n",
	"        adv_flat = adv_x.reshape(-1)\n",
	"        nb_changed = np.count_nonzero(adv_flat != X_test[sample_ind].reshape(-1))\n",
	"        percent_perturb = float(nb_changed) / adv_flat.shape[0]\n",
	"\n",
	"        # Update the arrays for later analysis\n",
	"        results[target, sample_ind] = res\n",
	"        perturbations[target, sample_ind] = percent_perturb\n",
	"\n",
	"    if not adversarial_samples:\n",
	"        return np.empty((0, X_test.shape[1]))\n",
	"\n",
	"    # reshape (not squeeze) keeps the result 2-D even when a single sample was attacked.\n",
	"    return np.stack(adversarial_samples).reshape(len(adversarial_samples), -1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [],
	"source": [
	"import itertools\n",
	"\n",
	"# Grid of JSMA hyper-parameters: every (gamma, theta) pair with values 0.1 ... 0.9.\n",
	"gamma = [step / 10 for step in range(1, 10)]\n",
	"theta = [step / 10 for step in range(1, 10)]\n",
	"combinations = list(itertools.product(gamma, theta))\n",
	"\n",
	"jsma = SaliencyMapMethod(wrap, sess=sess)\n",
	"\n",
	"# The untouched not-spam test rows are identical for every combination, so build them once.\n",
	"test = pd.DataFrame(X_test, columns=headers)\n",
	"test['label_not_spam'] = y_test[:, 0]\n",
	"test['label_spam'] = y_test[:, 1]\n",
	"not_spam = test[test['label_not_spam'] == 1]\n",
	"\n",
	"# One weighted F1 score per entry of combinations, in the same order.\n",
	"final_results = []\n",
	"\n",
	"for gamma_value, theta_value in combinations:\n",
	"    jsma_params = {'theta': theta_value, 'gamma': gamma_value, 'clip_min': 0., 'clip_max': 1., 'y_target': None}\n",
	"    adversarial_samples = generate_adv_samples(samples_to_perturb, jsma_params)\n",
	"\n",
	"    # Adversarial rows keep their true label: spam.\n",
	"    adv_test = pd.DataFrame(adversarial_samples, columns=headers)\n",
	"    adv_test['label_not_spam'] = 0\n",
	"    adv_test['label_spam'] = 1\n",
	"\n",
	"    # pd.concat replaces DataFrame.append, which newer pandas no longer provides.\n",
	"    joined = pd.concat([not_spam, adv_test], ignore_index=True)\n",
	"\n",
	"    X_test_adv = np.array(joined[headers])\n",
	"    y_test_adv = np.array(joined[['label_not_spam', 'label_spam']])\n",
	"\n",
	"    # Score against y_test_adv: its rows are aligned with X_test_adv, whereas\n",
	"    # y_test is in the original test order.\n",
	"    # NOTE(review): assumes classify returns predictions for X_test_adv row by row - confirm.\n",
	"    final_results.append(f1_score(y_test_adv, classify(X_train, y_train, X_test_adv, y_test_adv), average='weighted'))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}