Created
August 8, 2021 10:20
-
-
Save unrealwill/c480371c3a4bf3abb29856c29197c0be to your computer and use it in GitHub Desktop.
Proof of Concept : generating collisions on a neural perceptual hash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf #We need tensorflow 2.x | |
import numpy as np | |
# Length of the hash in bits; also used as the width of the model's final
# (hyperplane) layer.
hashLength = 256
def buildModel():
    """Build the randomly initialised network used as the stand-in neural hash.

    The network takes a 1000-dimensional input, applies three 300-unit SELU
    dense layers, and ends with a linear dense layer of ``hashLength`` units
    whose weights act as the random hyperplanes of the LSH.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    # Fixing the seed would simulate the fact that the network is known and
    # does not change between runs:
    # tf.random.set_seed(42)
    model = tf.keras.Sequential()
    # `shape` must be a tuple. `(1000)` is just the int 1000, which is not a
    # valid shape for every Keras version, so spell the 1-tuple explicitly.
    model.add(tf.keras.Input(shape=(1000,)))
    for _ in range(3):
        model.add(tf.keras.layers.Dense(300, activation=tf.nn.selu))
    # The last layer contains the LSH random hyperplanes.
    model.add(tf.keras.layers.Dense(hashLength))
    return model
def computeNeuralHash(m, img):
    """Hash ``img`` with random-projection LSH (a.k.a. "hyperplane LSH").

    The model output is treated as the signed distances to the hyperplanes;
    each bit of the hash is the sign of one output feature.

    Args:
        m: Callable model; ``m(img)`` must return an object exposing
            ``.numpy()``.
        img: Input passed to the model unchanged. Only the first row of the
            model output is hashed.

    Returns:
        A string of "0"/"1" characters, one per output feature: "1" where the
        feature is strictly positive, "0" otherwise.
    """
    # Named `features` rather than `hash` to avoid shadowing the builtin.
    features = m(img).numpy()[0]
    return "".join("1" if value > 0 else "0" for value in features)
def demo():
    """Produce a second, unrelated input whose neural hash collides with a target's.

    A random target input is hashed, then a different random input is moved
    by plain gradient descent until every output feature lies on the same
    side of its hyperplane as the target's. Progress and the final comparison
    are printed; nothing is returned.
    """
    m = buildModel()
    # np.random.seed(340)
    targetimg = np.expand_dims(np.random.randn(1000), 0)
    print(targetimg.shape)
    targetstringhash = computeNeuralHash(m, targetimg)
    print("targetstringhash : ")
    print(targetstringhash)

    # +1 where the target bit is "0" (feature must go negative),
    # -1 where it is "1" (feature must go positive).
    flip = [1.0 if x == "0" else -1.0 for x in targetstringhash]
    print("flip : ")
    print(flip)

    img = np.expand_dims(np.random.randn(1000), 0)

    # To make the hash more stable we add a gap:
    #  - for a 1 bit the feature is pushed into [gap, +infinity]
    #  - for a 0 bit the feature is pushed into [-infinity, -gap]
    # Anything else is penalised by the loss.
    gap = 0.1

    # Standard gradient descent.
    learning_rate = 1e-2
    # We can do better using an l-bfgs-b optimizer and handle bound constraints.
    # We can also add some additional loss to make the result similar to a
    # provided image, or use a gan-loss to make it look "natural".

    # tf.nn.l2_loss is sum(t ** 2) / 2. If the loss is below gap**2 / 2, every
    # relu term is below `gap`, so every feature is strictly on the target
    # side of zero. A threshold of gap**2 would still allow a single feature
    # to sit on the wrong side.
    collision_threshold = 0.5 * gap * gap

    while True:
        loss = distanceBetweenHashes(m, img, flip, gap).numpy()
        print("loss : ")
        print(loss)
        # Check before stepping so the image we keep is the one whose loss
        # was measured. `not (loss >= ...)` also stops on a NaN loss instead
        # of looping forever.
        if not loss >= collision_threshold:
            break
        grad = gradient(m, img, flip, gap)
        img -= learning_rate * grad

    imgstringhash = computeNeuralHash(m, img)
    print("img : ")
    print(img)
    # This is not zero: we have found a totally different image.
    print("targetimg - img : ")
    print(targetimg - img)
    print("targetstringhash : ")
    print(targetstringhash)
    print("imgstringhash : ")
    print(imgstringhash)
    # We should get True if a collision has been successfully produced.
    print("targetstringhash == imgstringhash : ")
    print(targetstringhash == imgstringhash)
def distanceBetweenHashes(model, input, flip, gap):
    """Return the hinge-style penalty for features on the wrong side of the gap.

    Args:
        model: Callable applied to ``input`` to obtain the output features.
        input: Value passed to ``model`` unchanged.
        flip: Per-feature sign multipliers applied to the model output.
        gap: Margin added before the relu.

    Returns:
        ``tf.nn.l2_loss`` of ``relu(model(input) * flip + gap)``.
    """
    signed_features = model(input) * flip
    violations = tf.nn.relu(signed_features + gap)
    return tf.nn.l2_loss(violations)
def gradient(model, x, flip, gap):
    """Return the gradient of ``distanceBetweenHashes`` with respect to ``x``.

    Args:
        model: Model forwarded to ``distanceBetweenHashes``.
        x: Point at which to differentiate; converted to a float32 tensor.
        flip: Forwarded to ``distanceBetweenHashes``.
        gap: Forwarded to ``distanceBetweenHashes``.

    Returns:
        The gradient as a NumPy array (via ``.numpy()``).
    """
    point = tf.convert_to_tensor(x, dtype=tf.float32)
    with tf.GradientTape() as tape:
        # A plain tensor is not tracked automatically, so watch it explicitly.
        tape.watch(point)
        objective = distanceBetweenHashes(model, point, flip, gap)
    return tape.gradient(objective, point).numpy()
# Run the collision demo only when this file is executed as a script.
if __name__ == "__main__":
    demo()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The technique outlined is fairly generic and well known. See this link, which is practically identical to this.
I'm a PhD student doing research on subverting Computer Vision models and would happily give a talk on this. I already have slides prepared for 20-60 minute lectures. Send me an email at [email protected] if you want to discuss more.