Skip to content

Instantly share code, notes, and snippets.

@joekir
Last active April 7, 2018 23:01
Show Gist options
  • Save joekir/17e861daf29d2eec4cbbc69f0ba673e1 to your computer and use it in GitHub Desktop.
Save joekir/17e861daf29d2eec4cbbc69f0ba673e1 to your computer and use it in GitHub Desktop.

Running the example

Modify the Java code in main in MurmurHash3Test.java to whatever you want, including the seed const. Seed recovery has already been proven here - https://www.131002.net/siphash/poc.py

  byte[] data = { 90, 1, 2, 44};
  System.out.format("0x%x\n", murmurhash3_x86_32(data, 0, data.length, 0xefef));
$ mkdir -p {src,target}
$ mv MurmurHash3Test.java src/
$ chmod +x buildnrun.sh inverter.py
$ ./buildnrun.sh | ./inverter.py 4
sat

inverting 0x4642c344 returns:
[0, 90]
[2, 2]
[1, 1]
[3, 44]
#!/bin/bash
javac -d target/ src/*
cd target
java MurmurTest
cd ../
#!/usr/bin/python2.7
from z3 import *
import sys
"""
See https://raw.githubusercontent.com/yonik/java_util/master/src/util/hash/MurmurHash3.java
for implementation of the code modelled below
The one downside is that the logic is so complex in some of the switch statements below
that I had to concretize the length of the input data array, this is ok as usually you can
iteratively tune this until you get meaningful plaintext inputs, or it follows a fixed schema, etc.
"""
def dk(i):
return "k"+str(i)
def dh(i):
return "h"+str(i)
def solve():
s = SolverFor("BV")
s.set("local_ctx",True)
# Concrete
roundedEnd = length & 0xfffffffc
c1 = 0xcc9e2d51
c2 = 0x1b873593
seed = 0xefef # this is fixed in the impl I'm looking into
# Based on array theory, maps an Int32 to a Byte
data = Array('data', BitVecSort(32), BitVecSort(8))
kvecs = {}
hvecs = {}
# ha and ka are allocation counters for each map
ka = 0
ha = 0
# init hvecs to seed
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == seed)
ha += 1
i=0
while i < roundedEnd:
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == ((SignExt(24,data[i]) & 0xff) | ((SignExt(24,data[i+1]) & 0xff) << 8) | ((SignExt(24,data[i+2]) & 0xff) << 16) | (SignExt(24, data[i+3]) << 24)))
ka += 1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == kvecs[dk(ka-1)] * c1)
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == ((kvecs[dk(ka-1)] << 15) | LShR(kvecs[dk(ka-1)],17)))
ka += 1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == kvecs[dk(ka-1)] * c2)
ka +=1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == (hvecs[dh(ha-1)] ^ kvecs[dk(ka-1)]))
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == ((hvecs[dh(ha-1)] << 13) | LShR(hvecs[dh(ha-1)],19)))
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == ((hvecs[dh(ha-1)] * 5) + 0xe6546b64))
ha += 1
i += 4
# had to concretize this as the logic (ROT32) in the `switch == 1`
# statement below cannot be inlined to the z3.If statement
# SSA means lots of duplication here, sorry..
switch = length & 3
if switch == 3:
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == ((SignExt(24,data[roundedEnd + 2]) & 0xff) << 16))
ka += 1
# fallthrough 1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] | ((SignExt(24,data[roundedEnd + 1]) & 0xff) << 8)))
ka +=1
# fallthrough 2
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] | (SignExt(24,data[roundedEnd]) & 0xff)))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] * c1))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == ((kvecs[dk(ka-1)] << 15) | LShR(kvecs[dk(ka-1)],17)))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] * c2))
ka +=1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == (hvecs[dh(ha-1)] ^ kvecs[dk(ka-1)]))
ha += 1
if switch == 2:
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == ((SignExt(24,data[roundedEnd + 1]) & 0xff) << 8))
ka +=1
# fallthrough 2
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] | (SignExt(24,data[roundedEnd]) & 0xff)))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] * c1))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == ((kvecs[dk(ka-1)] << 15) | LShR(kvecs[dk(ka-1)],17)))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] * c2))
ka +=1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == (hvecs[dh(ha-1)] ^ kvecs[dk(ka-1)]))
ha += 1
if switch == 1:
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (SignExt(24,data[roundedEnd]) & 0xff))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] * c1))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == ((kvecs[dk(ka-1)] << 15) | LShR(kvecs[dk(ka-1)],17)))
ka +=1
kvecs[dk(ka)] = BitVec(dk(ka),32)
s.add(kvecs[dk(ka)] == (kvecs[dk(ka-1)] * c2))
ka +=1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == (hvecs[dh(ha-1)] ^ kvecs[dk(ka-1)]))
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == (hvecs[dh(ha-1)] ^ length))
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == LShR(hvecs[dh(ha-1)],16))
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == hvecs[dh(ha-2)] ^ hvecs[dh(ha-1)])
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == (hvecs[dh(ha-1)] * 0x85ebca6b))
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == LShR(hvecs[dh(ha-1)],13))
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == hvecs[dh(ha-2)] ^ hvecs[dh(ha-1)])
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == (hvecs[dh(ha-1)] * 0xc2b2ae35))
ha += 1
hvecs[dh(ha)] = BitVec(dh(ha),32)
s.add(hvecs[dh(ha)] == LShR(hvecs[dh(ha-1)],16))
ha += 1
s.add(hash_result == hvecs[dh(ha-2)] ^ hvecs[dh(ha-1)])
ch = s.check()
print ch
if ch.r == 1:
model = s.model()
print "\ninverting 0x%x returns:" % hash_result
data = model[data]
for i in range(0 ,length):
print data.entry(i)
else:
print s.unsat_core()
# should really use proper argparse here, but lazy
if not sys.stdin.isatty():
# piped from build output
hash_result = int(sys.stdin.read(),0)
length = int(sys.argv[1])
elif len(sys.argv) != 3:
print "Sorry, calling format has to be like ./inverter.py 0x(<murmurhash>) original_data_length"
sys.exit(1)
else:
# just called directly
hash_result = int(sys.argv[1],0)
length = int(sys.argv[2])
solve()
class MurmurTest {
public static void main(String args[]){
byte[] data = { 90, 1, 2, 44};
System.out.format("0x%x\n", murmurhash3_x86_32(data, 0, data.length, 0xefef));
}
/*
* This is verbatim from:
* https://raw.githubusercontent.com/yonik/java_util/master/src/util/hash/MurmurHash3.java
*/
public static int murmurhash3_x86_32(byte[] data, int offset, int len, int seed) {
int c1 = 0xcc9e2d51;
int c2 = 0x1b873593;
int h1 = seed;
int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block
for (int i=offset; i<roundedEnd; i+=4) {
// little endian load order
int k1 = (data[i] & 0xff) | ((data[i+1] & 0xff) << 8) | ((data[i+2] & 0xff) << 16) | (data[i+3] << 24);
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
}
// tail
int k1 = 0;
switch(len & 0x03) {
case 3:
k1 = (data[roundedEnd + 2] & 0xff) << 16;
// fallthrough
case 2:
k1 |= (data[roundedEnd + 1] & 0xff) << 8;
// fallthrough
case 1:
k1 |= (data[roundedEnd] & 0xff);
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
}
// finalization
h1 ^= len;
// fmix(h1);
h1 ^= h1 >>> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >>> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >>> 16;
return h1;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment