Skip to content

Instantly share code, notes, and snippets.

@Genzer
Last active July 12, 2023 05:06
Show Gist options
  • Save Genzer/99ca5f92dd034aafbcbc878d7584e212 to your computer and use it in GitHub Desktop.
Save Genzer/99ca5f92dd034aafbcbc878d7584e212 to your computer and use it in GitHub Desktop.
Testing the generation of MD5/SHA-256 checksums from an InputStream of arbitrary size
//usr/bin/env jshell
import java.io.InputStream;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.security.DigestInputStream;
/**
 * Creates an {@code InputStream} that yields the bytes of {@code sample} repeated
 * {@code times} times, producing them one at a time instead of materializing the
 * whole content in memory (a Stream-like, iterating approach).
 * The idea was copied from https://www.nurkiewicz.com/2014/07/building-extremely-large-in-memory.html
 *
 * <p>
 * The {@code sample} array is not copied; it is read lazily on every {@code read()},
 * so it must not be modified while the returned stream is in use.
 * </p>
 *
 * @param sample the bytes to repeat
 * @param times how many times {@code sample} is repeated; zero or a negative value
 *              yields an empty stream
 * @return a stream of {@code sample.length * times} bytes
 */
public /* static */ InputStream repeat(byte[] sample, int times) {
    return new InputStream() {
        private long pos = 0;
        // Computed as a long so that large sample/times combinations do not overflow an int.
        private final long total = (long) sample.length * times;

        @Override
        public int read() throws IOException {
            if (pos >= total) {
                return -1;
            }
            // read() must return the byte as an int in the range 0..255. Without the mask
            // a byte >= 0x80 would be sign-extended to a negative int, and 0xFF would
            // become -1, which callers interpret as end-of-stream.
            return sample[(int) (pos++ % sample.length)] & 0xFF;
        }
    };
}
/**
 * <p>
 * Inspired by {@code repeat(byte[], int)}, this method creates an {@code InputStream}
 * of the given {@code size} (in bytes). The content of the {@code InputStream} is randomized
 * by using a {@code ThreadLocalRandom}: an internal {@literal byte[1024]} buffer is refilled
 * with random bytes every time the read position reaches a multiple of its length.
 * </p>
 *
 * <p>
 * The returned {@code InputStream} is not thread-safe and <strong>SHOULD NOT</strong>
 * be read by multiple {@code Thread}s: the position counter and the refill of the
 * buffer are not synchronized, so the operation is not atomic.
 * </p>
 *
 * @param size the number of bytes the stream yields; zero or a negative value
 *             yields an empty stream
 * @return a stream of {@code size} random bytes
 */
public /* static */ InputStream randomInputStream(long size) {
    return new InputStream() {
        private final long total = size;
        private final byte[] sample = new byte[1024];
        private long pos = 0;

        @Override
        public int read() throws IOException {
            if (pos >= total) {
                return -1;
            }
            // Refill the buffer with fresh random bytes at the start of every 1024-byte window.
            if (pos % sample.length == 0) {
                java.util.concurrent.ThreadLocalRandom.current().nextBytes(sample);
            }
            // read() must return the byte as an int in the range 0..255. Masking with 0xFF
            // maps the signed byte onto that full range; Math.abs would fold negative
            // values onto positive ones and never produce anything above 128.
            return sample[(int) (pos++ % sample.length)] & 0xFF;
        }
    };
}
// --- MD5 over 1MB: a random 1KB sample repeated 1024 times ---
var oneKB = new byte[1024];
java.util.concurrent.ThreadLocalRandom.current().nextBytes(oneKB);
var md5Digest = MessageDigest.getInstance("MD5");
var md5InputStream = new DigestInputStream(repeat(oneKB, 1024), md5Digest);
// The digest is only updated as bytes are read, so the stream has to be drained.
// Copying it into a discarding OutputStream is the equivalent of `cat xxx >/dev/null`.
md5InputStream.transferTo(OutputStream.nullOutputStream());
var md5Base64 = java.util.Base64.getEncoder().encodeToString(md5Digest.digest());
System.out.println(String.format("MD5 digest of 1MB: %s", md5Base64));

// --- SHA-256 over 20MB of random content ---
var sha256Digest = MessageDigest.getInstance("SHA-256");
var sha256InputStream = new DigestInputStream(randomInputStream(20 * 1024 * 1024), sha256Digest);
sha256InputStream.transferTo(OutputStream.nullOutputStream());
var sha256Base64 = java.util.Base64.getEncoder().encodeToString(sha256Digest.digest());
System.out.println(String.format("SHA256 digest of 20MB: %s", sha256Base64));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment