Last active
July 12, 2023 05:06
-
-
Save Genzer/99ca5f92dd034aafbcbc878d7584e212 to your computer and use it in GitHub Desktop.
Testing MD5/SHA-256 checksum generation with an arbitrarily sized InputStream
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//usr/bin/env jshell | |
import java.io.InputStream; | |
import java.io.OutputStream; | |
import java.security.MessageDigest; | |
import java.security.DigestInputStream; | |
/** | |
* A super elegant way to produce a `InputStream` at any size using an iterating | |
* approach (Stream-like). | |
* This was copied from https://www.nurkiewicz.com/2014/07/building-extremely-large-in-memory.html | |
*/ | |
public /* static */ InputStream repeat(byte[] sample, int times) { | |
return new InputStream() { | |
private long pos = 0; | |
private final long total = (long)sample.length * times; | |
public int read() throws IOException { | |
return pos < total ? | |
sample[(int)(pos++ % sample.length)] : | |
-1; | |
} | |
}; | |
} | |
/** | |
* <p> | |
* Inspired by {@code repeat(byte[], int)}, this method creates an {@code InputStream} | |
* of given {@code size} (in bytes). The content of the {@code InputStream} is randomized | |
* by using a {@code ThreadLocalRandom}. | |
* </p> | |
* | |
* <p> | |
* This method is not thread-safe. The returned {@code InputStream} <strong>SHOULD NOT</strong> | |
* be read by multiple {@code Thread}s as it uses {@code ThreadLocalRandom} to randomize the content | |
* of the sample {@literal byte[1024]}. This operation is not atomic. | |
* </p> | |
*/ | |
public /* static */ InputStream randomInputStream(long size) { | |
return new InputStream() { | |
private final long total = size; | |
private final byte[] sample = new byte[1024]; | |
private long pos = 0; | |
public int read() throws IOException { | |
if (pos >= total) return -1; | |
if (pos % sample.length == 0) ThreadLocalRandom.current().nextBytes(sample); | |
// Math.abs MUST be used here because the contract of `read()` states that | |
// The value byte is returned as an int in the range 0 to 255. | |
return Math.abs(sample[(int)(pos++ % sample.length)]); | |
} | |
}; | |
} | |
// Demo 1: MD5 over a 1 MB stream (one random kilobyte repeated 1024 times).
MessageDigest md5Digest = MessageDigest.getInstance("MD5");
byte[] oneKB = new byte[1024];
java.util.concurrent.ThreadLocalRandom.current().nextBytes(oneKB);
InputStream oneMegabyteInputStream = repeat(oneKB, 1024);
DigestInputStream md5InputStream = new DigestInputStream(oneMegabyteInputStream, md5Digest);
// The digest is only updated by bytes that are actually read through the
// DigestInputStream, so drain it completely. Copying into a discarding
// OutputStream is the shortest way to do that (the equivalent of `cat xxx >/dev/null`).
md5InputStream.transferTo(OutputStream.nullOutputStream());
byte[] md5Raw = md5Digest.digest();
String md5Base64 = java.util.Base64.getEncoder().encodeToString(md5Raw);
System.out.println(String.format("MD5 digest of 1MB: %s", md5Base64));
// Demo 2: SHA-256 over a 20 MB stream of random content.
MessageDigest sha256Digest = MessageDigest.getInstance("SHA-256");
InputStream twentyMegabyteInputStream = randomInputStream(20 * 1024 * 1024);
DigestInputStream sha256InputStream = new DigestInputStream(twentyMegabyteInputStream, sha256Digest);
// Drain the stream into a discarding sink so every byte passes through the digest.
sha256InputStream.transferTo(OutputStream.nullOutputStream());
byte[] sha256Raw = sha256Digest.digest();
String sha256Base64 = java.util.Base64.getEncoder().encodeToString(sha256Raw);
System.out.println(String.format("SHA256 digest of 20MB: %s", sha256Base64));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment