Created
October 14, 2019 01:14
-
-
Save matthewshannon/891ac1359a540fd8322e6049f153c1b7 to your computer and use it in GitHub Desktop.
opc-multipart-md5 calculation / validation code in Java for validation of checksum returned from multipart object storage uploads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * opc-multipart-md5 calculation code
 * Matt Shannon / Oracle Corp.
 * See https://gist.github.com/itemir/f5bc9fded6483cd79c89ebf4ca1cfd30 for opc-multipart-md5 calculation algorithm in python
 */
import java.io.BufferedInputStream; | |
import java.io.ByteArrayInputStream; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.nio.charset.StandardCharsets; | |
import java.security.MessageDigest; | |
import java.security.NoSuchAlgorithmException; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
/**
 * Computes per-chunk and whole-stream checksums and derives the Base64 encoded MD5 of the
 * concatenated per-chunk MD5 digests (printed by {@link #main(String[])} as {@code opc-multipart-md5}).
 * Also bundles the small hex / Base64 helpers the calculation needs.
 */
public class OpcMultipartMd5Test
{
    /** Length in bytes of a single MD5 digest. */
    private static final int MD5_DIGEST_LENGTH = 16;

    /** Size of the read buffer used while chunking a stream. */
    private static final int CHUNK_READ_BUFFER_SIZE = 16384;

    /** Size of the read buffer used by {@link #getDigest(InputStream, String)}. */
    private static final int DIGEST_READ_BUFFER_SIZE = 262144;

    /** File processed by {@link #main(String[])} when no path argument is supplied. */
    private static final String DEFAULT_SOURCE_FILE = "/Users/mshannon/Downloads/ojdbc8-javadoc.tar.gz";

    /** Chunk size used by {@link #main(String[])} when no chunk-size argument is supplied. */
    private static final long DEFAULT_CHUNK_SIZE = 1048576L;

    // Base64 code taken from JDK 8 - java.util.Base64 / Author Xueming Shen

    /**
     * This array is a lookup table that translates 6-bit positive integer
     * index values into their "Base64 Alphabet" equivalents as specified
     * in "Table 1: The Base64 Alphabet" of RFC 2045 (and RFC 4648).
     */
    private static final char[] BASE64_ALPHABET =
    {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
        'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
        'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
    };

    /**
     * This array is a lookup table that translates 6-bit positive integer
     * index values into their "URL and Filename safe Base64" equivalents as
     * specified in Table 2 of the RFC 4648, with the '+' and '/' changed to '-' and '_'
     */
    private static final char[] BASE64_URL_SAFE_ALPHABET =
    {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
        'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
        'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
    };

    /**
     * Prints the MD5 checksum and size of every chunk of a file, followed by the Base64 encoded
     * MD5 of the concatenated chunk digests.
     *
     * @param args optional: {@code args[0]} is the file to process and {@code args[1]} the chunk size
     *             in bytes; when absent the built-in defaults are used
     * @throws Exception if the file cannot be read or an argument is invalid
     */
    public static void main(String[] args) throws Exception
    {
        String sourceFile = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_FILE;
        long chunkSize = (args != null && args.length > 1) ? Long.parseLong(args[1]) : DEFAULT_CHUNK_SIZE;

        ArrayList<Pair<String, Long>> checksums = getMD5Checksums(new File(sourceFile), chunkSize);

        // The last entry is the whole-file checksum; only the per-chunk entries feed the multipart hash.
        int chunkCount = checksums.size() - 1;
        byte[] opcMultipartSourceBytes = new byte[MD5_DIGEST_LENGTH * chunkCount];
        for (int i = 0; i < chunkCount; i++)
        {
            Pair<String, Long> pair = checksums.get(i);
            System.out.println(pair.getValue0() + " : " + pair.getValue1());
            byte[] bytes = fromHex(pair.getValue0());
            System.arraycopy(bytes, 0, opcMultipartSourceBytes, i * MD5_DIGEST_LENGTH, bytes.length);
        }
        String opcMultipartMd5 = getMD5ChecksumBase64Encoded(opcMultipartSourceBytes);
        System.out.println("opc-multipart-md5=" + opcMultipartMd5);
    }

    /**
     * Returns an ordered list of MD5 checksum / size pairs for each chunk/segment in the file along with a
     * full-file checksum in the last index position.
     *
     * @param f the file to process
     * @param chunkSize the length of each chunk/segment; must be positive
     * @return a list of hex encoded MD5 checksums paired with the number of bytes each one covers
     * @throws IOException if the file cannot be opened or read
     * @throws NoSuchAlgorithmException if no MD5 implementation is available
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static ArrayList<Pair<String, Long>> getMD5Checksums(
        File f,
        long chunkSize
    ) throws IOException, NoSuchAlgorithmException
    {
        return getChecksums(f, chunkSize, "MD5");
    }

    /**
     * Returns an ordered list of MD5 checksum / size pairs for each chunk/segment in the stream along with a
     * full-stream checksum in the last index position. The stream is closed before this method returns.
     *
     * @param is the stream to process
     * @param chunkSize the length of each chunk/segment; must be positive
     * @return a list of hex encoded MD5 checksums paired with the number of bytes each one covers
     * @throws IOException if the stream cannot be read
     * @throws NoSuchAlgorithmException if no MD5 implementation is available
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static ArrayList<Pair<String, Long>> getMD5Checksums(
        InputStream is,
        long chunkSize
    ) throws IOException, NoSuchAlgorithmException
    {
        return getChecksums(is, chunkSize, "MD5");
    }

    /**
     * Returns an ordered list of checksum / size pairs for each chunk/segment in the file along with a full-file
     * checksum in the last index position. The checksum is calculated using the passed digest algorithm
     * (MD5, SHA-256 etc).
     *
     * @param f the file to process
     * @param chunkSize the length of each chunk/segment; must be positive
     * @param digestAlg the digest algorithm to use for the checksum calculation - e.g. SHA-256, or MD5
     * @return a list of hex encoded checksums paired with the number of bytes each one covers
     * @throws IOException if the file cannot be opened or read
     * @throws NoSuchAlgorithmException if {@code digestAlg} is not available
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static ArrayList<Pair<String, Long>> getChecksums(
        File f,
        long chunkSize,
        String digestAlg
    ) throws IOException, NoSuchAlgorithmException
    {
        InputStream is = null;
        try
        {
            is = new BufferedInputStream(new FileInputStream(f));
            return getChecksums(is, chunkSize, digestAlg);
        }
        finally
        {
            // The stream overload closes the stream as well; closing again here covers the case
            // where it was never reached and is harmless otherwise.
            streamClose(is);
        }
    }

    /**
     * Returns an ordered list of checksum / size pairs for each chunk/segment in the stream along with a
     * full-stream checksum in the last index position. The checksum is calculated using the passed digest
     * algorithm (MD5, SHA-256 etc). The final chunk may be shorter than {@code chunkSize}; an empty stream
     * yields only the full-stream entry. The stream is closed before this method returns, whether it
     * completes normally or throws.
     *
     * @param is the stream to process
     * @param chunkSize the length of each chunk/segment; must be positive
     * @param digestAlg the digest algorithm to use for the checksum calculation - e.g. SHA-256, or MD5
     * @return a list of hex encoded checksums paired with the number of bytes each one covers
     * @throws IOException if the stream cannot be read
     * @throws NoSuchAlgorithmException if {@code digestAlg} is not available
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static ArrayList<Pair<String, Long>> getChecksums(
        InputStream is,
        long chunkSize,
        String digestAlg
    ) throws IOException, NoSuchAlgorithmException
    {
        ArrayList<Pair<String, Long>> result = new ArrayList<Pair<String, Long>>();
        try
        {
            // A non-positive chunk size can never be filled, so the chunking loop would never advance.
            if (chunkSize <= 0)
            {
                throw new IllegalArgumentException("chunkSize must be positive but was " + chunkSize);
            }

            MessageDigest digestFullFile = MessageDigest.getInstance(digestAlg);
            MessageDigest digestChunk = null; // non-null only while a chunk is partially filled
            long chunkBytesProcessed = 0;     // long so that chunks above 2 GiB do not overflow
            long totalBytesRead = 0;
            byte[] buffer = new byte[CHUNK_READ_BUFFER_SIZE];
            int numRead;
            while ((numRead = is.read(buffer)) != -1)
            {
                if (numRead <= 0)
                {
                    continue;
                }
                totalBytesRead += numRead;
                digestFullFile.update(buffer, 0, numRead);

                int offset = 0;
                while (offset < numRead)
                {
                    if (digestChunk == null)
                    {
                        digestChunk = MessageDigest.getInstance(digestAlg);
                    }
                    // Feed as much of the buffer as the current chunk can still take; the value is
                    // bounded by the buffer length, so the narrowing cast is safe.
                    long chunkFree = chunkSize - chunkBytesProcessed;
                    int length = (int) Math.min(chunkFree, (long) (numRead - offset));
                    digestChunk.update(buffer, offset, length);
                    offset += length;
                    chunkBytesProcessed += length;

                    if (chunkBytesProcessed == chunkSize)
                    {
                        // chunk filled to capacity - store its hash and start afresh
                        result.add(new Pair<String, Long>(toHex(digestChunk.digest()), chunkSize));
                        digestChunk = null;
                        chunkBytesProcessed = 0;
                    }
                }
            }

            if (digestChunk != null)
            {
                // trailing partial chunk
                result.add(new Pair<String, Long>(toHex(digestChunk.digest()), Long.valueOf(chunkBytesProcessed)));
            }
            result.add(new Pair<String, Long>(toHex(digestFullFile.digest()), totalBytesRead)); // full stream hash
        }
        finally
        {
            streamClose(is);
        }
        return result;
    }

    /**
     * Returns the Base64 (standard alphabet, padded) encoding of the MD5 digest of the given bytes.
     *
     * @param inputBytes the bytes to hash
     * @return the Base64 encoded MD5 digest
     */
    public static String getMD5ChecksumBase64Encoded(byte[] inputBytes)
    {
        return getMD5ChecksumBase64Encoded(inputBytes, false);
    }

    /**
     * Returns the Base64 (padded) encoding of the MD5 digest of the given bytes.
     *
     * @param inputBytes the bytes to hash
     * @param urlSafe {@code true} to encode with the URL-safe alphabet, {@code false} for the standard one
     * @return the Base64 encoded MD5 digest
     */
    public static String getMD5ChecksumBase64Encoded(byte[] inputBytes, boolean urlSafe)
    {
        byte[] bytes = getDigest(inputBytes, "MD5");
        return urlSafe
            ? toBase64URLSafeString(bytes)
            : toBase64String(bytes);
    }

    /**
     * Reads the stream to its end and returns the digest of everything read. The stream is not closed
     * by this method.
     *
     * @param is the stream to hash
     * @param algorithm the digest algorithm name, e.g. MD5 or SHA-256
     * @return the raw digest bytes
     * @throws IOException if the stream cannot be read
     * @throws Error if {@code algorithm} is not available
     */
    public static byte[] getDigest(InputStream is, String algorithm) throws IOException
    {
        MessageDigest md;
        try
        {
            md = MessageDigest.getInstance(algorithm);
        }
        catch (NoSuchAlgorithmException e)
        {
            String msg = String.format("Failed to compute the checksum. No such algorithm %s. Error: %s", algorithm,
                e.getMessage());
            throw new Error(msg, e);
        }

        byte[] buffer = new byte[DIGEST_READ_BUFFER_SIZE];
        int bytesRead;
        while ((bytesRead = is.read(buffer)) != -1)
        {
            if (bytesRead > 0)
            {
                md.update(buffer, 0, bytesRead);
            }
        }
        return md.digest();
    }

    /**
     * Returns the digest of the given bytes.
     *
     * @param bytes the bytes to hash
     * @param algorithm the digest algorithm name, e.g. MD5 or SHA-256
     * @return the raw digest bytes
     * @throws Error if {@code algorithm} is not available
     */
    public static byte[] getDigest(byte[] bytes, String algorithm)
    {
        try
        {
            return getDigest(new ByteArrayInputStream(bytes), algorithm);
        }
        catch (IOException e)
        {
            // Not expected for an in-memory stream.
            String msg = String.format("Unexpected IOException. Error: %s", e.getMessage());
            throw new Error(msg, e);
        }
    }

    /**
     * Encodes bytes as a lower-case hexadecimal string, two characters per byte.
     *
     * @param bytes the bytes to encode, may be {@code null}
     * @return the hex string, or {@code null} when {@code bytes} is {@code null}
     */
    public static String toHex(byte[] bytes)
    {
        if (bytes == null)
        {
            return null;
        }
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte b : bytes)
        {
            sb.append(Character.forDigit((b & 0xf0) >> 4, 16));
            sb.append(Character.forDigit(b & 0x0f, 16));
        }
        return sb.toString();
    }

    /**
     * Decodes a hexadecimal string (upper or lower case) into bytes.
     *
     * @param hexString the hex string to decode; must have an even length
     * @return the decoded bytes
     * @throws IllegalArgumentException if the length is odd or a character is not a hex digit
     * @throws NullPointerException if {@code hexString} is {@code null}
     */
    public static byte[] fromHex(String hexString)
    {
        int length = hexString.length();
        if ((length & 1) != 0)
        {
            throw new IllegalArgumentException(
                "Hex string must have an even number of characters but has " + length);
        }
        byte[] bytes = new byte[length / 2];
        for (int i = 0; i < length; i += 2)
        {
            int high = Character.digit(hexString.charAt(i), 16);
            int low = Character.digit(hexString.charAt(i + 1), 16);
            // Character.digit signals a non-hex character with -1, which must not leak into the result.
            if (high < 0 || low < 0)
            {
                throw new IllegalArgumentException(
                    "Invalid hex character at index " + (high < 0 ? i : i + 1));
            }
            bytes[i / 2] = (byte) ((high << 4) | low);
        }
        return bytes;
    }

    /**
     * Encodes bytes as a padded Base64 string using the standard alphabet.
     *
     * @param src the bytes to encode
     * @return the Base64 string
     */
    public static String toBase64String(byte[] src)
    {
        return new String(toBase64(src), StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes bytes as a padded Base64 string using the URL-safe alphabet.
     *
     * @param src the bytes to encode
     * @return the Base64 string
     */
    public static String toBase64URLSafeString(byte[] src)
    {
        return new String(toBase64URLSafe(src), StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes bytes as a Base64 string using the URL-safe alphabet, optionally without padding.
     *
     * @param src the bytes to encode
     * @param removePadding {@code true} to strip the trailing {@code '='} padding
     * @return the Base64 string
     */
    public static String toBase64URLSafeString(byte[] src, boolean removePadding)
    {
        String base64URLSafeString = toBase64URLSafeString(src);
        return removePadding
            ? trimTrailingBase64Padding(base64URLSafeString)
            : base64URLSafeString;
    }

    /**
     * Encodes bytes as padded Base64 using the standard alphabet.
     *
     * @param src the bytes to encode
     * @return the encoded characters, one per byte
     */
    public static byte[] toBase64(byte[] src)
    {
        return base64Encode(BASE64_ALPHABET, src);
    }

    /**
     * Encodes bytes as padded Base64 using the URL-safe alphabet.
     *
     * @param src the bytes to encode
     * @return the encoded characters, one per byte
     */
    public static byte[] toBase64URLSafe(byte[] src)
    {
        return base64Encode(BASE64_URL_SAFE_ALPHABET, src);
    }

    /**
     * Encodes {@code src} as padded Base64 using the given 64 character alphabet.
     */
    private static byte[] base64Encode(char[] alphabet, byte[] src)
    {
        int end = src.length;
        byte[] dst = new byte[4 * ((end + 2) / 3)];
        int fullGroupsEnd = end / 3 * 3;
        int sp = 0;
        int dp = 0;

        // Every complete group of 3 input bytes becomes 4 output characters.
        while (sp < fullGroupsEnd)
        {
            int bits = (src[sp++] & 0xff) << 16 | (src[sp++] & 0xff) << 8 | (src[sp++] & 0xff);
            dst[dp++] = (byte) alphabet[(bits >>> 18) & 0x3f];
            dst[dp++] = (byte) alphabet[(bits >>> 12) & 0x3f];
            dst[dp++] = (byte) alphabet[(bits >>> 6) & 0x3f];
            dst[dp++] = (byte) alphabet[bits & 0x3f];
        }

        if (sp < end)
        { // 1 or 2 leftover bytes
            int b0 = src[sp++] & 0xff;
            dst[dp++] = (byte) alphabet[b0 >> 2];
            if (sp == end)
            {
                dst[dp++] = (byte) alphabet[(b0 << 4) & 0x3f];
                dst[dp++] = '=';
                dst[dp++] = '=';
            }
            else
            {
                int b1 = src[sp] & 0xff;
                dst[dp++] = (byte) alphabet[(b0 << 4) & 0x3f | (b1 >> 4)];
                dst[dp++] = (byte) alphabet[(b1 << 2) & 0x3f];
                dst[dp++] = '=';
            }
        }
        return dst;
    }

    /**
     * Removes up to two trailing {@code '='} padding characters from a Base64 string.
     *
     * @param b64EncodedString the string to trim, may be {@code null}
     * @return the string without trailing padding, or {@code null} when the input is {@code null}
     */
    public static String trimTrailingBase64Padding(String b64EncodedString)
    {
        if (b64EncodedString == null)
        {
            return null;
        }
        if (b64EncodedString.endsWith("=="))
        {
            return b64EncodedString.substring(0, b64EncodedString.length() - 2);
        }
        if (b64EncodedString.endsWith("="))
        {
            return b64EncodedString.substring(0, b64EncodedString.length() - 1);
        }
        return b64EncodedString;
    }

    /**
     * Attempt to close the input stream - internally consume any
     * resulting IOException
     *
     * @param in the input stream, may be {@code null}
     */
    public static void streamClose(InputStream in)
    {
        if (in == null)
        {
            return;
        }
        try
        {
            in.close();
        }
        catch (IOException ignored)
        {
            // Best-effort close: the stream was only read from, so a failure here cannot
            // invalidate results that were already computed.
        }
    }

    /**
     * Immutable holder for two values.
     *
     * @param <A> type of the first value
     * @param <B> type of the second value
     */
    public static final class Pair<A, B>
    {
        private final A val0;
        private final B val1;

        public Pair(final A value0, final B value1)
        {
            this.val0 = value0;
            this.val1 = value1;
        }

        /** @return the first value */
        public A getValue0()
        {
            return this.val0;
        }

        /** @return the second value */
        public B getValue1()
        {
            return this.val1;
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment