Skip to content

Instantly share code, notes, and snippets.

@matthewshannon
Created October 14, 2019 01:14
Show Gist options
  • Save matthewshannon/891ac1359a540fd8322e6049f153c1b7 to your computer and use it in GitHub Desktop.
Save matthewshannon/891ac1359a540fd8322e6049f153c1b7 to your computer and use it in GitHub Desktop.
Java code to calculate and validate the opc-multipart-md5 checksum returned from multipart object storage uploads
/**
* opc-multipart-md5 calculation code
* Matt Shannon / Oracle Corp.
* See https://gist.github.com/itemir/f5bc9fded6483cd79c89ebf4ca1cfd30 for opc-multipart-md5 calculation algorithm in python
*/
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
/**
 * Utility / demo class that computes per-chunk digests of a file or stream and derives the
 * "opc-multipart-md5" style aggregate from them: the Base64-encoded MD5 of the concatenated
 * binary MD5 digests of every chunk.
 *
 * <p>All methods are static. The Base64 encoder is hand-rolled (adapted from JDK 8
 * java.util.Base64, author Xueming Shen) so that the class only relies on Java 7 APIs.
 */
public class OpcMultipartMd5Test
{
    /** File processed by {@link #main(String[])} when no path argument is supplied. */
    private static final String DEFAULT_SOURCE_FILE = "/Users/mshannon/Downloads/ojdbc8-javadoc.tar.gz";

    /** Chunk size (1 MiB) used by {@link #main(String[])} when no size argument is supplied. */
    private static final long DEFAULT_CHUNK_SIZE = 1048576;

    /** Length in bytes of a binary MD5 digest. */
    private static final int MD5_DIGEST_LENGTH = 16;

    /** Size of the read buffer used while chunking a stream. */
    private static final int CHUNK_READ_BUFFER_SIZE = 16384;

    /** Size of the read buffer used while digesting a whole stream. */
    private static final int DIGEST_READ_BUFFER_SIZE = 262144;

    /**
     * Lookup table that translates 6-bit positive integer index values into their
     * "Base64 Alphabet" equivalents as specified in "Table 1: The Base64 Alphabet"
     * of RFC 2045 (and RFC 4648).
     */
    private static final char[] BASE64_ALPHABET =
    {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
        'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
        'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
    };

    /**
     * Lookup table that translates 6-bit positive integer index values into their
     * "URL and Filename safe Base64" equivalents as specified in Table 2 of RFC 4648,
     * with the '+' and '/' changed to '-' and '_'.
     */
    private static final char[] BASE64_URL_SAFE_ALPHABET =
    {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
        'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
        'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
    };

    /**
     * Prints the MD5 / size of every chunk of a file followed by the aggregate multipart MD5.
     *
     * @param args optional: {@code args[0]} is the file to process (defaults to the original
     *             hard-coded sample path), {@code args[1]} is the chunk size in bytes
     *             (defaults to 1048576)
     * @throws Exception if the file cannot be read or the arguments are invalid
     */
    public static void main(String[] args) throws Exception
    {
        String sourceFile = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_FILE;
        long chunksize = (args != null && args.length > 1) ? Long.parseLong(args[1]) : DEFAULT_CHUNK_SIZE;

        ArrayList<Pair<String, Long>> checksums = getMD5Checksums(new File(sourceFile), chunksize);

        // the last entry is the whole-file checksum, so it is not listed as a chunk
        for (int i = 0; i < (checksums.size() - 1); i++)
        {
            Pair<String, Long> pair = checksums.get(i);
            System.out.println(pair.getValue0() + " : " + pair.getValue1());
        }
        System.out.println("opc-multipart-md5=" + getOpcMultipartMd5(checksums));
    }

    /**
     * Derives the aggregate multipart MD5 from a list produced by one of the
     * {@code getMD5Checksums} methods: the binary MD5 digests of all chunks are concatenated
     * in order and the MD5 of that concatenation is returned Base64 encoded.
     *
     * <p>The last list entry (the whole-file checksum) is ignored. Only the Base64 value is
     * returned; no part-count suffix is appended.
     *
     * @param checksums ordered chunk checksums followed by the whole-file checksum
     * @return the Base64 encoded MD5 of the concatenated chunk digests
     * @throws IllegalArgumentException if the list is null or empty, or a chunk entry is not
     *                                  a 16 byte (32 hex character) digest
     */
    public static String getOpcMultipartMd5(ArrayList<Pair<String, Long>> checksums)
    {
        if (checksums == null || checksums.isEmpty())
        {
            throw new IllegalArgumentException("checksums must contain at least the full-file entry");
        }
        int chunkCount = checksums.size() - 1;
        byte[] concatenated = new byte[MD5_DIGEST_LENGTH * chunkCount];
        for (int i = 0; i < chunkCount; i++)
        {
            byte[] digest = fromHex(checksums.get(i).getValue0());
            if (digest == null || digest.length != MD5_DIGEST_LENGTH)
            {
                throw new IllegalArgumentException("Entry " + i + " is not a " + MD5_DIGEST_LENGTH + " byte MD5 digest");
            }
            System.arraycopy(digest, 0, concatenated, i * MD5_DIGEST_LENGTH, MD5_DIGEST_LENGTH);
        }
        return getMD5ChecksumBase64Encoded(concatenated);
    }

    /**
     * Returns an ordered list of MD5 checksum / size pairs for each chunk of the file, with the
     * whole-file checksum / total size in the last index position.
     *
     * @param f the file to process
     * @param chunkSize the length of each chunk in bytes; must be positive
     * @return the chunk checksums followed by the whole-file checksum
     * @throws IOException if the file cannot be read
     * @throws NoSuchAlgorithmException if MD5 is not available
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static ArrayList<Pair<String, Long>> getMD5Checksums(
        File f,
        long chunkSize
    ) throws IOException, NoSuchAlgorithmException
    {
        return getChecksums(f, chunkSize, "MD5");
    }

    /**
     * Returns an ordered list of MD5 checksum / size pairs for each chunk of the stream, with
     * the whole-stream checksum / total size in the last index position. The stream is closed
     * before this method returns.
     *
     * @param is the stream to process
     * @param chunkSize the length of each chunk in bytes; must be positive
     * @return the chunk checksums followed by the whole-stream checksum
     * @throws IOException if the stream cannot be read
     * @throws NoSuchAlgorithmException if MD5 is not available
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static ArrayList<Pair<String, Long>> getMD5Checksums(
        InputStream is,
        long chunkSize
    ) throws IOException, NoSuchAlgorithmException
    {
        return getChecksums(is, chunkSize, "MD5");
    }

    /**
     * Returns an ordered list of checksum / size pairs for each chunk of the file, with the
     * whole-file checksum / total size in the last index position. The checksum is calculated
     * using the passed digest algorithm (MD5, SHA-256 etc).
     *
     * @param f the file to process
     * @param chunkSize the length of each chunk in bytes; must be positive
     * @param digestAlg the digest algorithm to use for the checksum calculation - e.g. SHA-256 or MD5
     * @return the chunk checksums followed by the whole-file checksum
     * @throws IOException if the file cannot be opened or read
     * @throws NoSuchAlgorithmException if {@code digestAlg} is not available
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static ArrayList<Pair<String, Long>> getChecksums(
        File f,
        long chunkSize,
        String digestAlg
    ) throws IOException, NoSuchAlgorithmException
    {
        try (InputStream is = new BufferedInputStream(new FileInputStream(f)))
        {
            return getChecksums(is, chunkSize, digestAlg);
        }
    }

    /**
     * Returns an ordered list of checksum / size pairs for each chunk of the stream, with the
     * whole-stream checksum / total size in the last index position. The checksum is calculated
     * using the passed digest algorithm (MD5, SHA-256 etc).
     *
     * <p>Every chunk except possibly the last covers exactly {@code chunkSize} bytes; a trailing
     * partial chunk is reported with its actual size. An empty stream yields only the
     * whole-stream entry. The stream is always closed before this method returns, including
     * when an exception is thrown.
     *
     * @param is the stream to process
     * @param chunkSize the length of each chunk in bytes; must be positive
     * @param digestAlg the digest algorithm to use for the checksum calculation - e.g. SHA-256 or MD5
     * @return the chunk checksums followed by the whole-stream checksum
     * @throws IOException if the stream cannot be read
     * @throws NoSuchAlgorithmException if {@code digestAlg} is not available
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public static ArrayList<Pair<String, Long>> getChecksums(
        InputStream is,
        long chunkSize,
        String digestAlg
    ) throws IOException, NoSuchAlgorithmException
    {
        ArrayList<Pair<String, Long>> result = new ArrayList<Pair<String, Long>>();
        try
        {
            // a zero chunk size would otherwise emit empty chunks forever without consuming input
            if (chunkSize <= 0)
            {
                throw new IllegalArgumentException("chunkSize must be positive: " + chunkSize);
            }

            MessageDigest digestFullFile = getOrCreateDigestor(null, digestAlg);
            MessageDigest digestChunk = null;
            long totalBytesRead = 0;
            long chunkBytesProcessed = 0; // long so that chunks larger than 2 GiB do not overflow
            byte[] buffer = new byte[CHUNK_READ_BUFFER_SIZE];
            int numRead;
            while ((numRead = is.read(buffer)) != -1)
            {
                if (numRead == 0)
                {
                    continue;
                }
                totalBytesRead += numRead;
                digestFullFile.update(buffer, 0, numRead);

                // a single read may complete the current chunk and start one or more new ones
                int startIndex = 0;
                while (startIndex < numRead)
                {
                    digestChunk = getOrCreateDigestor(digestChunk, digestAlg);
                    long chunkFree = chunkSize - chunkBytesProcessed; // bytes still needed to fill the chunk
                    int available = numRead - startIndex;
                    if (available >= chunkFree)
                    {
                        // chunkFree <= available <= buffer.length, so the narrowing cast is safe
                        int toConsume = (int) chunkFree;
                        digestChunk.update(buffer, startIndex, toConsume);
                        result.add(new Pair<String, Long>(toHex(digestChunk.digest()), Long.valueOf(chunkSize)));
                        startIndex += toConsume;
                        chunkBytesProcessed = 0;
                        digestChunk = null; // next byte (if any) opens a fresh chunk
                    }
                    else
                    {
                        // not enough data to complete the chunk; absorb what we have
                        digestChunk.update(buffer, startIndex, available);
                        startIndex += available;
                        chunkBytesProcessed += available;
                    }
                }
            }
            if (digestChunk != null)
            {
                // a partially filled final chunk is still open; record it with its actual size
                result.add(new Pair<String, Long>(toHex(digestChunk.digest()), Long.valueOf(chunkBytesProcessed)));
            }
            result.add(new Pair<String, Long>(toHex(digestFullFile.digest()), Long.valueOf(totalBytesRead)));
        }
        finally
        {
            streamClose(is);
        }
        return result;
    }

    /**
     * Returns the passed digestor, or a new instance for the algorithm when it is null.
     *
     * @param digestor an existing digestor, or null to create one
     * @param digestAlg the digest algorithm name, e.g. "SHA-256"
     * @return a non-null digestor
     * @throws NoSuchAlgorithmException if {@code digestAlg} is not available
     */
    private static MessageDigest getOrCreateDigestor(
        MessageDigest digestor,
        String digestAlg
    ) throws NoSuchAlgorithmException
    {
        return (digestor != null) ? digestor : MessageDigest.getInstance(digestAlg);
    }

    /**
     * Returns the MD5 digest of the bytes, encoded with the standard Base64 alphabet.
     *
     * @param inputBytes the bytes to digest
     * @return the padded Base64 encoding of the 16 byte MD5 digest
     */
    public static String getMD5ChecksumBase64Encoded(byte[] inputBytes)
    {
        return getMD5ChecksumBase64Encoded(inputBytes, false);
    }

    /**
     * Returns the MD5 digest of the bytes, Base64 encoded.
     *
     * @param inputBytes the bytes to digest
     * @param urlSafe true to use the URL-safe alphabet ('-' and '_'), false for the standard one
     * @return the padded Base64 encoding of the 16 byte MD5 digest
     */
    public static String getMD5ChecksumBase64Encoded(byte[] inputBytes, boolean urlSafe)
    {
        byte[] digest = getDigest(inputBytes, "MD5");
        if (urlSafe)
        {
            return toBase64URLSafeString(digest);
        }
        return toBase64String(digest);
    }

    /**
     * Reads the stream to its end and returns the digest of everything read. The stream is
     * not closed by this method.
     *
     * @param is the stream to digest
     * @param algorithm the digest algorithm name, e.g. "MD5"
     * @return the binary digest
     * @throws IOException if the stream cannot be read
     * @throws Error if the algorithm is not available (kept as {@code Error} for backward compatibility)
     */
    public static byte[] getDigest(InputStream is, String algorithm) throws IOException
    {
        MessageDigest md = newDigestOrError(algorithm);
        byte[] buffer = new byte[DIGEST_READ_BUFFER_SIZE];
        int bytesRead;
        while ((bytesRead = is.read(buffer)) != -1)
        {
            if (bytesRead > 0)
            {
                md.update(buffer, 0, bytesRead);
            }
        }
        return md.digest();
    }

    /**
     * Returns the digest of the byte array.
     *
     * @param bytes the bytes to digest
     * @param algorithm the digest algorithm name, e.g. "MD5"
     * @return the binary digest
     * @throws Error if the algorithm is not available (kept as {@code Error} for backward compatibility)
     */
    public static byte[] getDigest(byte[] bytes, String algorithm)
    {
        // digest the array directly; no stream (and therefore no IOException) is involved
        return newDigestOrError(algorithm).digest(bytes);
    }

    /**
     * Creates a digest for the algorithm, converting an unknown algorithm into the
     * {@code Error} that the public {@code getDigest} methods have always thrown.
     */
    private static MessageDigest newDigestOrError(String algorithm)
    {
        try
        {
            return MessageDigest.getInstance(algorithm);
        }
        catch (NoSuchAlgorithmException e)
        {
            String msg = String.format("Failed to compute the checksum. No such algorithm %s. Error: %s", algorithm,
                e.getMessage());
            throw new Error(msg, e);
        }
    }

    /**
     * Encodes the bytes as a lowercase hexadecimal string, two characters per byte.
     *
     * @param bytes the bytes to encode, may be null
     * @return the hex string, or null when {@code bytes} is null
     */
    public static String toHex(byte[] bytes)
    {
        if (bytes == null)
        {
            return null;
        }
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte b : bytes)
        {
            sb.append(Character.forDigit((b & 0xf0) >> 4, 16));
            sb.append(Character.forDigit(b & 0x0f, 16));
        }
        return sb.toString();
    }

    /**
     * Decodes a hexadecimal string (upper or lower case) into bytes; the inverse of
     * {@link #toHex(byte[])}.
     *
     * @param hexString the hex string to decode, may be null
     * @return the decoded bytes, or null when {@code hexString} is null
     * @throws IllegalArgumentException if the string has an odd length or contains a
     *                                  character that is not a hexadecimal digit
     */
    public static byte[] fromHex(String hexString)
    {
        if (hexString == null)
        {
            return null;
        }
        int length = hexString.length();
        if ((length & 1) != 0)
        {
            throw new IllegalArgumentException("Hex string must have an even length but was " + length);
        }
        byte[] bytes = new byte[length / 2];
        for (int i = 0; i < length; i += 2)
        {
            int high = Character.digit(hexString.charAt(i), 16);
            int low = Character.digit(hexString.charAt(i + 1), 16);
            // Character.digit signals a non-hex character with -1
            if (high < 0 || low < 0)
            {
                throw new IllegalArgumentException("Invalid hex character at index " + (high < 0 ? i : i + 1));
            }
            bytes[i / 2] = (byte) ((high << 4) + low);
        }
        return bytes;
    }

    /**
     * Encodes the bytes as a padded Base64 string using the standard alphabet.
     *
     * @param src the bytes to encode
     * @return the Base64 string
     */
    public static String toBase64String(byte[] src)
    {
        return new String(toBase64(src), StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes the bytes as a padded Base64 string using the URL-safe alphabet.
     *
     * @param src the bytes to encode
     * @return the URL-safe Base64 string
     */
    public static String toBase64URLSafeString(byte[] src)
    {
        return new String(toBase64URLSafe(src), StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes the bytes as a Base64 string using the URL-safe alphabet, optionally without
     * the trailing '=' padding.
     *
     * @param src the bytes to encode
     * @param removePadding true to strip trailing '=' padding characters
     * @return the URL-safe Base64 string
     */
    public static String toBase64URLSafeString(byte[] src, boolean removePadding)
    {
        String encoded = toBase64URLSafeString(src);
        return removePadding ? trimTrailingBase64Padding(encoded) : encoded;
    }

    /**
     * Encodes the bytes as padded Base64 using the standard alphabet.
     *
     * @param src the bytes to encode
     * @return the ASCII bytes of the Base64 encoding
     */
    public static byte[] toBase64(byte[] src)
    {
        return encodeWithAlphabet(BASE64_ALPHABET, src);
    }

    /**
     * Encodes the bytes as padded Base64 using the URL-safe alphabet.
     *
     * @param src the bytes to encode
     * @return the ASCII bytes of the URL-safe Base64 encoding
     */
    public static byte[] toBase64URLSafe(byte[] src)
    {
        return encodeWithAlphabet(BASE64_URL_SAFE_ALPHABET, src);
    }

    /** Allocates the padded output array and encodes the whole of {@code src} into it. */
    private static byte[] encodeWithAlphabet(char[] alphabet, byte[] src)
    {
        byte[] dst = new byte[4 * ((src.length + 2) / 3)]; // 4 output chars per (padded) 3 byte group
        int written = base64Encode(alphabet, src, 0, src.length, dst);
        return (written == dst.length) ? dst : Arrays.copyOf(dst, written);
    }

    /**
     * Base64 encodes {@code src[off..end)} into {@code dst} starting at index 0, padding the
     * final group with '='.
     *
     * @return the number of bytes written to {@code dst}
     */
    private static int base64Encode(char[] base64Charset, byte[] src, int off, int end, byte[] dst)
    {
        int sp = off;
        int dp = 0;
        int fullGroupsEnd = off + (end - off) / 3 * 3;
        while (sp < fullGroupsEnd)
        {
            // pack 3 input bytes into 24 bits, then emit them as 4 six-bit symbols
            int bits = (src[sp++] & 0xff) << 16 | (src[sp++] & 0xff) << 8 | (src[sp++] & 0xff);
            dst[dp++] = (byte) base64Charset[(bits >>> 18) & 0x3f];
            dst[dp++] = (byte) base64Charset[(bits >>> 12) & 0x3f];
            dst[dp++] = (byte) base64Charset[(bits >>> 6) & 0x3f];
            dst[dp++] = (byte) base64Charset[bits & 0x3f];
        }
        if (sp < end)
        { // 1 or 2 leftover bytes
            int b0 = src[sp++] & 0xff;
            dst[dp++] = (byte) base64Charset[b0 >> 2];
            if (sp == end)
            {
                dst[dp++] = (byte) base64Charset[(b0 << 4) & 0x3f];
                dst[dp++] = '=';
                dst[dp++] = '=';
            }
            else
            {
                int b1 = src[sp++] & 0xff;
                dst[dp++] = (byte) base64Charset[(b0 << 4) & 0x3f | (b1 >> 4)];
                dst[dp++] = (byte) base64Charset[(b1 << 2) & 0x3f];
                dst[dp++] = '=';
            }
        }
        return dp;
    }

    /**
     * Removes at most two trailing '=' padding characters from a Base64 string.
     *
     * @param b64EncodedString the Base64 string, may be null
     * @return the string without trailing padding, or null when the input is null
     */
    public static String trimTrailingBase64Padding(String b64EncodedString)
    {
        if (b64EncodedString == null)
        {
            return null;
        }
        int length = b64EncodedString.length();
        if (b64EncodedString.endsWith("=="))
        {
            return b64EncodedString.substring(0, length - 2);
        }
        if (b64EncodedString.endsWith("="))
        {
            return b64EncodedString.substring(0, length - 1);
        }
        return b64EncodedString;
    }

    /**
     * Attempt to close the input stream - internally consume any
     * resulting IOException
     *
     * @param in the input stream, may be null
     */
    public static void streamClose(InputStream in)
    {
        if (in == null)
        {
            return;
        }
        try
        {
            in.close();
        }
        catch (IOException ignored)
        {
            // best-effort close: the data has already been consumed, so nothing useful can be done
        }
    }

    /**
     * Immutable holder for two values.
     *
     * @param <A> type of the first value
     * @param <B> type of the second value
     */
    public static final class Pair<A, B>
    {
        private final A val0;
        private final B val1;

        /**
         * @param value0 the first value
         * @param value1 the second value
         */
        public Pair(final A value0, final B value1)
        {
            this.val0 = value0;
            this.val1 = value1;
        }

        /** @return the first value */
        public A getValue0()
        {
            return this.val0;
        }

        /** @return the second value */
        public B getValue1()
        {
            return this.val1;
        }

        @Override
        public boolean equals(Object other)
        {
            if (this == other)
            {
                return true;
            }
            if (!(other instanceof Pair))
            {
                return false;
            }
            Pair<?, ?> that = (Pair<?, ?>) other;
            return nullSafeEquals(this.val0, that.val0) && nullSafeEquals(this.val1, that.val1);
        }

        @Override
        public int hashCode()
        {
            int h0 = (this.val0 == null) ? 0 : this.val0.hashCode();
            int h1 = (this.val1 == null) ? 0 : this.val1.hashCode();
            return 31 * h0 + h1;
        }

        @Override
        public String toString()
        {
            return "(" + this.val0 + ", " + this.val1 + ")";
        }

        private static boolean nullSafeEquals(Object a, Object b)
        {
            return (a == null) ? (b == null) : a.equals(b);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment