Skip to content

Instantly share code, notes, and snippets.

@timrobertson100
Created September 18, 2014 14:02
Show Gist options
  • Save timrobertson100/2d6239b5f846ff8f261f to your computer and use it in GitHub Desktop.
Save timrobertson100/2d6239b5f846ff8f261f to your computer and use it in GitHub Desktop.
package org.gbif.hadoop.compress;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
/**
 * A {@link java.util.zip.DeflaterOutputStream} which runs a {@link Deflater} in <i>no wrap</i> mode so that no header
 * or footer is written to the stream. It additionally exposes the length of the compressed data, and the CRC32 and
 * length of the uncompressed data, so that callers can write their own framing around the raw deflate output.
 * <p>
 * The stream is created with sync-flush enabled, so {@link #flush()} pushes all pending compressed data to the
 * underlying stream. The deflate stream is never <i>finished</i> (see {@link #finish()}).
 */
public class SplittableDeflaterOutputStream extends DeflaterOutputStream {

  private final Checksum checksum = new CRC32(); // tracks the uncompressed data

  // Set once close() has run; the deflater must not be touched after that point.
  private boolean closed;

  // Totals captured at close time, because an ended Deflater can no longer report them.
  private long uncompressedLength;
  private long compressedLength;

  /**
   * Creates a stream using a 1024 byte internal buffer.
   *
   * @param out the stream that receives the raw deflate data
   */
  public SplittableDeflaterOutputStream(OutputStream out) {
    this(out, 1024);
  }

  /**
   * Creates a stream using an internal buffer of the given size.
   *
   * @param out the stream that receives the raw deflate data
   * @param bufferSize the size of the internal compression buffer, in bytes
   */
  public SplittableDeflaterOutputStream(OutputStream out, int bufferSize) {
    // deflater set to nowrap (no zlib header / trailer), and the stream uses sync flush
    super(out, new Deflater(Deflater.BEST_COMPRESSION, true), bufferSize, true);
  }

  /**
   * Compresses a single byte.
   *
   * @param b the byte to write (only the low 8 bits are used)
   * @throws IOException if the byte cannot be written
   */
  @Override
  public void write(int b) throws IOException {
    // Route through the array variant, which is the single place where the checksum is updated. Updating the
    // checksum here as well would count the byte twice, since the parent's write(int) also delegates to the
    // (overridden) array variant.
    write(new byte[] {(byte) b}, 0, 1);
  }

  /**
   * Compresses {@code len} bytes from {@code b} starting at {@code off}, and adds them to the running CRC32.
   *
   * @param b the data to write
   * @param off the start offset in {@code b}
   * @param len the number of bytes to write
   * @throws IOException if the data cannot be written
   */
  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    super.write(b, off, len);
    // only reached when the parent accepted the data, so the checksum never covers rejected input
    checksum.update(b, off, len);
  }

  /**
   * Does nothing, thus no footers are written to the stream unlike the parent.
   */
  @Override
  public void finish() throws IOException {
  }

  /**
   * Flushes all pending compressed data, ends the deflater without calling finish() so no footers are written, and
   * then closes the underlying stream. Calling this method more than once has no further effect.
   * <p>
   * The uncompressed and compressed lengths remain available after the stream is closed.
   *
   * @throws IOException if flushing or closing the underlying stream fails
   */
  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }
    closed = true;
    try {
      // must happen while the deflater is still open: a sync flush calls into the deflater
      flush();
    } finally {
      try {
        // capture the totals before ending the deflater, as it cannot be queried afterwards
        uncompressedLength = def.getBytesRead();
        compressedLength = def.getBytesWritten();
        def.end(); // super will not do this, since the deflater was supplied by this class
      } finally {
        super.close(); // super will call finish, but we override that to do nothing
      }
    }
  }

  /**
   * @return The CRC of the uncompressed data that was written
   */
  public long getCRC32() {
    return checksum.getValue();
  }

  /**
   * @return The length of the uncompressed data
   */
  public long getUncompressedLength() {
    return closed ? uncompressedLength : def.getBytesRead();
  }

  /**
   * @return The length of the compressed data
   */
  public long getCompressedLength() {
    return closed ? compressedLength : def.getBytesWritten();
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment