Created
September 18, 2014 14:02
-
-
Save timrobertson100/2d6239b5f846ff8f261f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.gbif.hadoop.compress; | |
import java.io.IOException; | |
import java.io.OutputStream; | |
import java.util.zip.CRC32; | |
import java.util.zip.Checksum; | |
import java.util.zip.Deflater; | |
import java.util.zip.DeflaterOutputStream; | |
/**
 * A {@link java.util.zip.DeflaterOutputStream} which runs a {@link Deflater} in <i>no wrap</i> mode so no header or
 * footer are written to the stream, and which provides the length of the compressed data together with the CRC32 and
 * length of the uncompressed data.
 * <p>
 * The deflater is never finished: {@link #finish()} is a no-op and {@link #close()} only sync-flushes whatever is
 * pending, so the output ends on a flush boundary rather than on a final deflate block.
 */
public class SplittableDeflaterOutputStream extends DeflaterOutputStream {

  /** Size of the intermediate compression buffer used when the caller does not supply one. */
  private static final int DEFAULT_BUFFER_SIZE = 1024;

  private final Checksum checksum = new CRC32(); // tracks the uncompressed data

  // Set by close(); afterwards the deflater has been ended and must not be touched again.
  private boolean closed;
  // Snapshots taken in close(), because an ended Deflater can no longer report its counters.
  private long uncompressedLength;
  private long compressedLength;

  /**
   * Creates a stream with the default buffer size.
   *
   * @param out the stream receiving the compressed bytes
   */
  public SplittableDeflaterOutputStream(OutputStream out) {
    this(out, DEFAULT_BUFFER_SIZE);
  }

  /**
   * Creates a stream with the given buffer size.
   *
   * @param out the stream receiving the compressed bytes
   * @param bufferSize size of the intermediate compression buffer
   */
  public SplittableDeflaterOutputStream(OutputStream out, int bufferSize) {
    // deflater set to nowrap, and the stream set to sync-flush mode
    super(out, new Deflater(Deflater.BEST_COMPRESSION, true), bufferSize, true);
  }

  /**
   * Writes a single byte.
   *
   * @param b the byte to write (only the low eight bits are used)
   * @throws IOException if the stream is closed or the underlying write fails
   */
  @Override
  public void write(int b) throws IOException {
    // Route through the array overload, which is the only place the checksum is updated. Calling super.write(int)
    // and then updating the checksum here would count the byte twice, since the parent implementation dispatches
    // back to write(byte[], int, int).
    write(new byte[] {(byte) b}, 0, 1);
  }

  /**
   * Compresses the given bytes and adds them to the running CRC32 of the uncompressed data.
   *
   * @param b the data
   * @param off start offset in {@code b}
   * @param len number of bytes to write
   * @throws IOException if the stream is closed or the underlying write fails
   */
  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    if (closed) {
      throw new IOException("Stream closed");
    }
    super.write(b, off, len);
    checksum.update(b, off, len);
  }

  /**
   * Sync-flushes the compressor and the underlying stream. Does nothing once the stream has been closed, because
   * the deflater has been ended by then.
   *
   * @throws IOException if the underlying stream fails
   */
  @Override
  public void flush() throws IOException {
    if (!closed) {
      super.flush();
    }
  }

  /**
   * Does nothing, thus no footers are written to the stream unlike the parent.
   */
  @Override
  public void finish() throws IOException {
  }

  /**
   * Flushes the pending compressed data, closes the underlying stream and then ends the deflater. {@code finish()} is
   * never effectively invoked, so no footers are written. Calling this method more than once has no further effect.
   *
   * @throws IOException if flushing or closing the underlying stream fails
   */
  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }
    try {
      // Must happen while the deflater is still usable; flushing after end() fails and loses the pending data.
      flush();
    } finally {
      // Capture the counters before the deflater is ended so the getters keep working after close().
      uncompressedLength = def.getBytesRead();
      compressedLength = def.getBytesWritten();
      closed = true;
      try {
        super.close(); // super will call finish, but we override that to do nothing
      } finally {
        def.end(); // super will not do this for a deflater supplied by the caller
      }
    }
  }

  /**
   * @return The CRC of the uncompressed data that was written
   */
  public long getCRC32() {
    return checksum.getValue();
  }

  /**
   * @return The length of the uncompressed data
   */
  public long getUncompressedLength() {
    return closed ? uncompressedLength : def.getBytesRead();
  }

  /**
   * @return The length of the compressed data; before the stream is flushed or closed this excludes bytes still
   *         buffered inside the deflater
   */
  public long getCompressedLength() {
    return closed ? compressedLength : def.getBytesWritten();
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment