Skip to content

Instantly share code, notes, and snippets.

@myui
Last active April 28, 2016 06:30
Show Gist options
  • Save myui/b7739e23591b87d570ecae80d1699d53 to your computer and use it in GitHub Desktop.
Save myui/b7739e23591b87d570ecae80d1699d53 to your computer and use it in GitHub Desktop.
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2015 Makoto YUI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hivemall.utils.io;
import hivemall.utils.codec.Base91;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/**
 * An {@link InputStream} filter that Base91-encodes ({@code doEncode == true}) or
 * Base91-decodes ({@code doEncode == false}) the bytes read from the wrapped stream.
 *
 * <p>The wrapped stream is consumed in chunks of up to {@code INPUT_BUFFER_SIZE} bytes. Each
 * chunk is converted by a single {@code Base91.encode}/{@code Base91.decode} call and the
 * converted bytes are then served to the caller.
 *
 * <p>NOTE(review): every chunk is converted by an independent Base91 call, so no codec state is
 * carried across chunk boundaries. Confirm that this yields the same bytes as converting the
 * whole stream at once; in decode mode the chunk boundaries are not aligned with whatever
 * boundaries the encoder used.
 *
 * <p>Mark/reset is not supported. Instances keep mutable state without any synchronization.
 */
public final class Base91InputStream extends FilterInputStream {

    /** Maximum number of raw bytes pulled from the wrapped stream per refill. */
    private static final int INPUT_BUFFER_SIZE = 2048;

    private final boolean doEncode;

    // Not final only because close() releases them.
    private/* final */byte[] inputBuffer;
    private/* final */FastByteArrayOutputStream outputBuffer;

    /** Converted bytes of the current chunk; {@code null} before the first refill and at EOF. */
    @Nullable
    private byte[] output;
    /** Index of the next unread byte in {@link #output}. */
    private int outputPos;
    /** Number of valid bytes in {@link #output}. */
    private int outputLen;
    /** Set once the wrapped stream has reported end of stream (or this stream was closed). */
    private boolean eof;

    /**
     * @param in the stream to read raw bytes from
     * @param doEncode {@code true} to Base91-encode the bytes of {@code in}, {@code false} to
     *        Base91-decode them
     */
    public Base91InputStream(@Nonnull InputStream in, boolean doEncode) {
        super(in);
        this.doEncode = doEncode;
        this.inputBuffer = new byte[INPUT_BUFFER_SIZE];
        // Initial capacity hint for the conversion buffer, derived from the Base91 ratios.
        int outputBufferSize = doEncode ? (int) Math.ceil(INPUT_BUFFER_SIZE
                * Base91.WORST_ENCODING_RATIO) : Math.round(INPUT_BUFFER_SIZE
                / Base91.BEST_ENCODING_RATIO);
        this.outputBuffer = new FastByteArrayOutputStream(outputBufferSize);
        this.output = null;
        this.outputPos = 0;
        this.outputLen = 0;
        this.eof = false;
    }

    /** @return always {@code false}; mark/reset is not supported */
    @Override
    public boolean markSupported() {
        return false;
    }

    /** @throws UnsupportedOperationException always */
    @Override
    public void mark(int readlimit) {
        throw new UnsupportedOperationException();
    }

    /** @throws UnsupportedOperationException always */
    @Override
    public void reset() {
        throw new UnsupportedOperationException();
    }

    /**
     * @return {@code 0} once end of stream has been observed, otherwise {@code 1}. End of stream
     *         is only detected by a refill, so {@code 1} may be returned although the next read
     *         returns {@code -1}.
     */
    @Override
    public int available() {
        return eof ? 0 : 1;
    }

    /**
     * Skips up to {@code n} converted bytes. At most the remainder of the current converted
     * chunk is skipped per call, so fewer than {@code n} bytes may be skipped.
     *
     * @param n the number of bytes to skip; non-positive values skip nothing
     * @return the number of bytes actually skipped, {@code 0} at end of stream
     */
    @Override
    public long skip(long n) throws IOException {
        if (n <= 0L) {
            // A negative count would otherwise move outputPos backwards (below zero).
            return 0L;
        }
        if (outputPos >= outputLen) {
            refill();
        }
        if (outputPos >= outputLen) {
            return 0L;
        }
        final int skipped = (int) Math.min(n, (long) (outputLen - outputPos));
        this.outputPos += skipped;
        return skipped;
    }

    /**
     * @return the next converted byte as a value in {@code 0..255}, or {@code -1} at end of
     *         stream
     */
    @Override
    public int read() throws IOException {
        if (outputPos >= outputLen) {
            refill();
        }
        if (outputPos >= outputLen) {
            return -1;
        }
        return output[outputPos++] & 0xff;
    }

    /**
     * Reads up to {@code len} converted bytes into {@code b}. At most the remainder of the
     * current converted chunk is returned per call.
     *
     * @return the number of bytes copied, {@code 0} if {@code len} is zero, or {@code -1} at
     *         end of stream
     * @throws NullPointerException if {@code b} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not describe a range
     *         inside {@code b}
     */
    @Override
    public int read(@Nonnull final byte[] b, final int off, final int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        } else if (off < 0 || len < 0 || len > b.length - off) {
            // "len > b.length - off" instead of "off + len > b.length": the sum can overflow.
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        }
        if (outputPos >= outputLen) {
            refill();
        }
        if (outputPos >= outputLen) {
            return -1;
        }
        final int bytes = Math.min(len, outputLen - outputPos);
        System.arraycopy(output, outputPos, b, off, bytes);
        this.outputPos += bytes;
        return bytes;
    }

    /**
     * Closes the wrapped stream and releases the internal buffers. Converted bytes that were
     * not read yet are discarded; subsequent reads report end of stream.
     */
    @Override
    public void close() throws IOException {
        try {
            in.close();
        } finally {
            this.inputBuffer = null;
            this.outputBuffer = null;
            this.output = null;
            // Without this, a read after close() would dereference the released buffers.
            this.outputPos = 0;
            this.outputLen = 0;
            this.eof = true;
        }
    }

    /**
     * Replaces the exhausted converted chunk with the conversion of the next raw chunk.
     * Keeps reading while a chunk converts to zero bytes, so that an empty conversion result is
     * not mistaken for end of stream by the callers. Does nothing once EOF has been seen.
     */
    private void refill() throws IOException {
        while (!eof) {
            final int bytesRead = in.read(inputBuffer);
            if (bytesRead == -1) {
                this.eof = true;
                this.output = null;
                this.outputPos = 0;
                this.outputLen = 0;
                return;
            }
            outputBuffer.reset();
            process(doEncode, inputBuffer, bytesRead, outputBuffer);
            // The internal array may be longer than the valid data; outputLen bounds the reads.
            this.output = outputBuffer.getInternalArray();
            this.outputPos = 0;
            this.outputLen = outputBuffer.size();
            if (outputLen > 0) {
                return;
            }
        }
    }

    /** Converts the first {@code bytesRead} bytes of {@code input} into {@code outputBuffer}. */
    private static void process(final boolean doEncode, @Nonnull final byte[] input,
            final int bytesRead, @Nonnull final FastByteArrayOutputStream outputBuffer)
            throws IOException {
        if (doEncode) {
            Base91.encode(input, 0, bytesRead, outputBuffer);
        } else {
            Base91.decode(input, 0, bytesRead, outputBuffer);
        }
    }
}
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2015 Makoto YUI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hivemall.utils.io;
import hivemall.utils.codec.Base91;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import javax.annotation.Nonnull;
/**
 * An {@link OutputStream} filter that Base91-encodes ({@code doEncode == true}) or
 * Base91-decodes ({@code doEncode == false}) the bytes written to it before passing them to the
 * wrapped stream.
 *
 * <p>Single-byte writes are collected in an internal buffer of {@code INPUT_BUFFER_SIZE} bytes
 * and converted when the buffer is full or the stream is closed. Array writes first emit the
 * buffered bytes and then convert the given range in one separate Base91 call.
 * {@code flush()} is not overridden, so it does not emit bytes still held in the internal
 * buffer; only {@link #close()} does.
 *
 * <p>NOTE(review): every conversion is an independent Base91 call, so the produced bytes depend
 * on how the data was split across write calls. Confirm that the reader side expects this.
 */
public final class Base91OutputStream extends FilterOutputStream {

    /** Capacity of the buffer that collects single-byte writes. */
    private static final int INPUT_BUFFER_SIZE = 2048;

    private final boolean doEncode;

    // Not final only because close() releases them; inputBuffer == null marks a closed stream.
    private/* final */byte[] inputBuffer;
    /** Number of pending bytes in {@link #inputBuffer}. */
    private int inputPos;
    private/* final */FastByteArrayOutputStream outputBuffer;

    /**
     * @param out the stream that receives the converted bytes
     * @param doEncode {@code true} to Base91-encode written bytes, {@code false} to
     *        Base91-decode them
     */
    public Base91OutputStream(@Nonnull OutputStream out, boolean doEncode) {
        super(out);
        this.doEncode = doEncode;
        this.inputBuffer = new byte[INPUT_BUFFER_SIZE];
        this.inputPos = 0;
        // Initial capacity hint for the conversion buffer, derived from the Base91 ratios.
        int outputBufferSize = doEncode ? (int) Math.ceil(INPUT_BUFFER_SIZE
                * Base91.WORST_ENCODING_RATIO) : Math.round(INPUT_BUFFER_SIZE
                / Base91.BEST_ENCODING_RATIO);
        this.outputBuffer = new FastByteArrayOutputStream(outputBufferSize);
    }

    /**
     * Buffers the low-order byte of {@code b}; the buffer is converted and emitted when full.
     *
     * @throws IOException if the stream is closed or emitting the full buffer fails
     */
    @Override
    public void write(final int b) throws IOException {
        ensureOpen();
        if (inputPos >= inputBuffer.length) {
            flushBuffer();
        }
        inputBuffer[inputPos++] = (byte) b;
    }

    /**
     * Emits any buffered bytes, then converts {@code b[off..off+len)} in one Base91 call and
     * writes the result to the wrapped stream. A non-positive {@code len} is a no-op.
     *
     * @throws NullPointerException if {@code b} is {@code null} and {@code len} is positive
     * @throws IndexOutOfBoundsException if the range does not lie inside {@code b}
     * @throws IOException if the stream is closed or the wrapped stream fails
     */
    @Override
    public void write(@Nonnull final byte[] b, final int off, final int len) throws IOException {
        if (len <= 0) {
            return;
        }
        // Validate before touching the wrapped stream so a bad call has no side effects.
        if (b == null) {
            throw new NullPointerException();
        }
        if (off < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }
        ensureOpen();
        flushBuffer();
        internalWrite(b, off, len);
    }

    /**
     * Converts and emits the buffered bytes, then flushes and closes the wrapped stream.
     * All three steps are attempted even if an earlier one fails; the first
     * {@link IOException} encountered is rethrown afterwards. Calling this method on an
     * already closed stream has no effect.
     */
    @Override
    public void close() throws IOException {
        if (inputBuffer == null) {
            return; // already closed
        }
        IOException thrown = null;
        try {
            flushBuffer();
        } catch (IOException e) {
            thrown = e;
        }
        try {
            out.flush();
        } catch (IOException e) {
            if (thrown == null) {
                thrown = e;
            }
        }
        try {
            out.close();
        } catch (IOException e) {
            if (thrown == null) {
                thrown = e;
            }
        }
        this.inputBuffer = null;
        this.outputBuffer = null;
        this.inputPos = 0;
        if (thrown != null) {
            throw thrown;
        }
    }

    /** Rejects use of the stream after {@link #close()} released its buffers. */
    private void ensureOpen() throws IOException {
        if (inputBuffer == null) {
            throw new IOException("Stream closed");
        }
    }

    /** Converts and emits the pending single-byte writes, if any, and empties the buffer. */
    private void flushBuffer() throws IOException {
        if (inputPos > 0) {
            internalWrite(inputBuffer, 0, inputPos);
            this.inputPos = 0;
        }
    }

    /** Converts {@code input[offset..offset+length)} and writes the result to {@code out}. */
    private void internalWrite(@Nonnull final byte[] input, final int offset, final int length)
            throws IOException {
        outputBuffer.reset();
        process(doEncode, input, offset, length, outputBuffer);
        // The internal array may be longer than the valid data; size() bounds the write.
        final byte[] output = outputBuffer.getInternalArray();
        final int len = outputBuffer.size();
        out.write(output, 0, len);
    }

    /** Encodes or decodes the given range of {@code input} into {@code outputBuffer}. */
    private static void process(final boolean doEncode, @Nonnull final byte[] input,
            final int offset, final int length,
            @Nonnull final FastByteArrayOutputStream outputBuffer) throws IOException {
        if (doEncode) {
            Base91.encode(input, offset, length, outputBuffer);
        } else {
            Base91.decode(input, offset, length, outputBuffer);
        }
    }
}
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2015 Makoto YUI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package hivemall.utils.io;
import hivemall.fm.ArrayModelTest;
import hivemall.utils.codec.Base91;
import java.io.IOException;
import java.io.InputStream;
import org.junit.Assert;
import org.junit.Test;
/**
 * Round-trip tests for {@link Base91InputStream} and {@link Base91OutputStream}: data is
 * encoded by one stream class and decoded by the other, and must come back unchanged.
 */
public class Base91OutputStreamTest {

    /** Classpath resource (resolved relative to {@link ArrayModelTest}) used as large input. */
    private static final String LARGE_RESOURCE = "bigdata.tr.txt";

    @Test
    public void testSmallEncodeInDecodeOut() throws IOException {
        byte[] expected = "abcdecf".getBytes("UTF-8");
        Assert.assertArrayEquals(expected, encodeInDecodeOut(expected));
    }

    @Test
    public void testSmallEncodeOutDecodeIn() throws IOException {
        byte[] expected = "abcdecf".getBytes("UTF-8");
        byte[] encoded = encodeOut(expected);
        Assert.assertArrayEquals(expected, decodeIn(encoded));
    }

    @Test
    public void testLargeEncodeInDecodeOut() throws IOException {
        byte[] expected = readResource(LARGE_RESOURCE);
        Assert.assertArrayEquals(expected, encodeInDecodeOut(expected));
    }

    @Test
    public void testLargeEncodeOutDecodeIn() throws IOException {
        byte[] expected = readResource(LARGE_RESOURCE);
        byte[] encoded = encodeOut(expected);
        // The stream output must agree with the one-shot codec in both directions.
        Assert.assertArrayEquals(Base91.encode(expected), encoded);
        Assert.assertArrayEquals(expected, Base91.decode(encoded));
        Assert.assertArrayEquals(expected, decodeIn(encoded));
    }

    /** Loads a classpath resource fully, failing with a clear message when it is missing. */
    private static byte[] readResource(String name) throws IOException {
        InputStream in = ArrayModelTest.class.getResourceAsStream(name);
        Assert.assertNotNull("test resource not found: " + name, in);
        try {
            return IOUtils.toByteArray(in);
        } finally {
            in.close();
        }
    }

    /** Encodes {@code data} while reading and decodes it while writing; returns the result. */
    private static byte[] encodeInDecodeOut(byte[] data) throws IOException {
        FastByteArrayInputStream bis = new FastByteArrayInputStream(data);
        Base91InputStream base91in = new Base91InputStream(bis, true);
        FastByteArrayOutputStream bos = new FastByteArrayOutputStream();
        Base91OutputStream base91os = new Base91OutputStream(bos, false);
        try {
            IOUtils.copy(base91in, base91os);
        } finally {
            // Closing emits bytes the output stream may still hold in its buffer.
            IOUtils.closeQuietly(base91os);
            base91in.close();
        }
        return bos.toByteArray();
    }

    /** Encodes {@code data} by writing it through an encoding {@link Base91OutputStream}. */
    private static byte[] encodeOut(byte[] data) throws IOException {
        FastByteArrayOutputStream bos = new FastByteArrayOutputStream();
        Base91OutputStream base91os = new Base91OutputStream(bos, true);
        try {
            base91os.write(data);
        } finally {
            IOUtils.closeQuietly(base91os);
        }
        return bos.toByteArray();
    }

    /** Decodes {@code encoded} by reading it through a decoding {@link Base91InputStream}. */
    private static byte[] decodeIn(byte[] encoded) throws IOException {
        FastByteArrayInputStream bis = new FastByteArrayInputStream(encoded);
        Base91InputStream base91in = new Base91InputStream(bis, false);
        try {
            return IOUtils.toByteArray(base91in);
        } finally {
            base91in.close();
        }
    }
}
@myui
Copy link
Author

myui commented Apr 28, 2016

There are bugs in the decoding logic.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment