Last active
April 28, 2016 06:30
-
-
Save myui/b7739e23591b87d570ecae80d1699d53 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Hivemall: Hive scalable Machine Learning Library | |
* | |
* Copyright (C) 2015 Makoto YUI | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
package hivemall.utils.io; | |
import hivemall.utils.codec.Base91; | |
import java.io.FilterInputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import javax.annotation.Nonnull; | |
import javax.annotation.Nullable; | |
public final class Base91InputStream extends FilterInputStream { | |
private static final int INPUT_BUFFER_SIZE = 2048; | |
private final boolean doEncode; | |
private/* final */byte[] inputBuffer; | |
private/* final */FastByteArrayOutputStream outputBuffer; | |
@Nullable | |
private byte[] output; | |
private int outputPos; | |
private int outputLen; | |
private boolean eof; | |
public Base91InputStream(@Nonnull InputStream in, boolean doEncode) { | |
super(in); | |
this.doEncode = doEncode; | |
this.inputBuffer = new byte[INPUT_BUFFER_SIZE]; | |
int outputBufferSize = doEncode ? (int) Math.ceil(INPUT_BUFFER_SIZE | |
* Base91.WORST_ENCODING_RATIO) : Math.round(INPUT_BUFFER_SIZE | |
/ Base91.BEST_ENCODING_RATIO); | |
this.outputBuffer = new FastByteArrayOutputStream(outputBufferSize); | |
this.output = null; | |
this.outputPos = 0; | |
this.outputLen = 0; | |
this.eof = false; | |
} | |
@Override | |
public boolean markSupported() { | |
return false; | |
} | |
@Override | |
public void mark(int readlimit) { | |
throw new UnsupportedOperationException(); | |
} | |
@Override | |
public void reset() { | |
throw new UnsupportedOperationException(); | |
} | |
@Override | |
public int available() { | |
return eof ? 0 : 1; | |
} | |
@Override | |
public long skip(long n) throws IOException { | |
if (outputPos >= outputLen) { | |
refill(); | |
} | |
if (outputPos >= outputLen) { | |
return 0; | |
} | |
long bytes = Math.min(n, outputLen - outputPos); | |
this.outputPos += bytes; | |
return bytes; | |
} | |
@Override | |
public int read() throws IOException { | |
if (outputPos >= outputLen) { | |
refill(); | |
} | |
if (outputPos >= outputLen) { | |
return -1; | |
} | |
return output[outputPos++] & 0xff; | |
} | |
@Override | |
public int read(@Nonnull final byte[] b, final int off, final int len) throws IOException { | |
if (b == null) { | |
throw new NullPointerException(); | |
} else if (off < 0 || len < 0 || off + len > b.length) { | |
throw new IndexOutOfBoundsException(); | |
} else if (len == 0) { | |
return 0; | |
} | |
if (outputPos >= outputLen) { | |
refill(); | |
} | |
if (outputPos >= outputLen) { | |
return -1; | |
} | |
int bytes = Math.min(len, outputLen - outputPos); | |
System.arraycopy(output, outputPos, b, off, bytes); | |
this.outputPos += bytes; | |
return bytes; | |
} | |
@Override | |
public void close() throws IOException { | |
in.close(); | |
this.inputBuffer = null; | |
this.outputBuffer = null; | |
this.output = null; | |
} | |
private void refill() throws IOException { | |
if (eof) { | |
return; | |
} | |
int bytesRead = in.read(inputBuffer); | |
if (bytesRead == -1) { | |
this.eof = true; | |
this.output = null; | |
return; | |
} | |
outputBuffer.reset(); | |
process(doEncode, inputBuffer, bytesRead, outputBuffer); | |
this.output = outputBuffer.getInternalArray(); | |
this.outputPos = 0; | |
this.outputLen = outputBuffer.size(); | |
} | |
private static void process(final boolean doEncode, @Nonnull final byte[] input, | |
final int bytesRead, @Nonnull final FastByteArrayOutputStream outputBuffer) | |
throws IOException { | |
if (doEncode) { | |
Base91.encode(input, 0, bytesRead, outputBuffer); | |
} else { | |
Base91.decode(input, 0, bytesRead, outputBuffer); | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Hivemall: Hive scalable Machine Learning Library | |
* | |
* Copyright (C) 2015 Makoto YUI | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
package hivemall.utils.io; | |
import hivemall.utils.codec.Base91; | |
import java.io.FilterOutputStream; | |
import java.io.IOException; | |
import java.io.OutputStream; | |
import javax.annotation.Nonnull; | |
public final class Base91OutputStream extends FilterOutputStream { | |
private static final int INPUT_BUFFER_SIZE = 2048; | |
private final boolean doEncode; | |
private/* final */byte[] inputBuffer; | |
private int inputPos; | |
private/* final */FastByteArrayOutputStream outputBuffer; | |
public Base91OutputStream(@Nonnull OutputStream out, boolean doEncode) { | |
super(out); | |
this.doEncode = doEncode; | |
this.inputBuffer = new byte[INPUT_BUFFER_SIZE]; | |
this.inputPos = 0; | |
int outputBufferSize = doEncode ? (int) Math.ceil(INPUT_BUFFER_SIZE | |
* Base91.WORST_ENCODING_RATIO) : Math.round(INPUT_BUFFER_SIZE | |
/ Base91.BEST_ENCODING_RATIO); | |
this.outputBuffer = new FastByteArrayOutputStream(outputBufferSize); | |
} | |
@Override | |
public void write(final int b) throws IOException { | |
if (inputPos >= inputBuffer.length) { | |
flushBuffer(); | |
} | |
inputBuffer[inputPos++] = (byte) b; | |
} | |
@Override | |
public void write(@Nonnull final byte[] b, final int off, final int len) throws IOException { | |
if (len <= 0) { | |
return; | |
} | |
flushBuffer(); | |
internalWrite(b, off, len); | |
} | |
@Override | |
public void close() throws IOException { | |
IOException thrown = null; | |
try { | |
flushBuffer(); | |
} catch (IOException e) { | |
thrown = e; | |
} | |
try { | |
out.flush(); | |
out.close(); | |
} catch (IOException e) { | |
if (thrown != null) { | |
thrown = e; | |
} | |
} | |
this.inputBuffer = null; | |
this.outputBuffer = null; | |
if (thrown != null) { | |
throw thrown; | |
} | |
} | |
private void flushBuffer() throws IOException { | |
if (inputPos > 0) { | |
internalWrite(inputBuffer, 0, inputPos); | |
this.inputPos = 0; | |
} | |
} | |
private void internalWrite(@Nonnull final byte[] input, final int offset, final int length) | |
throws IOException { | |
outputBuffer.reset(); | |
process(doEncode, input, offset, length, outputBuffer); | |
byte[] output = outputBuffer.getInternalArray(); | |
int len = outputBuffer.size(); | |
out.write(output, 0, len); | |
} | |
private static void process(final boolean doEncode, @Nonnull final byte[] input, | |
final int offset, final int length, | |
@Nonnull final FastByteArrayOutputStream outputBuffer) throws IOException { | |
if (doEncode) { | |
Base91.encode(input, offset, length, outputBuffer); | |
} else { | |
Base91.decode(input, offset, length, outputBuffer); | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Hivemall: Hive scalable Machine Learning Library | |
* | |
* Copyright (C) 2015 Makoto YUI | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
package hivemall.utils.io; | |
import hivemall.fm.ArrayModelTest; | |
import hivemall.utils.codec.Base91; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import org.junit.Assert; | |
import org.junit.Test; | |
public class Base91OutputStreamTest { | |
@Test | |
public void testSmallEncodeInDecodeOut() throws IOException { | |
byte[] expected = "abcdecf".getBytes(); | |
FastByteArrayInputStream bis = new FastByteArrayInputStream(expected); | |
Base91InputStream base91in = new Base91InputStream(bis, true); | |
FastByteArrayOutputStream bos = new FastByteArrayOutputStream(); | |
Base91OutputStream base91os = new Base91OutputStream(bos, false); | |
IOUtils.copy(base91in, base91os); | |
byte[] actual = bos.toByteArray(); | |
Assert.assertArrayEquals(expected, actual); | |
} | |
@Test | |
public void testSmallEncodeOutDecodeIn() throws IOException { | |
byte[] expected = "abcdecf".getBytes(); | |
FastByteArrayOutputStream bos = new FastByteArrayOutputStream(); | |
Base91OutputStream base91os = new Base91OutputStream(bos, true); | |
base91os.write(expected); | |
IOUtils.closeQuietly(base91os); | |
byte[] encoded = bos.toByteArray(); | |
FastByteArrayInputStream bis = new FastByteArrayInputStream(encoded); | |
Base91InputStream base91in = new Base91InputStream(bis, false); | |
byte[] actual = IOUtils.toByteArray(base91in); | |
Assert.assertArrayEquals(expected, actual); | |
} | |
@Test | |
public void testLargeEncodeInDecodeOut() throws IOException { | |
InputStream in = ArrayModelTest.class.getResourceAsStream("bigdata.tr.txt"); | |
byte[] expected = IOUtils.toByteArray(in); | |
FastByteArrayInputStream bis = new FastByteArrayInputStream(expected); | |
Base91InputStream base91in = new Base91InputStream(bis, true); | |
FastByteArrayOutputStream bos = new FastByteArrayOutputStream(); | |
Base91OutputStream base91os = new Base91OutputStream(bos, false); | |
IOUtils.copy(base91in, base91os); | |
byte[] actual = bos.toByteArray(); | |
Assert.assertArrayEquals(expected, actual); | |
} | |
@Test | |
public void testLargeEncodeOutDecodeIn() throws IOException { | |
InputStream in = ArrayModelTest.class.getResourceAsStream("bigdata.tr.txt"); | |
byte[] expected = IOUtils.toByteArray(in); | |
FastByteArrayOutputStream bos = new FastByteArrayOutputStream(); | |
Base91OutputStream base91os = new Base91OutputStream(bos, true); | |
base91os.write(expected); | |
IOUtils.closeQuietly(base91os); | |
byte[] encoded = bos.toByteArray(); | |
Assert.assertArrayEquals(Base91.encode(expected), encoded); | |
Assert.assertArrayEquals(Base91.decode(encoded), expected); | |
FastByteArrayInputStream bis = new FastByteArrayInputStream(encoded); | |
Base91InputStream base91in = new Base91InputStream(bis, false); | |
byte[] actual = IOUtils.toByteArray(base91in); | |
System.out.println(new String(actual)); | |
Assert.assertArrayEquals(expected, actual); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
bugs in decoding logic