Java translation of the Tinycrunch 1.2 encoder, requires tc_boot.prg from the original Tinycrunch 1.2 distribution.
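As a pipeline stage the class is a plain Function<byte[], byte[]>. A minimal usage sketch (file names are placeholders; assumes the demo class sits in the same package as Tinycrunch_1_2, with argparse4j and Eclipse Collections on the classpath):

import java.nio.file.*;

public class CrunchDemo {
	public static void main(final String[] args) throws Exception {
		final byte[] raw = Files.readAllBytes(Paths.get("bitmap.bin")); // placeholder input file
		final byte[] packed = new Tinycrunch_1_2() //
				.withFastMode(false) // optimal (slow) parse; true would select the greedy coder
				.withVerboseOutput(true) // print the token statistics table after crunching
				.apply(raw);
		Files.write(Paths.get("bitmap.tc"), packed); // raw TinyCrunch stream, without .prg header
	}
}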
package de.plush.brix.c64graphics.core.pipeline.stages.compression;

import static de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2.Stats.StatId.*;
import static de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2.Stats.StatId.load;
import static de.plush.brix.c64graphics.core.util.Utils.subList;
import static java.lang.Float.POSITIVE_INFINITY;
import static java.lang.Integer.parseInt;
import static java.lang.Math.*;
import static java.lang.String.format;
import static java.lang.System.*;
import static java.nio.file.Paths.get;
import static java.nio.file.StandardOpenOption.*;
import static java.util.Arrays.stream;
import static java.util.Map.entry;
import static net.sourceforge.argparse4j.impl.Arguments.*;

import java.io.*;
import java.net.URISyntaxException;
import java.nio.file.*;
import java.util.*;
import java.util.function.*;

import org.eclipse.collections.api.list.primitive.*;
import org.eclipse.collections.api.tuple.primitive.ByteObjectPair;
import org.eclipse.collections.impl.factory.primitive.*;
import org.eclipse.collections.impl.list.mutable.primitive.ByteArrayList;
import org.eclipse.collections.impl.map.mutable.primitive.ObjectIntHashMap;

import de.plush.brix.c64graphics.core.util.Utils;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.inf.*;
/**
 * Java implementation of the TinyCrunch 1.2 encoder.<br>
 *
 * @see <a href="https://csdb.dk/release/?id=168629">https://csdb.dk/release/?id=168629</a>
 * @author Wanja Gayk
 * @author © 2018 Christopher Jam, original Python code
 *
 */
// https://gist.github.com/Brixomatic/2dfc58bd9c0f830695d812489a0cf82a
@SuppressWarnings({ "checkstyle:npathComplexity", "checkstyle:cyclomaticComplexity" })
public class Tinycrunch_1_2 implements Function<byte[], byte[]> {

	private boolean fast;
	private boolean verbose;

	/**
	 * Create a new Tinycrunch 1.2 compression stage with optimum (slow) compression and basic output.
	 *
	 * @see #withFastMode(boolean)
	 * @see #withVerboseOutput(boolean)
	 */
	public Tinycrunch_1_2() {}

	private Tinycrunch_1_2(final boolean fast, final boolean verbose) {
		this.fast = fast;
		this.verbose = verbose;
	}

	/**
	 * @param fast
	 *            <tt>true</tt> to switch to greedy (fast) compression, trading compression ratio for more speed
	 * @return a new instance of this class with this configuration set accordingly.
	 */
	public Tinycrunch_1_2 withFastMode(final boolean fast) {
		return new Tinycrunch_1_2(fast, verbose);
	}

	/**
	 * @param verbose
	 *            <tt>true</tt> to get a more detailed report at the end of crunching.
	 * @return a new instance of this class with this configuration set accordingly.
	 */
	public Tinycrunch_1_2 withVerboseOutput(final boolean verbose) {
		return new Tinycrunch_1_2(fast, verbose);
	}

	@Override
	public byte[] apply(final byte[] input) {
		final byte[] packed = toEncodedBinary(input);
		out.println("raw length: " + input.length + " -> " + packed.length + " (bytes)");
		return packed;
	}

	private byte[] toEncodedBinary(final byte[] rawInput) {
		final MutableByteList inputData = ByteLists.mutable.of(rawInput);
		out.println(format("%05d bytes read from stream", inputData.size()));
		final var op = new CCruncher(fast);
		final var args = new Namespace(Map.of());
		final var outputBytes = raw_crunch(args, op, inputData);
		if (verbose) {
			op.report(true);
		}
		out.println(format("%05d bytes written to stream (%4.1f%% of original size)", //
				outputBytes.size(), outputBytes.size() * 100.0 / inputData.size()) //
		);
		return outputBytes.toArray();
	}
	/*
	 * NOTE: This is a pretty straight translation of the original Python code. The source contains several oddities that only exist because
	 * the Python language is the way it is.
	 * The source should also not be "beautified", as it should stay easy to compare with the original.
	 */
	static Path boot_path;
	static {
		try {
			boot_path = get(Tinycrunch_1_2.class.getResource("resources/tc_boot.prg").toURI());
		} catch (final URISyntaxException e) {
			throw new IllegalStateException(e);
		}
	}

	static CDataChunk load_prg(final File fi) throws IOException {
		final var data = ByteLists.mutable.of(Files.readAllBytes(fi.toPath()));
		final var addr = Byte.toUnsignedInt(data.get(0)) + (Byte.toUnsignedInt(data.get(1)) << 8); // little-endian load address; mask sign extension and shift only the high byte
		return new CDataChunk(addr, subList(data, 2, data.size()));
	}
	static void save_prg(final File fo, final CDataChunk data_chunk) throws IOException {
		final var data = ByteLists.mutable.of((byte) (data_chunk.addr & 255), (byte) (data_chunk.addr >>> 8));
		data.addAll(data_chunk.data);
		Files.write(fo.toPath(), data.toArray(), WRITE, CREATE);
	}

	private static class CDataChunk {
		int addr;
		MutableByteList data;
		String se; // source extent description. Doubles as a marker that this data has been compressed

		public CDataChunk(final int addr, final MutableByteList data) {
			this(addr, data, null);
		}

		public CDataChunk(final int addr, final MutableByteList data, final String se) {
			this.addr = addr;
			this.data = data;
			this.se = se;
		}

		static CDataChunk ending_at(final int ea, final MutableByteList data, final String se) {
			return new CDataChunk(ea - data.size(), data, se);
		}

		int end_addr() {
			return addr + data.size();
		}

		String ext() {
			return String.format("0x%04X-0x%04X", addr, end_addr());
		}

		CDataChunk[] split_at(final int addr) {
			assert this.addr <= addr : "split address lower than start address";
			assert addr <= end_addr() : "split address higher than end address";
			final var sp = addr - this.addr;
			final var lower = new CDataChunk(this.addr, subList(data, 0, sp)); // the lower half keeps the original start address
			final var higher = new CDataChunk(addr, subList(data, sp, data.size()));
			return new CDataChunk[] { lower, higher };
		}
		void extend(final CDataChunk addend) {
			assert end_addr() == addend.addr : "extension start address does not match end address";
			data.addAll(addend.data);
		}
	}

	private static class OptToken {
		float cost;
		ByteList data;
		Integer next;
		int length;
		int offset;

		public OptToken(final float cost, final ByteList data, final Integer next, final int length, final int offset) {
			this.cost = cost;
			this.data = data;
			this.next = next;
			this.length = length;
			this.offset = offset;
		}
		@Override
		public String toString() {
			String dr;
			if (data.size() < 5) {
				dr = data.collect(Utils::toHexString).makeString(" ");
			} else {
				dr = data.size() + " bytes";
			}
			return String.format("OptToken(%s,[%s],%s,%d,%d)", cost, dr, next, length, offset); // cost is a float, "%d" would throw an IllegalFormatConversionException
		}
	}
	private static class CCruncher {
		int longest_copy = 17;
		int max_pair_offset = 63;
		int max_offset = 2048;
		int longest_literal = 64;
		boolean greedy;
		Stats stats;

		private boolean inPlace;
		private MutableByteList output_bytes;
		private CDataChunk input_chunk;
		private MutableByteList data;
		private MutableByteList remainder;
		private MutableByteList literals;

		public CCruncher(final boolean greedy) {
			this.greedy = greedy;
			reset_stats();
		}

		void reset_stats() {
			stats = new Stats();
		}

		MutableByteList crunch(final CDataChunk input_chunk, final boolean inPlace) {
			this.inPlace = inPlace;
			output_bytes = ByteLists.mutable.of((byte) 0, (byte) 0, (byte) 0); // leave space for target address, and final byte
			this.input_chunk = input_chunk;
			final var split = input_chunk.split_at(max(input_chunk.addr, input_chunk.end_addr() - 1)); // split_at expects an address, not an index
			data = split[0].data; // everything except the last byte ( input_chunk.data[:-1] )
			remainder = split[1].data; // last item in the array ( input_chunk.data[-1:] )
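			// (the first remainder byte is later moved into the 3-byte header, see output_token_list and log_move)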
			if (data.size() == 0) { return output_token_list(new ArrayList<OptToken>()); }
			if (greedy) { return crunch_greedy(); }
			return crunch_optimal();
		}
		/** "10xxxxxx" */
		ByteList encode_literal(final ByteList literal) {
			assert 0 < literal.size() && literal.size() <= 64 : "literal size out of bounds 0 < size <= 64, size: " + literal.size();
			final byte count = (byte) (128 + literal.size() - 1);
			final var encoding = ByteLists.mutable.of(count);
			encoding.addAll(literal);
			return encoding;
		}

		/**
		 * "11oooooo" offsets < 64 <br>
		 * "0aaaaabb bbbbbbbb" bigger offsets
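		 * <br>
		 * Worked examples, derived from the code below (offsets are stored inverted, lengths biased by 2):<br>
		 * length 2, offset 5 -> 0xFA ( 0xc0 + (5 ^ 63) )<br>
		 * length 3, offset 1 -> 0x0F 0xFF ( offset-1 = 0: hi = (0 ^ 7) + ((3 - 2) << 3) = 0x0F, lo = 0 ^ 0xff = 0xFF )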
		 */
		ByteList encode_copy(final int length, int offset) {
			assert 1 < length && length <= longest_copy : "copy sequence out of bounds, 1 < length <= " + longest_copy + ", length: " + length;
			if (length == 2 && offset <= max_pair_offset) {
				// "11oooooo"
				assert offset < 64 : "offset >= 64, offset: " + offset;
				final byte count = (byte) ((0xc0 + offset) ^ 63); // ie, offset^0x7f
				return ByteLists.mutable.of(count);
			}
			// "0aaaaabb bbbbbbbb"
			assert 0 < offset && offset < 2048 : "offset out of bounds : 0 < offset < 2048, offset: " + offset;
			// bits 1.5.10
			// assert(2<= length<32)
			offset -= 1;
			final byte hi = (byte) (0x00 + (((offset & 0x700) >>> 8) ^ 7) + ((length - 2) << 3));
			final byte lo = (byte) ((offset & 0xff) ^ 0xff);
			return ByteLists.mutable.of(hi, lo);
		}
		/** just finds for any point the string that extends the furthest */
		private MutableByteList crunch_greedy() {
			final var longestCopy = longest_copy;
			final var maxOffset = max_offset;
			final var tokens = new ArrayList<OptToken>();
			literals = ByteLists.mutable.empty();
			final Runnable tokenize_literal_if_present = () -> {
				if (literals.size() > 0) {
					tokens.add(new OptToken(0, encode_literal(literals), 0, literals.size(), 0));
					literals = ByteLists.mutable.empty();
				}
			};
			final var data = this.data;
			final var l = data.size();
			var nj = 0;
			/*
			 * last_seen = defaultdict(lambda: -(2 ** 17))
			 * The argument of a defaultdict (in this case "lambda: -(2 ** 17)")
			 * will be called when you try to access a key that doesn't exist,
			 * and its return value will be set as the new value of this key.
			 * In Java we do that by using another method than get,
			 * e.g. computeIfAbsent in java.util or getIfAbsentPut in Eclipse Collections.
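			 * So last_seen.getIfAbsentPut(key, -(1 << 17)) below both yields the default
			 * and records it, exactly like the Python defaultdict access does.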
			 */
			final var last_seen = new ObjectIntHashMap<ByteList>(); // address key was last seen starting at
			// kl = [tuple(data[j:j + i]) for i in range(longestCopy + 1) if j + i <= l]
			// data = [1,2,3]
			// kl = [(), (1,), (1,2), (1,2,3)]
			// kl = [(), (2,), (2, 3)]
			// kl = [(), (3,)]
			for (int j = 0; j < l; ++j) {
				final var kl = new ArrayList<ByteList>();
				for (int i = 0; i < longestCopy + 1 && j + i <= l; ++i) {
					kl.add(subList(data, j, j + i));
				}
				if (j == nj) {
					var length = kl.size() - 1;
					var noBreak = true;
					while (length > 1) {
						final int offset = j - last_seen.getIfAbsentPut(kl.get(length), -(1 << 17));
						if (offset < maxOffset) {
							final var token = encode_copy(length, offset);
							if (token.get(0) != 0) {
								out.println(token.collect(b -> Utils.toHexString(b)).makeString());
								tokenize_literal_if_present.run();
								assert offset > 0 : "offset expected to be > 0, offset: " + offset;
								tokens.add(new OptToken(0, token, 0, length, offset));
								nj = j + length;
								noBreak = false;
								break;
							}
						}
						length -= 1;
					}
					if (noBreak) {
						literals.add(data.get(j));
						if (literals.size() == longest_literal) {
							tokenize_literal_if_present.run();
						}
						nj = j + 1;
					}
				}
				final var jj = j;
				kl.subList(1, kl.size()).forEach(k -> last_seen.put(k, jj));
			}
			tokenize_literal_if_present.run();
			return output_token_list(tokens);
		}
		private MutableByteList crunch_optimal() {
			final var longestCopy = longest_copy;
			final var maxPairOffset = max_pair_offset;
			final var longestLiteral = longest_literal;
			final var data = this.data;
			final var l = data.size();
			final var last_seen = new ObjectIntHashMap<ByteList>(); // address key was last seen starting at
			/*
			 * cfile[j] contains the tail of a list that in turn contains the
			 * cheapest representation of the first j bytes of the file.
			 * data contains the bytes that must be added to the stream to
			 * cover the bytes between that token and its predecessor.
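			 * Each OptToken.next holds the index where the predecessor's coverage ends,
			 * so following the next fields from the final token walks the optimal parse backwards.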
			 */
			final var cfile = new ArrayList<>(List.of(new OptToken(0, null, null, 0, 0)));
			OptToken best = null;
			for (int j = 1; j < l + 1; ++j) {
				final var copy_candidates = new ArrayList<ByteList>();
				for (int i = 2; i < longestCopy + 1 && j - i >= 0; ++i) {
					copy_candidates.add(subList(data, j - i, j));
				}
				best = new OptToken(POSITIVE_INFINITY, null, null, 0, 0);
				for (final var k : copy_candidates) {
					final var mra = last_seen.getIfAbsentPut(k, -(1 << 17));
					final var length = k.size();
					final var start_addr = j - length;
					assert length > 1 : "length must be > 1, length: " + length;
					final var offset = j - mra;
					if (offset < max_offset) {
						final var nb = length > 2 || offset > maxPairOffset ? 2.012f : 1.01f;
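						// (token byte costs carry tiny fractional penalties, presumably so that parses with equal byte counts tie-break towards fewer tokens)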
						final var cost = cfile.get(start_addr).cost + nb;
						if (cost < best.cost) {
							final var token = encode_copy(length, offset);
							if (token.get(0) != 0) {
								best = new OptToken(cost, token, start_addr, length, offset);
								assert mra - length < j : "Expected mra-length < j. mra: " + mra + " - length: " + length //
										+ " = " + (mra - length) + ", j: " + j;
							}
						}
					}
				}
				for (int length = 1; length < longestLiteral + 1; ++length) {
					final var start_addr = j - length;
					if (start_addr >= 0) {
						final var cost = cfile.get(start_addr).cost + length + 1.01f;
						if (cost < best.cost) {
							final var literal = subList(data, start_addr, j);
							best = new OptToken(cost, encode_literal(literal), start_addr, literal.size(), 0);
							assert best.data.size() == length + 1 : "expected data.size() == length+1, data.size(): " + best.data.size()
									+ ", length+1: " + (length + 1);
							assert start_addr < j : "expected startAddr < j, startAddr: " + start_addr + ", j: " + j;
						}
					}
				}
				cfile.add(best);
				for (final var k : copy_candidates) {
					last_seen.put(k, j);
				}
			}
			assert best != null : "expected best != null";
			final var tokens = new ArrayList<>(Arrays.asList(best));
			while (best.next != 0) {
				best = cfile.get(best.next);
				tokens.add(best);
			}
			Collections.reverse(tokens);
			return output_token_list(tokens);
		}

		private MutableByteList output_token_list(final ArrayList<OptToken> tokens) {
			var j = tokens.size();
			var watermark = j; // from here on, just store raw data
			var raw_bytes_after_watermark = 0;
			if (inPlace) {
				// Scan the token list from the end back.
				// Whenever the compressed remainder is equal to or longer than the raw remainder,
				// set that token as the start of the raw data.
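				// (Tokens past the watermark are emitted verbatim: near the end of an in-place
				// decrunch, the compressed stream would otherwise overlap bytes still to be read.)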
				var raw_bytes_after_tokenJ = 0;
				var comp_bytes_after_token_j = 0;
				raw_bytes_after_watermark = 0;
				while (j > 0) {
					j -= 1;
					raw_bytes_after_tokenJ += tokens.get(j).length;
					comp_bytes_after_token_j += tokens.get(j).data.size();
					if (raw_bytes_after_tokenJ <= comp_bytes_after_token_j) {
						watermark = j;
						raw_bytes_after_watermark += raw_bytes_after_tokenJ;
						raw_bytes_after_tokenJ = 0;
						comp_bytes_after_token_j = 0;
					}
				}
			}
			for (final var t : tokens.subList(0, watermark)) {
				stats.log_token(t);
				output_bytes.addAll(t.data);
			}
			if (inPlace && raw_bytes_after_watermark > 0) {
				final var newRemainder = subList(data, data.size() - raw_bytes_after_watermark, data.size());
				newRemainder.addAll(remainder);
				remainder = newRemainder;
			}
			if (remainder.size() > 1) {
				stats.log_raw(remainder.size() - 1);
			}
			stats.log_header(3);
			stats.log_move(1);
			output_bytes.set(0, remainder.get(0));
			output_bytes.set(1, (byte) ((input_chunk.addr - 1) % 256));
			output_bytes.set(2, (byte) ((input_chunk.addr - 1) >>> 8));
			stats.log_terminator();
			remainder.set(0, (byte) 0); // terminator for compressed data
			output_bytes.addAll(remainder);
			remainder = null;
			return output_bytes;
		}
		void report() {
			report(false);
		}

		void report(final boolean raw) {
			stats.report(raw);
		}
	}

	static class Stats {

		static class StatCounter {
			Object legend;
			int ct, bi, bo;

			StatCounter(final Object legend) {
				this.legend = legend;
				reset();
			}

			void reset() {
				ct = 0;
				bi = 0;
				bo = 0;
			}

			void accBi(final int bi) {
				accCIO100(1, bi, 0);
			}

			void accBo(final int bo) {
				accCIO100(1, 0, bo);
			}

			void accBiBo(final int bi, final int bo) {
				accCIO100(1, bi, bo);
			}

			void accBiCt(final int bi, final int ct) {
				accCIO100(ct, bi, 0);
			}

			void accCIO100(final int ct, final int bi, final int bo) {
				this.ct += ct;
				this.bi += bi;
				this.bo += bo;
			}

			int cost() {
				return max(0, bo - bi);
			}

			int savings() {
				return max(0, bi - bo);
			}

			void print(final String fs, final IntFunction<String> ent, final IntFunction<String> ifp) {
				final var l_c = ct;
				final var l_o = bo;
				final var l_i = bi;
				if (l_c > 0) {
					out.println(String.format(fs, legend, l_c, ent.apply(l_c), ifp.apply(l_i), ifp.apply(l_o - l_i), ifp.apply(l_i - l_o), ifp.apply(l_o)));
				}
			}
		}
		enum StatId {
			pair, copy, literal, header, move, gap, boot, raw, end, load, save;
		}

		String rs;
		Map<StatId, StatCounter> counts;
		MutableIntList offsets;
		MutableIntList litlens;

		Stats() {
			rs = "";
			counts = Map.ofEntries(//
					entry(pair, new StatCounter("2 byte copies")), //
					entry(copy, new StatCounter("n byte copies")), //
					entry(literal, new StatCounter("literal strings")), //
					entry(header, new StatCounter("segment headers")), //
					entry(move, new StatCounter("moved to header")), //
					entry(gap, new StatCounter("gaps")), //
					entry(boot, new StatCounter("boot")), //
					entry(raw, new StatCounter("uncompressed")), //
					entry(end, new StatCounter("terminators")), //
					entry(load, new StatCounter("load address")), //
					entry(save, new StatCounter("save address")) //
			);
			offsets = IntLists.mutable.empty();
			litlens = IntLists.mutable.empty();
		}

		void log_token(final OptToken t) {
			final var dl = t.data.size();
			if (t.offset == 0) {
				log_literal(t.length, dl);
			} else if (dl == 1) {
				log_pair(t.offset, dl);
			} else {
				log_copy(t.length, t.offset, dl);
			}
		}
		private void log_pair(final int offset, final int dl) {
			offsets.add(offset);
			rs += '1';
			counts.get(pair).accBiBo(2, dl);
			assert dl == 1;
		}

		private void log_copy(final int length, final int offset, final int dl) {
			offsets.add(offset);
			rs += '@';
			counts.get(copy).accBiBo(length, dl);
			assert dl == 2;
		}

		private void log_literal(final int length, final int dl) {
			litlens.add(length);
			rs += '.';
			assert dl == 1 + length;
			counts.get(literal).accBiBo(length, dl);
		}

		void log_boot(final int length) {
			counts.get(boot).accBo(length);
		}

		void log_gap(final int length) {
			counts.get(gap).accBo(length);
		}

		void log_header(final int length) {
			counts.get(header).accBo(length);
		}

		void log_move(final int length) {
			counts.get(move).accBi(length);
		}

		void log_raw(final int length) {
			counts.get(raw).accBiBo(length, length);
		}

		void log_terminator() {
			counts.get(end).accBo(1);
		}

		void log_load_addr() {
			counts.get(load).accBiCt(2, 1);
		}

		void log_save_addr() {
			counts.get(save).accBo(2);
		}
		void report() {
			report(false);
		}

		void report(final boolean raw_) {
			final var g1 = new StatId[] { copy, pair, literal, end };
			final var g2 = new StatId[] { move, load, save, boot, header, gap, raw };
			if (raw_) {
				stream(g2).forEach(k -> counts.get(k).reset());
			}
			final var symcount = stream(g1).mapToInt(k -> counts.get(k).ct).sum();
			final var vi = counts.values();
			final var s_c = vi.stream().mapToInt(c -> c.ct).sum();
			final var s_i = vi.stream().mapToInt(c -> c.bi).sum();
			final var cost = (float) vi.stream().mapToDouble(c -> c.cost()).sum();
			final var savings = vi.stream().mapToInt(c -> c.savings()).sum();
			final var s_o = vi.stream().mapToInt(c -> c.bo).sum();
			assert round(s_i + cost - savings) == s_o : "input + cost - savings doesn't equal output"; // need to use round, for floating point inaccuracies
			final IntFunction<String> ent = x -> x <= 0 ? " n/a" : "" + format("%7.3f", log((x + 1e-20) / (symcount + 1e-20)) / log(0.5));
			final IntFunction<String> noent = x -> "";
			final IntFunction<String> ifp = x -> x <= 0 ? "" : "" + x;
			final var hr = "+-----------------+------------------+----------------------------------+";
			final var fs = "|%16s | %7s %7s | %7s %7s %7s %7s |";
			out.println();
			out.println(hr);
			out.println(format(fs, "", "count", "entropy", "input", "cost", "savings", "output"));
			out.println(hr);
			stream(g1).map(k -> counts.get(k)).forEach(c -> c.print(fs, ent, ifp));
			out.println(hr);
			if (!raw_) {
				stream(g2).map(k -> counts.get(k)).forEach(c -> c.print(fs, noent, ifp));
			}
			out.println(format(fs, "total", s_c, "", s_i, round(cost), savings, s_o));
			out.println(hr);
			offsets.sortThis();
			if (!offsets.isEmpty()) {
				out.println(format("median, maximum offset used = % d, % d", offsets.get(floorDiv(offsets.size(), 2)), offsets.get(offsets.size() - 1)));
			}
			litlens.sortThis();
			if (!litlens.isEmpty()) {
				// in the original the last arg for format is "self.litlens[-5:]", a list slice, while the formatting "{:}" does not
				// suggest an array, but Python's format takes it anyway
				out.println(format("median, maximum litlen used = % d, %s", litlens.get(floorDiv(litlens.size(), 2)),
						subList(litlens, max(0, litlens.size() - 5), litlens.size())));
			}
			out.println();
		}
	}

	static byte hi(final int x) {
		return (byte) (x >>> 8 & 0xff);
	}

	static byte lo(final int x) {
		return (byte) (x & 0xff);
	}
	static Namespace parse_args(final String[] args) throws ArgumentParserException {
		final ArgumentType<Integer> hex = (final ArgumentParser parser, final Argument arg, final String value) -> parseInt(value, 16);
		final ArgumentParser parser = ArgumentParsers.newFor("Java tinycrunch").build()//
				.version("${prog} 1.2") //
				.defaultHelp(true) //
				.description(""); //
		parser.addArgument("-V", "--version").action(version());
		parser.addArgument("infile").type(fileType().verifyCanRead().verifyIsFile()).help("(.prg file unless -r is used)");
		// parser.addArgument("outfile").type(fileType().verifyCanWrite().or().verifyCanCreate()).help("(.prg file unless -r is used)");
		parser.addArgument("outfile").type(fileType()).help("(.prg file unless -r is used)");
		final var group = parser.addMutuallyExclusiveGroup().required(true);
		group.addArgument("-s", "--startAddress").dest("startAddr").help("start address").type(hex).setDefault((Integer) null);
		group.addArgument("-e", "--endAddress").dest("endAddr").help("end address").type(hex).setDefault((Integer) null);
		group.addArgument("-i", "--inPlace").dest("inPlace").help("compress to end of destination area").action(storeTrue());
		group.addArgument("-c", "--selfExtracting").action(storeTrue());
		group.addArgument("-r", "--raw").action(storeTrue()).help("read/write .bin files, no header. cf readme.txt");
		parser.addArgument("-j", "--jmp").dest("execAddr").help("execution address for self extracting .prg (requires -c)").setDefault(0x080d).type(hex); // default must be an Integer, or getInt("execAddr") fails when -j is omitted
		parser.addArgument("-p", "--paramFile").type(fileType().verifyCanWrite().or().verifyCanCreate()).setDefault((Object) null).help(
				"generated asm include file containing a define for the output start address");
		parser.addArgument("-v", "--verbose").action(storeTrue());
		parser.addArgument("-f", "--fast").action(storeTrue()).help("faster (greedy) compression (default is optimal size)");
		return parser.parseArgs(args);
	}
	static CDataChunk level_crunch(final Namespace args, final CCruncher op, final CDataChunk input_chunk) {
		final var output_bytes = op.crunch(input_chunk, args.getBoolean("inPlace"));
		int la;
		if (args.getBoolean("inPlace")) {
			la = input_chunk.end_addr() - output_bytes.size();
		} else if (args.getInt("endAddr") != null) {
			la = args.getInt("endAddr") - output_bytes.size();
		} else {
			la = args.getInt("startAddr");
		}
		final var output_chunk = new CDataChunk(la, output_bytes);
		if (args.get("paramFile") != null) {
			try {
				final Path path = args.<File> get("paramFile").toPath();
				Files.writeString(path, format("dcSrc=$%04X", output_chunk.addr)); // write only the asm define for the output start address
			} catch (final Exception e) {
				e.printStackTrace();
			}
		}
		return output_chunk;
	}
	static MutableByteList raw_crunch(final Namespace args, final CCruncher op, final MutableByteList input_data) {
		assert !Boolean.TRUE.equals(args.getBoolean("inPlace")) : "--inPlace makes no sense for raw mode"; // null-safe: the stream pipeline passes an empty Namespace
		assert args.get("paramFile") == null : "cannot generate paramFile for raw mode";
		final var id = ByteLists.mutable.ofAll(input_data);
		id.add((byte) 0); // add fake load address, and dummy byte-to-move-to-header
		final var input_chunk = new CDataChunk(0, id); // fake load address = 0
		final var output_bytes = op.crunch(input_chunk, false);
		return subList(output_bytes, 3, output_bytes.size()); // drop header
	}
	/**
	 * Compresses the input file in two segments.
	 * The first contains a compressed copy of the data
	 * to be decrunched to the area after the loaded file.
	 * The second contains the remaining data,
	 * compressed in place.<br>
	 * <br>
	 * It takes an iteration or three to find the optimal split point.
	 *
	 * @throws IOException
	 */
	static CDataChunk sfx_crunch(final Namespace args, final CCruncher op, final CDataChunk input_chunk) throws IOException {
		final var __ = new Object() {
			void dprint(final String... prargs) {
				if (args.getBoolean("verbose")) {
					stream(prargs).forEach(out::println);
					if (prargs.length == 0) {
						out.println();
					}
				}
			}

			void disp_chunks(final List<CDataChunk> chunks) {
				chunks.forEach(chunk -> {
					if (chunk.se == null) {
						dprint(format("data segment at %s, uncompressed", chunk.ext()));
					} else {
						dprint(format("data segment at %s, decrunches to %s", chunk.ext(), chunk.se));
					}
				});
			}
		};
		__.dprint();
		__.dprint(input_chunk.ext());
		final var boot_chunk = load_prg(boot_path.toFile());
		final var oCo = boot_chunk.split_at(boot_chunk.end_addr() - 3);
		final var output_chunk = oCo[0];
		final var offsets = oCo[1];
		final var patch_offsets = offsets.data;
		final var oStart = patch_offsets.get(2);
		__.dprint(format(" boot at %s", output_chunk.ext()));
		final var data_start = output_chunk.end_addr();
		final var monolith = new CDataChunk(data_start, op.crunch(input_chunk, false), input_chunk.ext());
		final var monolith_stats = op.stats;
		__.dprint(format(" monolith at %s", monolith.ext()));
		List<CDataChunk> chunks;
		if (input_chunk.addr >= monolith.end_addr() || input_chunk.end_addr() <= monolith.addr) {
			__.dprint("(doesn't overlap output, using as is)");
			chunks = List.of(monolith); // this is safe because it doesn't overlap the input chunk
		} else {
			var split_point = min(monolith.end_addr() + 12, input_chunk.end_addr() - 1); // assume compression is slightly worse
			var max_gap = floorDiv(input_chunk.data.size(), 2000); // try for 0.05% bytes wasted between output segments
			while (true) {
				op.reset_stats();
				if (split_point >= input_chunk.end_addr()) {
					__.dprint(format("\nnew split point of 0x%04x is past end of input.", split_point));
					split_point = data_start;
					if (input_chunk.end_addr() > data_start) { // can only split here if the input actually extends past the boot segment
						final var lCuC = input_chunk.split_at(split_point);
						final var lower_chunk = lCuC[0];
						final var upper_chunk = lCuC[1];
						chunks = List.of( //
								upper_chunk, //
								new CDataChunk(input_chunk.end_addr(), op.crunch(lower_chunk, false), lower_chunk.ext()) //
						);
					} else {
						__.dprint("Just storing raw input file after header");
						chunks = List.of( //
								input_chunk //
						);
					}
					__.disp_chunks(chunks);
					break;
				}
				final var lCuC = input_chunk.split_at(split_point);
				final var lower_chunk = lCuC[0];
				final var upper_chunk = lCuC[1];
				chunks = List.of( //
						new CDataChunk(data_start, op.crunch(upper_chunk, false), upper_chunk.ext()), //
						CDataChunk.ending_at(split_point, op.crunch(lower_chunk, true), lower_chunk.ext()) // in place => safe
				);
				final var gap = chunks.get(1).addr - chunks.get(0).end_addr();
				if (gap < 0) {
					var adjustment = -gap;
					adjustment -= floorDiv(adjustment, 5); // reduce larger steps a little
					__.disp_chunks(chunks);
					__.dprint(format("segment overlap = %d", -gap));
					__.dprint(format("shifting split up by %d bytes and recrunching", adjustment));
					split_point += adjustment;
					continue;
				}
				__.disp_chunks(chunks);
				__.dprint(format("segment gap = %d (max=%d)", gap, max_gap));
				if (gap > max_gap) {
					final var adjustment = gap - floorDiv(gap, 4);
					__.dprint(format("shifting split down by %d bytes and recrunching", adjustment));
					split_point -= adjustment;
					max_gap += 1 + max_gap; // increase tolerance to escape oscillation.
				} else {
					/*
					 * ok. At this point,
					 * gap >= 0
					 * chunks[1].addr - chunks[0].end_addr() >= 0
					 * chunks[1].addr >= chunks[0].end_addr()
					 * chunks[1].end_addr() >= chunks[0].end_addr() (as c1.end_addr >= c1.addr)
					 * split_point >= chunks[0].end_addr()
					 * upper_chunk.addr >= chunks[0].end_addr()
					 * therefore, chunk 0 is safe.
					 */
					__.dprint(gap > 0 ? "close enough." : "perfect.");
					break;
				}
			}
		}
		op.stats.log_boot(output_chunk.data.size());
		for (final ByteObjectPair<CDataChunk> chunkOffset : patch_offsets.zip(chunks.subList(0, min(2, chunks.size())))) { // zip stops at the shorter list; guard against a single-chunk list
			final var offset = chunkOffset.getOne();
			final var chunk = chunkOffset.getTwo();
			final var gap = chunk.addr - output_chunk.end_addr();
			if (gap > 0) {
				op.stats.log_gap(gap);
				output_chunk.data.addAll(ByteArrayList.newWithNValues(gap, (byte) 0xff));
			}
			if (chunk.se != null) {
				output_chunk.data.set(offset + 1, lo(chunk.addr));
				output_chunk.data.set(offset + 3, hi(chunk.addr)); // hi byte of the segment address (lo/hi pair patched into the boot code)
				output_chunk.data.set(offset + 4, (byte) 0x20); // replace LDA decrunch with JSR decrunch
			} else {
				op.stats.log_raw(chunk.data.size());
			}
			output_chunk.extend(chunk);
		}
		final var exec_addr = args.getInt("execAddr");
		output_chunk.data.set(oStart + 1, lo(exec_addr));
		output_chunk.data.set(oStart + 2, hi(exec_addr));
		return output_chunk;
	}
	public static void main(final String[] argsv) throws ArgumentParserException, IOException {
		try {
			final var args = parse_args(argsv);
			final var op = new CCruncher(args.getBoolean("fast"));
			if (args.getBoolean("raw")) {
				if (args.<File> get("infile").getName().endsWith(".prg")) {
					err.println("warning, input file will be parsed as a .bin");
				}
				if (args.<File> get("outfile").getName().endsWith(".prg")) {
					err.println("warning, output file will be written as a .bin");
				}
				final var input_data = ByteLists.mutable.of(Files.readAllBytes(args.<File> get("infile").toPath()));
				final var original_length = input_data.size();
				out.println(format("%05d bytes read from %s", //
						original_length, args.<File> get("infile").getName()));
				final var output_bytes = raw_crunch(args, op, input_data);
				if (args.getBoolean("verbose")) {
					op.report(true);
				}
				Files.write(args.<File> get("outfile").toPath(), output_bytes.toArray(), WRITE, CREATE);
				final var compressed_length = output_bytes.size();
				out.println(format("% 5d bytes written to %s (%4.1f%% of original size)", //
						compressed_length, args.<File> get("outfile").getName(), //
						compressed_length * 100.0 / original_length) //
				);
			} else {
				if (args.<File> get("infile").getName().endsWith(".bin")) {
					err.println("warning, input file will be parsed as a .prg");
				}
				if (args.<File> get("outfile").getName().endsWith(".bin")) {
					err.println("warning, output file will be written as a .prg");
				}
				final var input_chunk = load_prg(args.<File> get("infile"));
				final var original_length = input_chunk.data.size() + 2;
				out.println(format("%s: %05d bytes read from %s", //
						input_chunk.ext(), original_length, args.<File> get("infile").getName()));
				if (input_chunk.data.size() < 1) {
					err.println("Input file can't be empty");
					exit(1);
				}
				CDataChunk output_chunk;
				if (args.getBoolean("selfExtracting")) {
					if (input_chunk.addr < 0x0200) {
						out.println(input_chunk.addr);
						err.println("Destination addresses below 0x0200 not supported by -c");
						exit(1);
					}
					output_chunk = sfx_crunch(args, op, input_chunk);
				} else {
					output_chunk = level_crunch(args, op, input_chunk);
				}
				op.stats.log_load_addr();
				op.stats.log_save_addr();
				if (args.getBoolean("verbose")) {
					op.report();
				}
				save_prg(args.<File> get("outfile"), output_chunk);
				final var compressed_length = output_chunk.data.size() + 2;
				out.print(format("%s: %05d bytes written to %s (%04.1f%% of original size)", //
						output_chunk.ext(), compressed_length, args.<File> get("outfile").getName(), //
						compressed_length * 100.0 / original_length));
			}
		} catch (final ArgumentParserException e) {
			e.getParser().printHelp();
		}
	}
}
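Companion file Utils.java, providing the toHexString and subList helpers used above: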
package de.plush.brix.c64graphics.core.util;

import static java.lang.String.format;
import static java.util.Arrays.copyOfRange;

import org.eclipse.collections.api.list.primitive.*;
import org.eclipse.collections.impl.factory.primitive.*;
import org.eclipse.collections.impl.list.mutable.primitive.*;

public class Utils {
	// ...

	public static String toHexString(final byte b) {
		return format("%2s", Integer.toHexString(Byte.toUnsignedInt(b))).replace(' ', '0');
	}

	/** In Eclipse Collections 10.4.0 {@link ByteArrayList#subList(int, int)} is still not implemented. */
	public static MutableByteList subList(final MutableByteList in, final int from, final int to) {
		return ByteLists.mutable.of(copyOfRange(in.toArray(), from, to));
	}

	/** In Eclipse Collections 10.4.0 {@link IntArrayList#subList(int, int)} is still not implemented. */
	public static MutableIntList subList(final MutableIntList in, final int from, final int to) {
		return IntLists.mutable.of(copyOfRange(in.toArray(), from, to));
	}
}
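For command-line use, two hypothetical invocations (flags as defined in parse_args above; file names are placeholders, and hex addresses are passed without a 0x prefix, since the hex argument type feeds the raw string to parseInt(value, 16)):

java de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2 -c -j 080d game.prg game_sfx.prg
java de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2 -r -f data.bin packed.bin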
Author comment (Brixomatic, Aug 25, 2020):

fixed: missing arguments didn't print the help message.
fixed: visibility of factory methods changed from package-private to public.
fixed: some remainder calculation was wrong.