Java translation of the Tinycrunch 1.2 encoder, requires tc_boot.prg from the original Tinycrunch 1.2 distribution.
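As a pipeline stage the class is a plain Function<byte[], byte[]>. A minimal usage sketch (file names are placeholders; assumes the demo class sits in the same package as Tinycrunch_1_2, with argparse4j and Eclipse Collections on the classpath):

import java.nio.file.*;

public class CrunchDemo {
	public static void main(final String[] args) throws Exception {
		final byte[] raw = Files.readAllBytes(Paths.get("bitmap.bin")); // placeholder input file
		final byte[] packed = new Tinycrunch_1_2() //
				.withFastMode(false) // optimal (slow) parse; true would select the greedy coder
				.withVerboseOutput(true) // print the token statistics table after crunching
				.apply(raw);
		Files.write(Paths.get("bitmap.tc"), packed); // raw TinyCrunch stream, without .prg header
	}
}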
package de.plush.brix.c64graphics.core.pipeline.stages.compression;

import static de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2.Stats.StatId.*;
import static de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2.Stats.StatId.load;
import static de.plush.brix.c64graphics.core.util.Utils.subList;
import static java.lang.Float.POSITIVE_INFINITY;
import static java.lang.Integer.parseInt;
import static java.lang.Math.*;
import static java.lang.String.format;
import static java.lang.System.*;
import static java.nio.file.Paths.get;
import static java.nio.file.StandardOpenOption.*;
import static java.util.Arrays.stream;
import static java.util.Map.entry;
import static net.sourceforge.argparse4j.impl.Arguments.*;

import java.io.*;
import java.net.URISyntaxException;
import java.nio.file.*;
import java.util.*;
import java.util.function.*;

import org.eclipse.collections.api.list.primitive.*;
import org.eclipse.collections.api.tuple.primitive.ByteObjectPair;
import org.eclipse.collections.impl.factory.primitive.*;
import org.eclipse.collections.impl.list.mutable.primitive.ByteArrayList;
import org.eclipse.collections.impl.map.mutable.primitive.ObjectIntHashMap;

import de.plush.brix.c64graphics.core.util.Utils;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.inf.*;
/**
 * Java implementation of the TinyCrunch 1.2 encoder.<br>
 *
 * @see <a href="https://csdb.dk/release/?id=168629">https://csdb.dk/release/?id=168629</a>
 * @author Wanja Gayk
 * @author © 2018 Christopher Jam, original Python code
 *
 */
// https://gist.github.com/Brixomatic/2dfc58bd9c0f830695d812489a0cf82a
@SuppressWarnings({ "checkstyle:npathComplexity", "checkstyle:cyclomaticComplexity" })
public class Tinycrunch_1_2 implements Function<byte[], byte[]> {

	private boolean fast;
	private boolean verbose;

	/**
	 * Create a new Tinycrunch 1.2 compression stage with optimum (slow) compression and basic output.
	 *
	 * @see #withFastMode(boolean)
	 * @see #withVerboseOutput(boolean)
	 */
	public Tinycrunch_1_2() {}

	private Tinycrunch_1_2(final boolean fast, final boolean verbose) {
		this.fast = fast;
		this.verbose = verbose;
	}

	/**
	 * @param fast
	 *            <tt>true</tt> to switch to greedy (fast) compression, trading compression ratio for more speed
	 * @return a new instance of this class with this configuration set accordingly.
	 */
	public Tinycrunch_1_2 withFastMode(final boolean fast) {
		return new Tinycrunch_1_2(fast, verbose);
	}

	/**
	 * @param verbose
	 *            <tt>true</tt> to get a more detailed report at the end of crunching.
	 * @return a new instance of this class with this configuration set accordingly.
	 */
	public Tinycrunch_1_2 withVerboseOutput(final boolean verbose) {
		return new Tinycrunch_1_2(fast, verbose);
	}

	@Override
	public byte[] apply(final byte[] input) {
		final byte[] packed = toEncodedBinary(input);
		out.println("raw length: " + input.length + " -> " + packed.length + " (bytes)");
		return packed;
	}

	private byte[] toEncodedBinary(final byte[] rawInput) {
		final MutableByteList inputData = ByteLists.mutable.of(rawInput);
		out.println(format("%05d bytes read from stream", inputData.size()));
		final var op = new CCruncher(fast);
		final var args = new Namespace(Map.of());
		final var outputBytes = raw_crunch(args, op, inputData);
		if (verbose) {
			op.report(true);
		}
		out.println(format("%05d bytes written to stream (%4.1f%% of original size)", //
				outputBytes.size(), outputBytes.size() * 100.0 / inputData.size()) //
		);
		return outputBytes.toArray();
	}
	/*
	 * NOTE: This is a pretty straight translation of the original Python code. The source contains several oddities that only exist because
	 * the Python language is the way it is.
	 * The source should also not be "beautified", as it should stay easy to compare with the original.
	 */
	static Path boot_path;
	static {
		try {
			boot_path = get(Tinycrunch_1_2.class.getResource("resources/tc_boot.prg").toURI());
		} catch (final URISyntaxException e) {
			throw new IllegalStateException(e);
		}
	}

	static CDataChunk load_prg(final File fi) throws IOException {
		final var data = ByteLists.mutable.of(Files.readAllBytes(fi.toPath()));
		final var addr = Byte.toUnsignedInt(data.get(0)) + (Byte.toUnsignedInt(data.get(1)) << 8); // little-endian load address; mask sign extension and shift only the high byte
		return new CDataChunk(addr, subList(data, 2, data.size()));
	}
	static void save_prg(final File fo, final CDataChunk data_chunk) throws IOException {
		final var data = ByteLists.mutable.of((byte) (data_chunk.addr & 255), (byte) (data_chunk.addr >>> 8));
		data.addAll(data_chunk.data);
		Files.write(fo.toPath(), data.toArray(), WRITE, CREATE);
	}

	private static class CDataChunk {
		int addr;
		MutableByteList data;
		String se; // source extent description. Doubles as a marker that this data has been compressed

		public CDataChunk(final int addr, final MutableByteList data) {
			this(addr, data, null);
		}

		public CDataChunk(final int addr, final MutableByteList data, final String se) {
			this.addr = addr;
			this.data = data;
			this.se = se;
		}

		static CDataChunk ending_at(final int ea, final MutableByteList data, final String se) {
			return new CDataChunk(ea - data.size(), data, se);
		}

		int end_addr() {
			return addr + data.size();
		}

		String ext() {
			return String.format("0x%04X-0x%04X", addr, end_addr());
		}

		CDataChunk[] split_at(final int addr) {
			assert this.addr <= addr : "split address lower than start address";
			assert addr <= end_addr() : "split address higher than end address";
			final var sp = addr - this.addr;
			final var lower = new CDataChunk(this.addr, subList(data, 0, sp)); // the lower half keeps the original start address
			final var higher = new CDataChunk(addr, subList(data, sp, data.size()));
			return new CDataChunk[] { lower, higher };
		}
		void extend(final CDataChunk addend) {
			assert end_addr() == addend.addr : "extension start address does not match end address";
			data.addAll(addend.data);
		}
	}

	private static class OptToken {
		float cost;
		ByteList data;
		Integer next;
		int length;
		int offset;

		public OptToken(final float cost, final ByteList data, final Integer next, final int length, final int offset) {
			this.cost = cost;
			this.data = data;
			this.next = next;
			this.length = length;
			this.offset = offset;
		}
		@Override
		public String toString() {
			String dr;
			if (data.size() < 5) {
				dr = data.collect(Utils::toHexString).makeString(" ");
			} else {
				dr = data.size() + " bytes";
			}
			return String.format("OptToken(%s,[%s],%s,%d,%d)", cost, dr, next, length, offset); // cost is a float, "%d" would throw an IllegalFormatConversionException
		}
	}
	private static class CCruncher {
		int longest_copy = 17;
		int max_pair_offset = 63;
		int max_offset = 2048;
		int longest_literal = 64;
		boolean greedy;
		Stats stats;

		private boolean inPlace;
		private MutableByteList output_bytes;
		private CDataChunk input_chunk;
		private MutableByteList data;
		private MutableByteList remainder;
		private MutableByteList literals;

		public CCruncher(final boolean greedy) {
			this.greedy = greedy;
			reset_stats();
		}

		void reset_stats() {
			stats = new Stats();
		}

		MutableByteList crunch(final CDataChunk input_chunk, final boolean inPlace) {
			this.inPlace = inPlace;
			output_bytes = ByteLists.mutable.of((byte) 0, (byte) 0, (byte) 0); // leave space for target address, and final byte
			this.input_chunk = input_chunk;
			final var split = input_chunk.split_at(max(input_chunk.addr, input_chunk.end_addr() - 1)); // split_at expects an address, not an index
			data = split[0].data; // everything except the last byte ( input_chunk.data[:-1] )
			remainder = split[1].data; // last item in the array ( input_chunk.data[-1:] )
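			// (the first remainder byte is later moved into the 3-byte header, see output_token_list and log_move)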
			if (data.size() == 0) { return output_token_list(new ArrayList<OptToken>()); }
			if (greedy) { return crunch_greedy(); }
			return crunch_optimal();
		}
		/** "10xxxxxx" */
		ByteList encode_literal(final ByteList literal) {
			assert 0 < literal.size() && literal.size() <= 64 : "literal size out of bounds 0 < size <= 64, size: " + literal.size();
			final byte count = (byte) (128 + literal.size() - 1);
			final var encoding = ByteLists.mutable.of(count);
			encoding.addAll(literal);
			return encoding;
		}

		/**
		 * "11oooooo" offsets < 64 <br>
		 * "0aaaaabb bbbbbbbb" bigger offsets
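		 * <br>
		 * Worked examples, derived from the code below (offsets are stored inverted, lengths biased by 2):<br>
		 * length 2, offset 5 -> 0xFA ( 0xc0 + (5 ^ 63) )<br>
		 * length 3, offset 1 -> 0x0F 0xFF ( offset-1 = 0: hi = (0 ^ 7) + ((3 - 2) << 3) = 0x0F, lo = 0 ^ 0xff = 0xFF )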
		 */
		ByteList encode_copy(final int length, int offset) {
			assert 1 < length && length <= longest_copy : "copy sequence out of bounds, 1 < length <= " + longest_copy + ", length: " + length;
			if (length == 2 && offset <= max_pair_offset) {
				// "11oooooo"
				assert offset < 64 : "offset >= 64, offset: " + offset;
				final byte count = (byte) ((0xc0 + offset) ^ 63); // ie, offset^0x7f
				return ByteLists.mutable.of(count);
			}
			// "0aaaaabb bbbbbbbb"
			assert 0 < offset && offset < 2048 : "offset out of bounds : 0 < offset < 2048, offset: " + offset;
			// bits 1.5.10
			// assert(2<= length<32)
			offset -= 1;
			final byte hi = (byte) (0x00 + (((offset & 0x700) >>> 8) ^ 7) + ((length - 2) << 3));
			final byte lo = (byte) ((offset & 0xff) ^ 0xff);
			return ByteLists.mutable.of(hi, lo);
		}
		/** just finds for any point the string that extends the furthest */
		private MutableByteList crunch_greedy() {
			final var longestCopy = longest_copy;
			final var maxOffset = max_offset;
			final var tokens = new ArrayList<OptToken>();
			literals = ByteLists.mutable.empty();
			final Runnable tokenize_literal_if_present = () -> {
				if (literals.size() > 0) {
					tokens.add(new OptToken(0, encode_literal(literals), 0, literals.size(), 0));
					literals = ByteLists.mutable.empty();
				}
			};
			final var data = this.data;
			final var l = data.size();
			var nj = 0;
			/*
			 * last_seen = defaultdict(lambda: -(2 ** 17))
			 * The argument of a defaultdict (in this case "lambda: -(2 ** 17)")
			 * will be called when you try to access a key that doesn't exist,
			 * and its return value will be set as the new value of this key.
			 * In Java we do that by using another method than get,
			 * e.g. computeIfAbsent in java.util or getIfAbsentPut in Eclipse Collections.
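			 * So last_seen.getIfAbsentPut(key, -(1 << 17)) below both yields the default
			 * and records it, exactly like the Python defaultdict access does.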
			 */
			final var last_seen = new ObjectIntHashMap<ByteList>(); // address key was last seen starting at
			// kl = [tuple(data[j:j + i]) for i in range(longestCopy + 1) if j + i <= l]
			// data = [1,2,3]
			// kl = [(), (1,), (1,2), (1,2,3)]
			// kl = [(), (2,), (2, 3)]
			// kl = [(), (3,)]
			for (int j = 0; j < l; ++j) {
				final var kl = new ArrayList<ByteList>();
				for (int i = 0; i < longestCopy + 1 && j + i <= l; ++i) {
					kl.add(subList(data, j, j + i));
				}
				if (j == nj) {
					var length = kl.size() - 1;
					var noBreak = true;
					while (length > 1) {
						final int offset = j - last_seen.getIfAbsentPut(kl.get(length), -(1 << 17));
						if (offset < maxOffset) {
							final var token = encode_copy(length, offset);
							if (token.get(0) != 0) {
								out.println(token.collect(b -> Utils.toHexString(b)).makeString());
								tokenize_literal_if_present.run();
								assert offset > 0 : "offset expected to be > 0, offset: " + offset;
								tokens.add(new OptToken(0, token, 0, length, offset));
								nj = j + length;
								noBreak = false;
								break;
							}
						}
						length -= 1;
					}
					if (noBreak) {
						literals.add(data.get(j));
						if (literals.size() == longest_literal) {
							tokenize_literal_if_present.run();
						}
						nj = j + 1;
					}
				}
				final var jj = j;
				kl.subList(1, kl.size()).forEach(k -> last_seen.put(k, jj));
			}
			tokenize_literal_if_present.run();
			return output_token_list(tokens);
		}
		private MutableByteList crunch_optimal() {
			final var longestCopy = longest_copy;
			final var maxPairOffset = max_pair_offset;
			final var longestLiteral = longest_literal;
			final var data = this.data;
			final var l = data.size();
			final var last_seen = new ObjectIntHashMap<ByteList>(); // address key was last seen starting at
			/*
			 * cfile[j] contains the tail of a list that in turn contains the
			 * cheapest representation of the first j bytes of the file.
			 * data contains the bytes that must be added to the stream to
			 * cover the bytes between that token and its predecessor.
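			 * Each OptToken.next holds the index where the predecessor's coverage ends,
			 * so following the next fields from the final token walks the optimal parse backwards.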
			 */
			final var cfile = new ArrayList<>(List.of(new OptToken(0, null, null, 0, 0)));
			OptToken best = null;
			for (int j = 1; j < l + 1; ++j) {
				final var copy_candidates = new ArrayList<ByteList>();
				for (int i = 2; i < longestCopy + 1 && j - i >= 0; ++i) {
					copy_candidates.add(subList(data, j - i, j));
				}
				best = new OptToken(POSITIVE_INFINITY, null, null, 0, 0);
				for (final var k : copy_candidates) {
					final var mra = last_seen.getIfAbsentPut(k, -(1 << 17));
					final var length = k.size();
					final var start_addr = j - length;
					assert length > 1 : "length must be > 1, length: " + length;
					final var offset = j - mra;
					if (offset < max_offset) {
						final var nb = length > 2 || offset > maxPairOffset ? 2.012f : 1.01f;
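						// (token byte costs carry tiny fractional penalties, presumably so that parses with equal byte counts tie-break towards fewer tokens)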
						final var cost = cfile.get(start_addr).cost + nb;
						if (cost < best.cost) {
							final var token = encode_copy(length, offset);
							if (token.get(0) != 0) {
								best = new OptToken(cost, token, start_addr, length, offset);
								assert mra - length < j : "Expected mra-length < j. mra: " + mra + " - length: " + length //
										+ " = " + (mra - length) + ", j: " + j;
							}
						}
					}
				}
				for (int length = 1; length < longestLiteral + 1; ++length) {
					final var start_addr = j - length;
					if (start_addr >= 0) {
						final var cost = cfile.get(start_addr).cost + length + 1.01f;
						if (cost < best.cost) {
							final var literal = subList(data, start_addr, j);
							best = new OptToken(cost, encode_literal(literal), start_addr, literal.size(), 0);
							assert best.data.size() == length + 1 : "expected data.size() == length+1, data.size(): " + best.data.size()
									+ ", length+1: " + (length + 1);
							assert start_addr < j : "expected startAddr < j, startAddr: " + start_addr + ", j: " + j;
						}
					}
				}
				cfile.add(best);
				for (final var k : copy_candidates) {
					last_seen.put(k, j);
				}
			}
			assert best != null : "expected best != null";
			final var tokens = new ArrayList<>(Arrays.asList(best));
			while (best.next != 0) {
				best = cfile.get(best.next);
				tokens.add(best);
			}
			Collections.reverse(tokens);
			return output_token_list(tokens);
		}

		private MutableByteList output_token_list(final ArrayList<OptToken> tokens) {
			var j = tokens.size();
			var watermark = j; // from here on, just store raw data
			var raw_bytes_after_watermark = 0;
			if (inPlace) {
				// Scan the token list from the end back.
				// Whenever the compressed remainder is equal to or longer than the raw remainder,
				// set that token as the start of the raw data.
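				// (Tokens past the watermark are emitted verbatim: near the end of an in-place
				// decrunch, the compressed stream would otherwise overlap bytes still to be read.)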
				var raw_bytes_after_tokenJ = 0;
				var comp_bytes_after_token_j = 0;
				raw_bytes_after_watermark = 0;
				while (j > 0) {
					j -= 1;
					raw_bytes_after_tokenJ += tokens.get(j).length;
					comp_bytes_after_token_j += tokens.get(j).data.size();
					if (raw_bytes_after_tokenJ <= comp_bytes_after_token_j) {
						watermark = j;
						raw_bytes_after_watermark += raw_bytes_after_tokenJ;
						raw_bytes_after_tokenJ = 0;
						comp_bytes_after_token_j = 0;
					}
				}
			}
			for (final var t : tokens.subList(0, watermark)) {
				stats.log_token(t);
				output_bytes.addAll(t.data);
			}
			if (inPlace && raw_bytes_after_watermark > 0) {
				final var newRemainder = subList(data, data.size() - raw_bytes_after_watermark, data.size());
				newRemainder.addAll(remainder);
				remainder = newRemainder;
			}
			if (remainder.size() > 1) {
				stats.log_raw(remainder.size() - 1);
			}
			stats.log_header(3);
			stats.log_move(1);
			output_bytes.set(0, remainder.get(0));
			output_bytes.set(1, (byte) ((input_chunk.addr - 1) % 256));
			output_bytes.set(2, (byte) ((input_chunk.addr - 1) >>> 8));
			stats.log_terminator();
			remainder.set(0, (byte) 0); // terminator for compressed data
			output_bytes.addAll(remainder);
			remainder = null;
			return output_bytes;
		}
		void report() {
			report(false);
		}

		void report(final boolean raw) {
			stats.report(raw);
		}
	}

	static class Stats {

		static class StatCounter {
			Object legend;
			int ct, bi, bo;

			StatCounter(final Object legend) {
				this.legend = legend;
				reset();
			}

			void reset() {
				ct = 0;
				bi = 0;
				bo = 0;
			}

			void accBi(final int bi) {
				accCIO100(1, bi, 0);
			}

			void accBo(final int bo) {
				accCIO100(1, 0, bo);
			}

			void accBiBo(final int bi, final int bo) {
				accCIO100(1, bi, bo);
			}

			void accBiCt(final int bi, final int ct) {
				accCIO100(ct, bi, 0);
			}

			void accCIO100(final int ct, final int bi, final int bo) {
				this.ct += ct;
				this.bi += bi;
				this.bo += bo;
			}

			int cost() {
				return max(0, bo - bi);
			}

			int savings() {
				return max(0, bi - bo);
			}

			void print(final String fs, final IntFunction<String> ent, final IntFunction<String> ifp) {
				final var l_c = ct;
				final var l_o = bo;
				final var l_i = bi;
				if (l_c > 0) {
					out.println(String.format(fs, legend, l_c, ent.apply(l_c), ifp.apply(l_i), ifp.apply(l_o - l_i), ifp.apply(l_i - l_o), ifp.apply(l_o)));
				}
			}
		}
		enum StatId {
			pair, copy, literal, header, move, gap, boot, raw, end, load, save;
		}

		String rs;
		Map<StatId, StatCounter> counts;
		MutableIntList offsets;
		MutableIntList litlens;

		Stats() {
			rs = "";
			counts = Map.ofEntries(//
					entry(pair, new StatCounter("2 byte copies")), //
					entry(copy, new StatCounter("n byte copies")), //
					entry(literal, new StatCounter("literal strings")), //
					entry(header, new StatCounter("segment headers")), //
					entry(move, new StatCounter("moved to header")), //
					entry(gap, new StatCounter("gaps")), //
					entry(boot, new StatCounter("boot")), //
					entry(raw, new StatCounter("uncompressed")), //
					entry(end, new StatCounter("terminators")), //
					entry(load, new StatCounter("load address")), //
					entry(save, new StatCounter("save address")) //
			);
			offsets = IntLists.mutable.empty();
			litlens = IntLists.mutable.empty();
		}

		void log_token(final OptToken t) {
			final var dl = t.data.size();
			if (t.offset == 0) {
				log_literal(t.length, dl);
			} else if (dl == 1) {
				log_pair(t.offset, dl);
			} else {
				log_copy(t.length, t.offset, dl);
			}
		}
		private void log_pair(final int offset, final int dl) {
			offsets.add(offset);
			rs += '1';
			counts.get(pair).accBiBo(2, dl);
			assert dl == 1;
		}

		private void log_copy(final int length, final int offset, final int dl) {
			offsets.add(offset);
			rs += '@';
			counts.get(copy).accBiBo(length, dl);
			assert dl == 2;
		}

		private void log_literal(final int length, final int dl) {
			litlens.add(length);
			rs += '.';
			assert dl == 1 + length;
			counts.get(literal).accBiBo(length, dl);
		}

		void log_boot(final int length) {
			counts.get(boot).accBo(length);
		}

		void log_gap(final int length) {
			counts.get(gap).accBo(length);
		}

		void log_header(final int length) {
			counts.get(header).accBo(length);
		}

		void log_move(final int length) {
			counts.get(move).accBi(length);
		}

		void log_raw(final int length) {
			counts.get(raw).accBiBo(length, length);
		}

		void log_terminator() {
			counts.get(end).accBo(1);
		}

		void log_load_addr() {
			counts.get(load).accBiCt(2, 1);
		}

		void log_save_addr() {
			counts.get(save).accBo(2);
		}
		void report() {
			report(false);
		}

		void report(final boolean raw_) {
			final var g1 = new StatId[] { copy, pair, literal, end };
			final var g2 = new StatId[] { move, load, save, boot, header, gap, raw };
			if (raw_) {
				stream(g2).forEach(k -> counts.get(k).reset());
			}
			final var symcount = stream(g1).mapToInt(k -> counts.get(k).ct).sum();
			final var vi = counts.values();
			final var s_c = vi.stream().mapToInt(c -> c.ct).sum();
			final var s_i = vi.stream().mapToInt(c -> c.bi).sum();
			final var cost = (float) vi.stream().mapToDouble(c -> c.cost()).sum();
			final var savings = vi.stream().mapToInt(c -> c.savings()).sum();
			final var s_o = vi.stream().mapToInt(c -> c.bo).sum();
			assert round(s_i + cost - savings) == s_o : "input + cost - savings doesn't equal output"; // need to use round, for floating point inaccuracies
			final IntFunction<String> ent = x -> x <= 0 ? " n/a" : "" + format("%7.3f", log((x + 1e-20) / (symcount + 1e-20)) / log(0.5));
			final IntFunction<String> noent = x -> "";
			final IntFunction<String> ifp = x -> x <= 0 ? "" : "" + x;
			final var hr = "+-----------------+------------------+----------------------------------+";
			final var fs = "|%16s | %7s %7s | %7s %7s %7s %7s |";
			out.println();
			out.println(hr);
			out.println(format(fs, "", "count", "entropy", "input", "cost", "savings", "output"));
			out.println(hr);
			stream(g1).map(k -> counts.get(k)).forEach(c -> c.print(fs, ent, ifp));
			out.println(hr);
			if (!raw_) {
				stream(g2).map(k -> counts.get(k)).forEach(c -> c.print(fs, noent, ifp));
			}
			out.println(format(fs, "total", s_c, "", s_i, round(cost), savings, s_o));
			out.println(hr);
			offsets.sortThis();
			if (!offsets.isEmpty()) {
				out.println(format("median, maximum offset used = % d, % d", offsets.get(floorDiv(offsets.size(), 2)), offsets.get(offsets.size() - 1)));
			}
			litlens.sortThis();
			if (!litlens.isEmpty()) {
				// in the original the last arg for format is "self.litlens[-5:]", a list slice, while the formatting "{:}" does not
				// suggest an array, but Python's format takes it anyway
				out.println(format("median, maximum litlen used = % d, %s", litlens.get(floorDiv(litlens.size(), 2)),
						subList(litlens, max(0, litlens.size() - 5), litlens.size())));
			}
			out.println();
		}
	}

	static byte hi(final int x) {
		return (byte) (x >>> 8 & 0xff);
	}

	static byte lo(final int x) {
		return (byte) (x & 0xff);
	}
	static Namespace parse_args(final String[] args) throws ArgumentParserException {
		final ArgumentType<Integer> hex = (final ArgumentParser parser, final Argument arg, final String value) -> parseInt(value, 16);
		final ArgumentParser parser = ArgumentParsers.newFor("Java tinycrunch").build()//
				.version("${prog} 1.2") //
				.defaultHelp(true) //
				.description(""); //
		parser.addArgument("-V", "--version").action(version());
		parser.addArgument("infile").type(fileType().verifyCanRead().verifyIsFile()).help("(.prg file unless -r is used)");
		// parser.addArgument("outfile").type(fileType().verifyCanWrite().or().verifyCanCreate()).help("(.prg file unless -r is used)");
		parser.addArgument("outfile").type(fileType()).help("(.prg file unless -r is used)");
		final var group = parser.addMutuallyExclusiveGroup().required(true);
		group.addArgument("-s", "--startAddress").dest("startAddr").help("start address").type(hex).setDefault((Integer) null);
		group.addArgument("-e", "--endAddress").dest("endAddr").help("end address").type(hex).setDefault((Integer) null);
		group.addArgument("-i", "--inPlace").dest("inPlace").help("compress to end of destination area").action(storeTrue());
		group.addArgument("-c", "--selfExtracting").action(storeTrue());
		group.addArgument("-r", "--raw").action(storeTrue()).help("read/write .bin files, no header. cf readme.txt");
		parser.addArgument("-j", "--jmp").dest("execAddr").help("execution address for self extracting .prg (requires -c)").setDefault(0x080d).type(hex); // default must be an Integer, or getInt("execAddr") fails when -j is omitted
		parser.addArgument("-p", "--paramFile").type(fileType().verifyCanWrite().or().verifyCanCreate()).setDefault((Object) null).help(
				"generated asm include file containing a define for the output start address");
		parser.addArgument("-v", "--verbose").action(storeTrue());
		parser.addArgument("-f", "--fast").action(storeTrue()).help("faster (greedy) compression (default is optimal size)");
		return parser.parseArgs(args);
	}
	static CDataChunk level_crunch(final Namespace args, final CCruncher op, final CDataChunk input_chunk) {
		final var output_bytes = op.crunch(input_chunk, args.getBoolean("inPlace"));
		int la;
		if (args.getBoolean("inPlace")) {
			la = input_chunk.end_addr() - output_bytes.size();
		} else if (args.getInt("endAddr") != null) {
			la = args.getInt("endAddr") - output_bytes.size();
		} else {
			la = args.getInt("startAddr");
		}
		final var output_chunk = new CDataChunk(la, output_bytes);
		if (args.get("paramFile") != null) {
			try {
				final Path path = args.<File> get("paramFile").toPath();
				Files.writeString(path, format("dcSrc=$%04X", output_chunk.addr)); // write only the asm define for the output start address
			} catch (final Exception e) {
				e.printStackTrace();
			}
		}
		return output_chunk;
	}
	static MutableByteList raw_crunch(final Namespace args, final CCruncher op, final MutableByteList input_data) {
		assert !Boolean.TRUE.equals(args.getBoolean("inPlace")) : "--inPlace makes no sense for raw mode"; // null-safe: the stream pipeline passes an empty Namespace
		assert args.get("paramFile") == null : "cannot generate paramFile for raw mode";
		final var id = ByteLists.mutable.ofAll(input_data);
		id.add((byte) 0); // add fake load address, and dummy byte-to-move-to-header
		final var input_chunk = new CDataChunk(0, id); // fake load address = 0
		final var output_bytes = op.crunch(input_chunk, false);
		return subList(output_bytes, 3, output_bytes.size()); // drop header
	}
	/**
	 * Compresses the input file in two segments.
	 * The first contains a compressed copy of the data
	 * to be decrunched to the area after the loaded file.
	 * The second contains the remaining data,
	 * compressed in place.<br>
	 * <br>
	 * It takes an iteration or three to find the optimal split point.
	 *
	 * @throws IOException
	 */
	static CDataChunk sfx_crunch(final Namespace args, final CCruncher op, final CDataChunk input_chunk) throws IOException {
		final var __ = new Object() {
			void dprint(final String... prargs) {
				if (args.getBoolean("verbose")) {
					stream(prargs).forEach(out::println);
					if (prargs.length == 0) {
						out.println();
					}
				}
			}

			void disp_chunks(final List<CDataChunk> chunks) {
				chunks.forEach(chunk -> {
					if (chunk.se == null) {
						dprint(format("data segment at %s, uncompressed", chunk.ext()));
					} else {
						dprint(format("data segment at %s, decrunches to %s", chunk.ext(), chunk.se));
					}
				});
			}
		};
		__.dprint();
		__.dprint(input_chunk.ext());
		final var boot_chunk = load_prg(boot_path.toFile());
		final var oCo = boot_chunk.split_at(boot_chunk.end_addr() - 3);
		final var output_chunk = oCo[0];
		final var offsets = oCo[1];
		final var patch_offsets = offsets.data;
		final var oStart = patch_offsets.get(2);
		__.dprint(format(" boot at %s", output_chunk.ext()));
		final var data_start = output_chunk.end_addr();
		final var monolith = new CDataChunk(data_start, op.crunch(input_chunk, false), input_chunk.ext());
		final var monolith_stats = op.stats;
		__.dprint(format(" monolith at %s", monolith.ext()));
		List<CDataChunk> chunks;
		if (input_chunk.addr >= monolith.end_addr() || input_chunk.end_addr() <= monolith.addr) {
			__.dprint("(doesn't overlap output, using as is)");
			chunks = List.of(monolith); // this is safe because it doesn't overlap the input chunk
		} else {
			var split_point = min(monolith.end_addr() + 12, input_chunk.end_addr() - 1); // assume compression is slightly worse
			var max_gap = floorDiv(input_chunk.data.size(), 2000); // try for 0.05% bytes wasted between output segments
			while (true) {
				op.reset_stats();
				if (split_point >= input_chunk.end_addr()) {
					__.dprint(format("\nnew split point of 0x%04x is past end of input.", split_point));
					split_point = data_start;
					if (input_chunk.end_addr() > data_start) { // can only split here if the input actually extends past the boot segment
						final var lCuC = input_chunk.split_at(split_point);
						final var lower_chunk = lCuC[0];
						final var upper_chunk = lCuC[1];
						chunks = List.of( //
								upper_chunk, //
								new CDataChunk(input_chunk.end_addr(), op.crunch(lower_chunk, false), lower_chunk.ext()) //
						);
					} else {
						__.dprint("Just storing raw input file after header");
						chunks = List.of( //
								input_chunk //
						);
					}
					__.disp_chunks(chunks);
					break;
				}
				final var lCuC = input_chunk.split_at(split_point);
				final var lower_chunk = lCuC[0];
				final var upper_chunk = lCuC[1];
				chunks = List.of( //
						new CDataChunk(data_start, op.crunch(upper_chunk, false), upper_chunk.ext()), //
						CDataChunk.ending_at(split_point, op.crunch(lower_chunk, true), lower_chunk.ext()) // in place => safe
				);
				final var gap = chunks.get(1).addr - chunks.get(0).end_addr();
				if (gap < 0) {
					var adjustment = -gap;
					adjustment -= floorDiv(adjustment, 5); // reduce larger steps a little
					__.disp_chunks(chunks);
					__.dprint(format("segment overlap = %d", -gap));
					__.dprint(format("shifting split up by %d bytes and recrunching", adjustment));
					split_point += adjustment;
					continue;
				}
				__.disp_chunks(chunks);
				__.dprint(format("segment gap = %d (max=%d)", gap, max_gap));
				if (gap > max_gap) {
					final var adjustment = gap - floorDiv(gap, 4);
					__.dprint(format("shifting split down by %d bytes and recrunching", adjustment));
					split_point -= adjustment;
					max_gap += 1 + max_gap; // increase tolerance to escape oscillation.
				} else {
					/*
					 * ok. At this point,
					 * gap >= 0
					 * chunks[1].addr - chunks[0].end_addr() >= 0
					 * chunks[1].addr >= chunks[0].end_addr()
					 * chunks[1].end_addr() >= chunks[0].end_addr() (as c1.end_addr >= c1.addr)
					 * split_point >= chunks[0].end_addr()
					 * upper_chunk.addr >= chunks[0].end_addr()
					 * therefore, chunk 0 is safe.
					 */
					__.dprint(gap > 0 ? "close enough." : "perfect.");
					break;
				}
			}
		}
		op.stats.log_boot(output_chunk.data.size());
		for (final ByteObjectPair<CDataChunk> chunkOffset : patch_offsets.zip(chunks.subList(0, min(2, chunks.size())))) { // zip stops at the shorter list; guard against a single-chunk list
			final var offset = chunkOffset.getOne();
			final var chunk = chunkOffset.getTwo();
			final var gap = chunk.addr - output_chunk.end_addr();
			if (gap > 0) {
				op.stats.log_gap(gap);
				output_chunk.data.addAll(ByteArrayList.newWithNValues(gap, (byte) 0xff));
			}
			if (chunk.se != null) {
				output_chunk.data.set(offset + 1, lo(chunk.addr));
				output_chunk.data.set(offset + 3, hi(chunk.addr)); // hi byte of the segment address (lo/hi pair patched into the boot code)
				output_chunk.data.set(offset + 4, (byte) 0x20); // replace LDA decrunch with JSR decrunch
			} else {
				op.stats.log_raw(chunk.data.size());
			}
			output_chunk.extend(chunk);
		}
		final var exec_addr = args.getInt("execAddr");
		output_chunk.data.set(oStart + 1, lo(exec_addr));
		output_chunk.data.set(oStart + 2, hi(exec_addr));
		return output_chunk;
	}
	public static void main(final String[] argsv) throws ArgumentParserException, IOException {
		try {
			final var args = parse_args(argsv);
			final var op = new CCruncher(args.getBoolean("fast"));
			if (args.getBoolean("raw")) {
				if (args.<File> get("infile").getName().endsWith(".prg")) {
					err.println("warning, input file will be parsed as a .bin");
				}
				if (args.<File> get("outfile").getName().endsWith(".prg")) {
					err.println("warning, output file will be written as a .bin");
				}
				final var input_data = ByteLists.mutable.of(Files.readAllBytes(args.<File> get("infile").toPath()));
				final var original_length = input_data.size();
				out.println(format("%05d bytes read from %s", //
						original_length, args.<File> get("infile").getName()));
				final var output_bytes = raw_crunch(args, op, input_data);
				if (args.getBoolean("verbose")) {
					op.report(true);
				}
				Files.write(args.<File> get("outfile").toPath(), output_bytes.toArray(), WRITE, CREATE);
				final var compressed_length = output_bytes.size();
				out.println(format("% 5d bytes written to %s (%4.1f%% of original size)", //
						compressed_length, args.<File> get("outfile").getName(), //
						compressed_length * 100.0 / original_length) //
				);
			} else {
				if (args.<File> get("infile").getName().endsWith(".bin")) {
					err.println("warning, input file will be parsed as a .prg");
				}
				if (args.<File> get("outfile").getName().endsWith(".bin")) {
					err.println("warning, output file will be written as a .prg");
				}
				final var input_chunk = load_prg(args.<File> get("infile"));
				final var original_length = input_chunk.data.size() + 2;
				out.println(format("%s: %05d bytes read from %s", //
						input_chunk.ext(), original_length, args.<File> get("infile").getName()));
				if (input_chunk.data.size() < 1) {
					err.println("Input file can't be empty");
					exit(1);
				}
				CDataChunk output_chunk;
				if (args.getBoolean("selfExtracting")) {
					if (input_chunk.addr < 0x0200) {
						out.println(input_chunk.addr);
						err.println("Destination addresses below 0x0200 not supported by -c");
						exit(1);
					}
					output_chunk = sfx_crunch(args, op, input_chunk);
				} else {
					output_chunk = level_crunch(args, op, input_chunk);
				}
				op.stats.log_load_addr();
				op.stats.log_save_addr();
				if (args.getBoolean("verbose")) {
					op.report();
				}
				save_prg(args.<File> get("outfile"), output_chunk);
				final var compressed_length = output_chunk.data.size() + 2;
				out.print(format("%s: %05d bytes written to %s (%04.1f%% of original size)", //
						output_chunk.ext(), compressed_length, args.<File> get("outfile").getName(), //
						compressed_length * 100.0 / original_length));
			}
		} catch (final ArgumentParserException e) {
			e.getParser().printHelp();
		}
	}
}
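Companion file Utils.java, providing the toHexString and subList helpers used above: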
package de.plush.brix.c64graphics.core.util;

import static java.lang.String.format;
import static java.util.Arrays.copyOfRange;

import org.eclipse.collections.api.list.primitive.*;
import org.eclipse.collections.impl.factory.primitive.*;
import org.eclipse.collections.impl.list.mutable.primitive.*;

public class Utils {
	// ...

	public static String toHexString(final byte b) {
		return format("%2s", Integer.toHexString(Byte.toUnsignedInt(b))).replace(' ', '0');
	}

	/** In Eclipse Collections 10.4.0 {@link ByteArrayList#subList(int, int)} is still not implemented. */
	public static MutableByteList subList(final MutableByteList in, final int from, final int to) {
		return ByteLists.mutable.of(copyOfRange(in.toArray(), from, to));
	}

	/** In Eclipse Collections 10.4.0 {@link IntArrayList#subList(int, int)} is still not implemented. */
	public static MutableIntList subList(final MutableIntList in, final int from, final int to) {
		return IntLists.mutable.of(copyOfRange(in.toArray(), from, to));
	}
}
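For command-line use, two hypothetical invocations (flags as defined in parse_args above; file names are placeholders, and hex addresses are passed without a 0x prefix, since the hex argument type feeds the raw string to parseInt(value, 16)):

java de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2 -c -j 080d game.prg game_sfx.prg
java de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2 -r -f data.bin packed.bin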
Author comment (Brixomatic, Aug 25, 2020):

fixed: missing arguments didn't print the help message.
fixed: visibility of factory methods changed from package-private to public.
fixed: some remainder calculation was wrong.