mossheim · February 22, 2017 15:36
diff --git a/sclangTestsImplementation.sc b/sclangTestsImplementation.sc
 /* Untitled.sc (TODO: Actual name for class)
 *
 * Tests for the SuperCollider 3 lexer, parser, compiler
 * Original author: Brian Heim 2017-02-19
 * This code is licensed under the GPLv3
 *
 * Note: performance really matters for some of these things. I
 * did a lot of intense benchmarking and came up with low-level
 * functions and loops that are blazingly fast for SC. I left in
 * my old code in some cases so you can benchmark yourself if
 * you're so inclined. Also, there is the validation code I've
 * left commented under some functions, try it out. - Brian
 */
 StubTestLexerParserCompiler : UnitTest {
 	const compileErrorString = "!cErr";
 	const runtimeErrorString = "!rErr";
 	const validatedOutputFilenameSuffix = "_expected";
 	const maxline = 1024; // may need to update this, some strings like `0!99` can produce very long output

 	// TODO: (possible) add option for non-hex-string output (maybe you're just actually doing normal looking code sometimes)

 	// tests all possible combinations-with-replacement of members of a given alphabet,
 	// writes the results to a file, and checks it against validated results.
 	// The client may also specify an optional prefix and/or suffix to be added to each
 	// string before testing. Only strings of length `len` are tested.
 	*testAllPossibleStrings {
 		arg alphabet, // Array of strings to test
 		len, // String length
 		prefix = "", suffix = "",
 		testID = "", // unique filename ID
 		technique = \compile, // best
 		deleteOnFinish = true, // technique to use for testing
 		compressRepeatResults = true;

 		var alphabetSize = alphabet.size;
 		var filename, file;
 		var header; // intro content to be written to the file
 		var diffs; // holds a list (empty under sucessful execution) of differences between expected and actual output
 		var testString, fullTestString; // holds the string that will be tested with testOneString
 		var prevResult, repeatedResultCount = 0; // hold the previous result for compressing repeat results
 		var counter = 0!len; // [0, 0, ..., 0] which controls indexing into the alphabet to generate a string
 		var dataLine; // will hold the result stored to the file
 		var encoding;

 		// sort the alphabet to start
 		alphabet = alphabet.sort;

 		postln("testAllPossibleStrings: Generating header");
 		header = this.formatHeader(
 			alphabet,
 			prefix,
 			suffix,
 			technique,
 			getEncodingString(technique),
 			alphabetSize ** len // stringCount
 		);

 		postln("testAllPossibleStrings: Creating file");
 		if(File.exists(filename)) {
 			Error("testAllPossibleStrings: File % already exists: "
 				"please delete before continuing".format(filename.quote)).throw;
 		};

 		file = File.new(filename, "w");
 		if(file.isOpen.not) {
 			Error("testAllPossibleStrings: Failed to open file").throw;
 		};

 		protect {
 			postln("testAllPossibleStrings: Writing header");
 			file.write(header);

 			postln("testAllPossibleStrings: Writing data");

 			// we don't have your fancy do-whiles down in these parts
 			// TODO: benchmark to see if using a flag in the while loop is /really/ so bad
 			// TODO: benchmark to see if there's a better way than reduce
 			// TODO: refactor this for clarity
 			testString = alphabet[counter].reduce('++')?""; // reduce on an empty array returns nil :(
 			fullTestString = prefix ++ testString ++ suffix;
 			dataLine = this.testOneString(fullTestString, technique);
 			file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
 			prevResult = dataLine;

 			while {this.incrementAlphabetCount(counter, len, alphabetSize)} {
 				testString = alphabet[counter].reduce('++')?"";
 				fullTestString = prefix ++ testString ++ suffix;
 				dataLine = this.testOneString(fullTestString, technique);

 				if(compressRepeatResults) {
 					if(dataLine == prevResult) {
 						// if we just saw this result, don't print it, but keep track of it
 						repeatedResultCount = repeatedResultCount + 1;
 					} {
 						if(repeatedResultCount > 0) {
 							// give a count if it's more than 0. 0 is the assumed default
 							file.putChar($\t);
 							file.write(repeatedResultCount.asString);
 							repeatedResultCount = 0;
 						};

 						file.putChar($\n);
 						file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
 						prevResult = dataLine; // only update if it changed
 					}
 				} {
 					// just write the line normally
 					file.putChar($\n);
 					file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
 				}

 			};

 			if(compressRepeatResults && (repeatedResultCount > 0)) {
 				// "flush the buffer"
 				file.putChar($\t);
 				file.write(repeatedResultCount.asString);
 			};

 			file.putChar($\n);
 		} {
 			file.close;

 			postln("testAllPossibleStrings: Validating against standard output");
 			//diffs = this.validate(filename);

 			if(deleteOnFinish) {
 				postln("testAllPossibleStrings: Deleting test file");
 				File.delete(filename);
 			}
 		};

 		postln("testAllPossibleStrings: Success!");

 		^diffs;
 	}

 	// TODO: add more encodings
 	getEncodingString {
 		arg technique;

 		^switch(technique,
 			\compile,
 			"<hex-format input string><tab>"
 			"<\"!cErr\" (compile error) | \"!rErr\" (runtime error) | hex-format output string>"
 			"<opt tab & class name of output>"
 			"<opt tab & count of consecutive repeats of this result (no number means no repetitions)>",
 			{Error("getEncodingString: unrecognized technique: %".format(technique.quote))}
 		);
 	}

 	// returns `false` if there is an overflow
 	// modifies the array in-place!
 	*incrementAlphabetCount {
 		arg ctr, len, n; // the counter array, string length, and alphabet size (n)

 		// I benchmarked this pretty hard, but there might be a better way!
 		// especially if you can avoid the repeated indexing into `ctr` - Brian
 		while {len > 0} {
 			len = len-1;
 			ctr[len] = ctr[len] + 1;
 			if(len == 0) {
 				"incrementAlphabetCount: at %\n".postf(ctr);
 			};
 			if(ctr[len] != n) {^true} {ctr[len] = 0};
 		};

 		^false;
 	}

 	*testOneString {
 		arg string, technique;
 		var r, rclass;

 		// string.postln;

 		// TODO: use this.clearAll (clears a,b,...,z)
 		// TODO: benchmark vs format

 		switch(technique,
 			\compile, {
 				r = string.compile;
 				if(r.isNil) {
 					// compile error
 					r = compileErrorString;
 				} {
 					// interpret using .value
 					try {
 						r = r.value;
 						rclass = r.class;
 						r = this.stringToHexString(r.asString)++"\t"++rclass.asString;
 					} {
 						r = runtimeErrorString;
 					}
 				}
 			},
 			\bytecode, {
 				r = string.compile;
 				if(r.isNil) {
 					// compile error
 					r = compileErrorString;
 				} {
 					r = this.bytecodeToHexString(r.def.code);
 				}
 			}, {
 				Error("testOneString: invalid technique option: %".format(technique)).throw;
 			}
 		);

 		^r;
 		// TODO: benchmark w/ format and switching around .asString

 		// TODO: write faster methods for when prefix/suffix/both are empty
 	}

 	// TODO: find better names than afile and efile
 	*validate {
 		arg filename;

 		var validated = filename ++ validatedOutputFilenameSuffix;

 		var efile, afile; // expected file, actual file
 		var diffs; // array of pairs of lines that are not identical

 		File.exists(validated).not.if {
 			Error("validate: couldn't find file for validation: %".format(validated.quote)).throw;
 		};
 		File.exists(filename).not.if {
 			Error("validate: couldn't find file for validation: %".format(filename.quote)).throw;
 		};

 		efile = File(validated, "r");
 		afile = File(filename, "r");

 		if(efile.isOpen.not) {
 			Error("validate: failed to open validation file: %".format(validated.quote)).throw;
 		};
 		if(afile.isOpen.not) {
 			Error("validate: failed to open validation file: %".format(filename.quote)).throw;
 		};

 		protect {
 			var eheader, aheader, keySet, areAlphabetsEqual;

 			eheader = this.parseHeader(this.readHeader(efile));
 			aheader = this.parseHeader(this.readHeader(afile));

 			// compare the headers, warn if there are issues
 			keySet = union(aheader.keys, eheader.keys);
 			keySet.do {
 				arg key;
 				if(aheader[key] != eheader[key]) {
 					"validate: headers differ on key %\n"
 					"\texpected header has: %\n"
 					"\tactual header has: %".format(
 						key, aheader[key], eheader[key]
 					).warn;
 				}
 			};

 			areAlphabetsEqual = aheader[\alphabet] == eheader[\alphabet];

 			diffs = areAlphabetsEqual.if {
 				"validate: alphabets are equal, using fast method".postln;
 				validateDataFast(afile, efile);
 			} {
 				"validate: alphabets are unequal, using slow method".postln;
 				validateDataSlow(afile, efile, aheader[\alphabet], eheader[\alphabet]);
 			};



 			// TODO: read data lines
 			// parse into input + output
 			// compare
 			// record
 			// also, handle state for unrolling repeated-line notation

 		} {
 			afile.close;
 			efile.close;
 		}

 		// TODO: benchmark with Pair vs Array

 		^diffs;
 	}

 	// TODO
 	// TODO: this is only for .compile -- write one for bytecode!
 	*validateDataSlow {
 		arg afile, efile, aset, eset;
 		var asize = aset.size, esize = eset.size;
 		var actr = 0!asize, ectr = 0!esize;

 		var diffs = []; // can't use a dict because some entries may be nil




 	}

 	// TODO
 	// TODO: this is only for .compile -- write one for bytecode!
 	*validateDataFast {
 		arg afile, efile;

 		var aline, eline;
 		var areps = 0, ereps = 0;
 		var diffs = []; // TODO: benchmark against array/set/collection of pairs, dictionary/set/etc. (less important)

 		while {
 			if(areps > 0) {
 				aline = afile.getLine(this.maxline);
 				if(aline.size >= (this.maxline-1)) {
 					"validateDataFast: maxline characters read from actual data file: %".format(aline.quote).warn;
 				};
 			};
 			if(ereps > 0) {
 				eline = efile.getLine(this.maxline);
 				if(eline.size >= (this.maxline-1)) {
 					"validateDataFast: maxline characters read from expected data file: %".format(eline.quote).warn;
 				};
 			};

 			((areps > 0) || aline.notNil) &&
 			((ereps > 0) || eline.notNil);
 		} {
 			// either pull next line or decrement current line.
 			// only one of these counters can ever be decremented in a single pass.
 			if(areps == 0) {
 				aline = this.parseCompileDataLine(aline);
 				areps = aline[\reps];
 			} {
 				areps = areps - 1;
 			};

 			if(ereps == 0) {
 				eline = this.parseCompileDataLine(eline);
 				ereps = aline[\reps];
 			} {
 				ereps = ereps - 1;
 			};

 			if(
 				(aline[\output] != eline[\output]) ||
 				(aline[\outputClass] != eline[\outputClass])
 			) {
 				diffs = diffs.add(this.mkCompileDataDiff(aline, eline));
 			};

 			// only bother if there's work to be done
 			// TODO: benchmark w/o this check
 			if((areps > 0) && (ereps > 0)) {
 				var minReps = min(areps, ereps);
 				areps = areps - minReps;
 				ereps = ereps - minReps;
 			};
 		};

 		if(aline.notNil || eline.notNil) {
 			"validateDataFast: not all data has been exhausted: actual file has %, expected file has %".format(aline.quote, eline.quote).warn;
 		};

 		^diffs;
 	}

 	*parseCompileDataLine {
 		arg line;

 		var result = Dictionary[];

 		line = line.split($\t);

 		result[\string] = line[0];
 		result[\output] = line[1];
 		result[\hadError] = this.isErrorString(result[\output]);

 		if(result[\hadError]) {
 			result[\reps] = line[2]?0;
 		} {
 			result[\outputClass] = line[2];
 			result[\reps] = line[3]?0;
 		};

 		^result;
 	}

 	*mkCompileDataDiff {
 		arg aline, eline;

 		^[aline[\string], [aline[\output], aline[\outputClass]], [eline[\output], eline[\outputClass]]];
 	}

 	*isErrorString {
 		arg str;

 		^(str == compileErrorString) || (str == runtimeErrorString);
 	}


 	// TODO: determine naming scheme
 	*mkFilename {
 		arg testID;

 		// this works for now; maybe it should really be this simple?
 		^testID.resolveRelative;
 	}

 	/////////////////////////////////////
 	///// SMALL CONVENIENCE METHODS /////
 	/////////////////////////////////////

 	*bytecodeToHexString {
 		arg bytes;

 		var hexString = String.newClear(bytes.size*2);

 		bytes.do {
 			arg byte, i;
 			hexString[i*2] = byte.mod(256).div(16).asDigit;
 			hexString[i*2+1] = byte.mod(16).asDigit;
 		};

 		^hexString;
 	}

 	*bytecodeFromHexString {
 		arg hexString;

 		var bytes = Int8Array.newClear(hexString.size.div(2));

 		// about 4/3 faster
 		bytes.size.do {
 			arg i;
 			bytes[i] = hexString[i*2].digit*16 + hexString[i*2+1].digit;
 		};

 		/*var i=0;
 		hexString.pairsDo {
 		arg hb, lb;
 		bytes[i] = hb.digit*16 + lb.digit;
 		i = i + 1;
 		};*/

 		^bytes;
 	}

 	/* validation:
 	a = Int8Array.newFrom((-256..256));
 	b = StubTestLexerParserCompiler.bytecodeToHexString(a);
 	c =  StubTestLexerParserCompiler.bytecodeFromHexString(b);
 	a == c
 	*/

 	// converts an input String to a String representing its hex ASCII values.
 	// The output string is twice the length of the input.
 	*stringToHexString {
 		arg string;

 		// var hexString = "";
 		var hexString = String.newClear(string.size*2);

 		// I was originally using .asHexString; this is much faster (250+%) - Brian
 		string.do {
 			arg char, i;
 			var hx = char.ascii.mod(256);
 			hexString[i*2] = hx.div(16).asDigit;
 			hexString[i*2+1] = hx.mod(16).asDigit;
 		};

 		^hexString;
 	}

 	// inverse of stringToHexString
 	*stringFromHexString {
 		arg hexString;

 		var string = String.newClear(hexString.size.div(2));

 		string.size.do {
 			arg i;
 			string[i] = (hexString[i*2].digit*16 + hexString[i*2+1].digit).asAscii;
 		}

 		^string;
 	}

 	/* validation:
 	a = (-256..256).collect(_.asAscii).reduce('++');
 	b = StubTestLexerParserCompiler.stringToHexString(a);
 	c =  StubTestLexerParserCompiler.stringFromHexString(b);
 	a == c
 	*/

 	///////////////////////////////////
 	///// FILE FORMATTING METHODS /////
 	///////////////////////////////////

 	// given proper input, creates a header for the test file format used in these tests
 	*formatHeader {
 		arg alphabetSize, alphabet, prefix, suffix, technique, encodingDesc, stringCount;

 		var alphabetString = "";
 		var tmp;

 		// data validation: BEGIN
 		if(alphabetSize != alphabet.size) {
 			Error("formatHeader: alphabet.size (%) and alphabetSize (%) should be equal".format(alphabet.size, alphabetSize)).throw;
 		};
 		if(alphabetSize <= 0) {
 			Error("formatHeader: alphabetSize (%) must be greater than 0".format(alphabetSize)).throw;
 		}
 		if(alphabet.isKindOf(Array).not) {
 			Error("formatHeader: alphabet should be an array").throw;
 		};
 		if(alphabet.every(_.isKindOf(String)).not) {
 			Error("formatHeader: alphabet should be composed of strings").throw;
 		};
 		if(prefix.isKindOf(String).not) {
 			Error("formatHeader: prefix should be a string").throw;
 		};
 		if(suffix.isKindOf(String).not) {
 			Error("formatHeader: suffix should be a string").throw;
 		};
 		if(technique.isKindOf(Symbol).not) {
 			Error("formatHeader: technique should be a symbol").throw;
 		};
 		if(encodingDesc.isKindOf(String).not) {
 			Error("formatHeader: encodingDesc should be a string").throw;
 		};
 		if(stringCount <= 0) {
 			Error("formatHeader: stringCount must be a positive number").throw;
 		};
 		// data validation: END

 		alphabetString = alphabet.collect(this.stringToHexString(_)).join(",");

 		^format(
 			"HEAD\n"
 			"alphabet size:%\n"
 			"alphabet:%\n"
 			"prefix:%\n"
 			"suffix:%\n"
 			"technique:%\n"
 			"encoding:%\n"
 			"DATA\n"
 			"%\n",
 			alphabetSize,
 			alphabetString,
 			this.stringToHexString(prefix),
 			this.stringToHexString(suffix),
 			technique,
 			encodingDesc,
 			stringCount
 		);
 	}

 	// does pretty hefty validation on the header format
 	// validation is performed in the parsing subroutines
 	// returns results in a Dictionary
 	*readHeader {
 		arg file;

 		var result = Dictionary[];

 		// assert that file is already readable
 		if(file.isOpen.not) {
 			Error("readHeader: given file is not open: %".format(file.path)).throw;
 		};

 		this.readHeader_parseBlockName(file.getLine(this.maxline), "HEAD");
 		result[\alphabetSize] = this.parseAlphabetSize(file.getLine(this.maxline));
 		result[\alphabet] = this.parseAlphabet(file.getLine(this.maxline));
 		result[\prefix] = this.parsePrefix(file.getLine(this.maxline));
 		result[\suffix] = this.parseSuffix(file.getLine(this.maxline));
 		result[\technique] = this.parseTechnique(file.getLine(this.maxline));
 		result[\encoding] = this.parseEncoding(file.getLine(this.maxline));

 		this.readHeader_parseBlockName(file.getLine(this.maxline), "DATA");
 		result[\stringCount] = this.parseStringCount(file.getLine(this.maxline));

 		^result;
 	}

 	////////////////////////////////////////////
 	/// HEADER READING HELPER METHODS: BEGIN ///
 	////////////////////////////////////////////

 	*readHeader_verifyFieldName {
 		arg str, expected;

 		var len = expected.size;

 		if(str.isNil) {
 			Error("parseHeader: unexpectedly reached end of document while parsing %".format(expected.quote)).throw;
 		};

 		if(str[..len-1] != expected) {
 			Error("parseHeader: expected %: got %".format(expected.quote, str[..len-1].quote)).throw;
 		};

 		^str[len..];
 	}

 	*readHeader_parseBlockName {
 		arg line, blockName;

 		if(line != blockName) {
 			Error("readHeader: expected % block, got %".format(blockName.quote, line.quote));
 		};
 	}

 	*parseAlphabetSize {
 		arg str;
 		var size;

 		str = this.parseHeader_verifyFieldName(str, "alphabet size:");
 		size = str.asInteger;

 		if(size <= 0) {
 			Error("parseHeader: alphabet size must be > 0: got %".format(size.quote)).throw;
 		};

 		^size;
 	}

 	*parseAlphabet {
 		arg str;
 		var alphabet;

 		str = this.parseHeader_verifyFieldName(str, "alphabet:");
 		alphabet = str.split($,);

 		if(alphabet.isEmpty) {
 			Error("parseHeader: alphabet is empty: got %".format(str.quote)).throw;
 		};

 		try {
 			^alphabet.collect(this.stringFromHexString(_));
 		} {
 			arg e;
 			Error("parseHeader: error while decoding alphabet %: %".format(alphabet, e.errorString)).throw;
 		};
 	}

 	*parsePrefix {
 		arg str;

 		str = this.parseHeader_verifyFieldName(str, "prefix:");

 		try {
 			^this.stringFromHexString(str);
 		} {
 			arg e;
 			Error("parseHeader: error while decoding prefix: %".format(e.errorString)).throw;
 		};
 	}

 	*parseSuffix {
 		arg str;

 		str = this.parseHeader_verifyFieldName(str, "suffix:");

 		try {
 			^this.stringFromHexString(str);
 		} {
 			arg e;
 			Error("parseHeader: error while decoding suffix: %".format(e.errorString)).throw;
 		};
 	}

 	*parseTechnique {
 		arg str;

 		str = this.parseHeader_verifyFieldName(str, "technique:");

 		if(str.isEmpty) {
 			Error("parseHeader: must provide a technique name").throw;
 		}

 		^str.asSymbol;
 	}

 	*parseEncoding {
 		arg str;

 		str = this.parseHeader_verifyFieldName(str, "encoding:");

 		^str;
 	}

 	*parseStringCount {
 		arg str;
 		var cnt = str.asInteger;
 		if(cnt <= 0) {
 			Error("parseHeader: string count must be > 0: got %".format(cnt)).throw;
 		};
 		^cnt;
 	}

 	//////////////////////////////////////////
 	/// HEADER READING HELPER METHODS: END ///
 	//////////////////////////////////////////

 }
	/* Untitled.sc (TODO: Actual name for class)
	*
	* Tests for the SuperCollider 3 lexer, parser, compiler
	* Original author: Brian Heim 2017-02-19
	* This code is licensed under the GPLv3
	*
	* Note: performance really matters for some of these things. I
	* did a lot of intense benchmarking and came up with low-level
	* functions and loops that are blazingly fast for SC. I left in
	* my old code in some cases so you can benchmark yourself if
	* you're so inclined. Also, there is the validation code I've
	* left commented under some functions, try it out. - Brian
	*/
	StubTestLexerParserCompiler : UnitTest {
	const compileErrorString = "!cErr";
	const runtimeErrorString = "!rErr";
	const validatedOutputFilenameSuffix = "_expected";
	const maxline = 1024; // may need to update this, some strings like `0!99` can produce very long output

	// TODO: (possible) add option for non-hex-string output (maybe you're just actually doing normal looking code sometimes)

	// tests all possible combinations-with-replacement of members of a given alphabet,
	// writes the results to a file, and checks it against validated results.
	// The client may also specify an optional prefix and/or suffix to be added to each
	// string before testing. Only strings of length `len` are tested.
	*testAllPossibleStrings {
	arg alphabet, // Array of strings to test
	len, // String length
	prefix = "", suffix = "",
	testID = "", // unique filename ID
	technique = \compile, // best
	deleteOnFinish = true, // technique to use for testing
	compressRepeatResults = true;

	var alphabetSize = alphabet.size;
	var filename, file;
	var header; // intro content to be written to the file
	var diffs; // holds a list (empty under sucessful execution) of differences between expected and actual output
	var testString, fullTestString; // holds the string that will be tested with testOneString
	var prevResult, repeatedResultCount = 0; // hold the previous result for compressing repeat results
	var counter = 0!len; // [0, 0, ..., 0] which controls indexing into the alphabet to generate a string
	var dataLine; // will hold the result stored to the file
	var encoding;

	// sort the alphabet to start
	alphabet = alphabet.sort;

	postln("testAllPossibleStrings: Generating header");
	header = this.formatHeader(
	alphabet,
	prefix,
	suffix,
	technique,
	getEncodingString(technique),
	alphabetSize ** len // stringCount
	);

	postln("testAllPossibleStrings: Creating file");
	if(File.exists(filename)) {
	Error("testAllPossibleStrings: File % already exists: "
	"please delete before continuing".format(filename.quote)).throw;
	};

	file = File.new(filename, "w");
	if(file.isOpen.not) {
	Error("testAllPossibleStrings: Failed to open file").throw;
	};

	protect {
	postln("testAllPossibleStrings: Writing header");
	file.write(header);

	postln("testAllPossibleStrings: Writing data");

	// we don't have your fancy do-whiles down in these parts
	// TODO: benchmark to see if using a flag in the while loop is /really/ so bad
	// TODO: benchmark to see if there's a better way than reduce
	// TODO: refactor this for clarity
	testString = alphabet[counter].reduce('++')?""; // reduce on an empty array returns nil :(
	fullTestString = prefix ++ testString ++ suffix;
	dataLine = this.testOneString(fullTestString, technique);
	file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
	prevResult = dataLine;

	while {this.incrementAlphabetCount(counter, len, alphabetSize)} {
	testString = alphabet[counter].reduce('++')?"";
	fullTestString = prefix ++ testString ++ suffix;
	dataLine = this.testOneString(fullTestString, technique);

	if(compressRepeatResults) {
	if(dataLine == prevResult) {
	// if we just saw this result, don't print it, but keep track of it
	repeatedResultCount = repeatedResultCount + 1;
	} {
	if(repeatedResultCount > 0) {
	// give a count if it's more than 0. 0 is the assumed default
	file.putChar($\t);
	file.write(repeatedResultCount.asString);
	repeatedResultCount = 0;
	};

	file.putChar($\n);
	file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
	prevResult = dataLine; // only update if it changed
	}
	} {
	// just write the line normally
	file.putChar($\n);
	file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
	}

	};

	if(compressRepeatResults && (repeatedResultCount > 0)) {
	// "flush the buffer"
	file.putChar($\t);
	file.write(repeatedResultCount.asString);
	};

	file.putChar($\n);
	} {
	file.close;

	postln("testAllPossibleStrings: Validating against standard output");
	//diffs = this.validate(filename);

	if(deleteOnFinish) {
	postln("testAllPossibleStrings: Deleting test file");
	File.delete(filename);
	}
	};

	postln("testAllPossibleStrings: Success!");

	^diffs;
	}

	// TODO: add more encodings
	getEncodingString {
	arg technique;

	^switch(technique,
	\compile,
	"<hex-format input string><tab>"
	"<\"!cErr\" (compile error) \| \"!rErr\" (runtime error) \| hex-format output string>"
	"<opt tab & class name of output>"
	"<opt tab & count of consecutive repeats of this result (no number means no repetitions)>",
	{Error("getEncodingString: unrecognized technique: %".format(technique.quote))}
	);
	}

	// returns `false` if there is an overflow
	// modifies the array in-place!
	*incrementAlphabetCount {
	arg ctr, len, n; // the counter array, string length, and alphabet size (n)

	// I benchmarked this pretty hard, but there might be a better way!
	// especially if you can avoid the repeated indexing into `ctr` - Brian
	while {len > 0} {
	len = len-1;
	ctr[len] = ctr[len] + 1;
	if(len == 0) {
	"incrementAlphabetCount: at %\n".postf(ctr);
	};
	if(ctr[len] != n) {^true} {ctr[len] = 0};
	};

	^false;
	}

	*testOneString {
	arg string, technique;
	var r, rclass;

	// string.postln;

	// TODO: use this.clearAll (clears a,b,...,z)
	// TODO: benchmark vs format

	switch(technique,
	\compile, {
	r = string.compile;
	if(r.isNil) {
	// compile error
	r = compileErrorString;
	} {
	// interpret using .value
	try {
	r = r.value;
	rclass = r.class;
	r = this.stringToHexString(r.asString)++"\t"++rclass.asString;
	} {
	r = runtimeErrorString;
	}
	}
	},
	\bytecode, {
	r = string.compile;
	if(r.isNil) {
	// compile error
	r = compileErrorString;
	} {
	r = this.bytecodeToHexString(r.def.code);
	}
	}, {
	Error("testOneString: invalid technique option: %".format(technique)).throw;
	}
	);

	^r;
	// TODO: benchmark w/ format and switching around .asString

	// TODO: write faster methods for when prefix/suffix/both are empty
	}

	// TODO: find better names than afile and efile
	*validate {
	arg filename;

	var validated = filename ++ validatedOutputFilenameSuffix;

	var efile, afile; // expected file, actual file
	var diffs; // array of pairs of lines that are not identical

	File.exists(validated).not.if {
	Error("validate: couldn't find file for validation: %".format(validated.quote)).throw;
	};
	File.exists(filename).not.if {
	Error("validate: couldn't find file for validation: %".format(filename.quote)).throw;
	};

	efile = File(validated, "r");
	afile = File(filename, "r");

	if(efile.isOpen.not) {
	Error("validate: failed to open validation file: %".format(validated.quote)).throw;
	};
	if(afile.isOpen.not) {
	Error("validate: failed to open validation file: %".format(filename.quote)).throw;
	};

	protect {
	var eheader, aheader, keySet, areAlphabetsEqual;

	eheader = this.parseHeader(this.readHeader(efile));
	aheader = this.parseHeader(this.readHeader(afile));

	// compare the headers, warn if there are issues
	keySet = union(aheader.keys, eheader.keys);
	keySet.do {
	arg key;
	if(aheader[key] != eheader[key]) {
	"validate: headers differ on key %\n"
	"\texpected header has: %\n"
	"\tactual header has: %".format(
	key, aheader[key], eheader[key]
	).warn;
	}
	};

	areAlphabetsEqual = aheader[\alphabet] == eheader[\alphabet];

	diffs = areAlphabetsEqual.if {
	"validate: alphabets are equal, using fast method".postln;
	validateDataFast(afile, efile);
	} {
	"validate: alphabets are unequal, using slow method".postln;
	validateDataSlow(afile, efile, aheader[\alphabet], eheader[\alphabet]);
	};



	// TODO: read data lines
	// parse into input + output
	// compare
	// record
	// also, handle state for unrolling repeated-line notation

	} {
	afile.close;
	efile.close;
	}

	// TODO: benchmark with Pair vs Array

	^diffs;
	}

	// TODO
	// TODO: this is only for .compile -- write one for bytecode!
	*validateDataSlow {
	arg afile, efile, aset, eset;
	var asize = aset.size, esize = eset.size;
	var actr = 0!asize, ectr = 0!esize;

	var diffs = []; // can't use a dict because some entries may be nil




	}

	// TODO
	// TODO: this is only for .compile -- write one for bytecode!
	*validateDataFast {
	arg afile, efile;

	var aline, eline;
	var areps = 0, ereps = 0;
	var diffs = []; // TODO: benchmark against array/set/collection of pairs, dictionary/set/etc. (less important)

	while {
	if(areps > 0) {
	aline = afile.getLine(this.maxline);
	if(aline.size >= (this.maxline-1)) {
	"validateDataFast: maxline characters read from actual data file: %".format(aline.quote).warn;
	};
	};
	if(ereps > 0) {
	eline = efile.getLine(this.maxline);
	if(eline.size >= (this.maxline-1)) {
	"validateDataFast: maxline characters read from expected data file: %".format(eline.quote).warn;
	};
	};

	((areps > 0) \|\| aline.notNil) &&
	((ereps > 0) \|\| eline.notNil);
	} {
	// either pull next line or decrement current line.
	// only one of these counters can ever be decremented in a single pass.
	if(areps == 0) {
	aline = this.parseCompileDataLine(aline);
	areps = aline[\reps];
	} {
	areps = areps - 1;
	};

	if(ereps == 0) {
	eline = this.parseCompileDataLine(eline);
	ereps = aline[\reps];
	} {
	ereps = ereps - 1;
	};

	if(
	(aline[\output] != eline[\output]) \|\|
	(aline[\outputClass] != eline[\outputClass])
	) {
	diffs = diffs.add(this.mkCompileDataDiff(aline, eline));
	};

	// only bother if there's work to be done
	// TODO: benchmark w/o this check
	if((areps > 0) && (ereps > 0)) {
	var minReps = min(areps, ereps);
	areps = areps - minReps;
	ereps = ereps - minReps;
	};
	};

	if(aline.notNil \|\| eline.notNil) {
	"validateDataFast: not all data has been exhausted: actual file has %, expected file has %".format(aline.quote, eline.quote).warn;
	};

	^diffs;
	}

	*parseCompileDataLine {
	arg line;

	var result = Dictionary[];

	line = line.split($\t);

	result[\string] = line[0];
	result[\output] = line[1];
	result[\hadError] = this.isErrorString(result[\output]);

	if(result[\hadError]) {
	result[\reps] = line[2]?0;
	} {
	result[\outputClass] = line[2];
	result[\reps] = line[3]?0;
	};

	^result;
	}

	*mkCompileDataDiff {
	arg aline, eline;

	^[aline[\string], [aline[\output], aline[\outputClass]], [eline[\output], eline[\outputClass]]];
	}

	*isErrorString {
	arg str;

	^(str == compileErrorString) \|\| (str == runtimeErrorString);
	}


	// TODO: determine naming scheme
	*mkFilename {
	arg testID;

	// this works for now; maybe it should really be this simple?
	^testID.resolveRelative;
	}

	/////////////////////////////////////
	///// SMALL CONVENIENCE METHODS /////
	/////////////////////////////////////

	*bytecodeToHexString {
	arg bytes;

	var hexString = String.newClear(bytes.size*2);

	bytes.do {
	arg byte, i;
	hexString[i*2] = byte.mod(256).div(16).asDigit;
	hexString[i*2+1] = byte.mod(16).asDigit;
	};

	^hexString;
	}

	*bytecodeFromHexString {
	arg hexString;

	var bytes = Int8Array.newClear(hexString.size.div(2));

	// about 4/3 faster
	bytes.size.do {
	arg i;
	bytes[i] = hexString[i2].digit16 + hexString[i*2+1].digit;
	};

	/*var i=0;
	hexString.pairsDo {
	arg hb, lb;
	bytes[i] = hb.digit*16 + lb.digit;
	i = i + 1;
	};*/

	^bytes;
	}

	/* validation:
	a = Int8Array.newFrom((-256..256));
	b = StubTestLexerParserCompiler.bytecodeToHexString(a);
	c = StubTestLexerParserCompiler.bytecodeFromHexString(b);
	a == c
	*/

	// converts an input String to a String representing its hex ASCII values.
	// The output string is twice the length of the input.
	*stringToHexString {
	arg string;

	// var hexString = "";
	var hexString = String.newClear(string.size*2);

	// I was originally using .asHexString; this is much faster (250+%) - Brian
	string.do {
	arg char, i;
	var hx = char.ascii.mod(256);
	hexString[i*2] = hx.div(16).asDigit;
	hexString[i*2+1] = hx.mod(16).asDigit;
	};

	^hexString;
	}

	// inverse of stringToHexString
	*stringFromHexString {
	arg hexString;

	var string = String.newClear(hexString.size.div(2));

	string.size.do {
	arg i;
	string[i] = (hexString[i2].digit16 + hexString[i*2+1].digit).asAscii;
	}

	^string;
	}

	/* validation:
	a = (-256..256).collect(_.asAscii).reduce('++');
	b = StubTestLexerParserCompiler.stringToHexString(a);
	c = StubTestLexerParserCompiler.stringFromHexString(b);
	a == c
	*/

	///////////////////////////////////
	///// FILE FORMATTING METHODS /////
	///////////////////////////////////

	// given proper input, creates a header for the test file format used in these tests
	*formatHeader {
	arg alphabetSize, alphabet, prefix, suffix, technique, encodingDesc, stringCount;

	var alphabetString = "";
	var tmp;

	// data validation: BEGIN
	if(alphabetSize != alphabet.size) {
	Error("formatHeader: alphabet.size (%) and alphabetSize (%) should be equal".format(alphabet.size, alphabetSize)).throw;
	};
	if(alphabetSize <= 0) {
	Error("formatHeader: alphabetSize (%) must be greater than 0".format(alphabetSize)).throw;
	}
	if(alphabet.isKindOf(Array).not) {
	Error("formatHeader: alphabet should be an array").throw;
	};
	if(alphabet.every(_.isKindOf(String)).not) {
	Error("formatHeader: alphabet should be composed of strings").throw;
	};
	if(prefix.isKindOf(String).not) {
	Error("formatHeader: prefix should be a string").throw;
	};
	if(suffix.isKindOf(String).not) {
	Error("formatHeader: suffix should be a string").throw;
	};
	if(technique.isKindOf(Symbol).not) {
	Error("formatHeader: technique should be a symbol").throw;
	};
	if(encodingDesc.isKindOf(String).not) {
	Error("formatHeader: encodingDesc should be a string").throw;
	};
	if(stringCount <= 0) {
	Error("formatHeader: stringCount must be a positive number").throw;
	};
	// data validation: END

	alphabetString = alphabet.collect(this.stringToHexString(_)).join(",");

	^format(
	"HEAD\n"
	"alphabet size:%\n"
	"alphabet:%\n"
	"prefix:%\n"
	"suffix:%\n"
	"technique:%\n"
	"encoding:%\n"
	"DATA\n"
	"%\n",
	alphabetSize,
	alphabetString,
	this.stringToHexString(prefix),
	this.stringToHexString(suffix),
	technique,
	encodingDesc,
	stringCount
	);
	}

	// does pretty hefty validation on the header format
	// validation is performed in the parsing subroutines
	// returns results in a Dictionary
	*readHeader {
	arg file;

	var result = Dictionary[];

	// assert that file is already readable
	if(file.isOpen.not) {
	Error("readHeader: given file is not open: %".format(file.path)).throw;
	};

	this.readHeader_parseBlockName(file.getLine(this.maxline), "HEAD");
	result[\alphabetSize] = this.parseAlphabetSize(file.getLine(this.maxline));
	result[\alphabet] = this.parseAlphabet(file.getLine(this.maxline));
	result[\prefix] = this.parsePrefix(file.getLine(this.maxline));
	result[\suffix] = this.parseSuffix(file.getLine(this.maxline));
	result[\technique] = this.parseTechnique(file.getLine(this.maxline));
	result[\encoding] = this.parseEncoding(file.getLine(this.maxline));

	this.readHeader_parseBlockName(file.getLine(this.maxline), "DATA");
	result[\stringCount] = this.parseStringCount(file.getLine(this.maxline));

	^result;
	}

	////////////////////////////////////////////
	/// HEADER READING HELPER METHODS: BEGIN ///
	////////////////////////////////////////////

	*readHeader_verifyFieldName {
	arg str, expected;

	var len = expected.size;

	if(str.isNil) {
	Error("parseHeader: unexpectedly reached end of document while parsing %".format(expected.quote)).throw;
	};

	if(str[..len-1] != expected) {
	Error("parseHeader: expected %: got %".format(expected.quote, str[..len-1].quote)).throw;
	};

	^str[len..];
	}

	*readHeader_parseBlockName {
	arg line, blockName;

	if(line != blockName) {
	Error("readHeader: expected % block, got %".format(blockName.quote, line.quote));
	};
	}

	*parseAlphabetSize {
	arg str;
	var size;

	str = this.parseHeader_verifyFieldName(str, "alphabet size:");
	size = str.asInteger;

	if(size <= 0) {
	Error("parseHeader: alphabet size must be > 0: got %".format(size.quote)).throw;
	};

	^size;
	}

	*parseAlphabet {
	arg str;
	var alphabet;

	str = this.parseHeader_verifyFieldName(str, "alphabet:");
	alphabet = str.split($,);

	if(alphabet.isEmpty) {
	Error("parseHeader: alphabet is empty: got %".format(str.quote)).throw;
	};

	try {
	^alphabet.collect(this.stringFromHexString(_));
	} {
	arg e;
	Error("parseHeader: error while decoding alphabet %: %".format(alphabet, e.errorString)).throw;
	};
	}

	*parsePrefix {
	arg str;

	str = this.parseHeader_verifyFieldName(str, "prefix:");

	try {
	^this.stringFromHexString(str);
	} {
	arg e;
	Error("parseHeader: error while decoding prefix: %".format(e.errorString)).throw;
	};
	}

	*parseSuffix {
	arg str;

	str = this.parseHeader_verifyFieldName(str, "suffix:");

	try {
	^this.stringFromHexString(str);
	} {
	arg e;
	Error("parseHeader: error while decoding suffix: %".format(e.errorString)).throw;
	};
	}

	*parseTechnique {
	arg str;

	str = this.parseHeader_verifyFieldName(str, "technique:");

	if(str.isEmpty) {
	Error("parseHeader: must provide a technique name").throw;
	}

	^str.asSymbol;
	}

	*parseEncoding {
	arg str;

	str = this.parseHeader_verifyFieldName(str, "encoding:");

	^str;
	}

	*parseStringCount {
	arg str;
	var cnt = str.asInteger;
	if(cnt <= 0) {
	Error("parseHeader: string count must be > 0: got %".format(cnt)).throw;
	};
	^cnt;
	}

	//////////////////////////////////////////
	/// HEADER READING HELPER METHODS: END ///
	//////////////////////////////////////////

	}