Skip to content

Instantly share code, notes, and snippets.

@mossheim
Last active February 22, 2017 15:36
Show Gist options
  • Save mossheim/d8eb73e7a124b4c55ec774fb566c934f to your computer and use it in GitHub Desktop.
Save mossheim/d8eb73e7a124b4c55ec774fb566c934f to your computer and use it in GitHub Desktop.
/* Untitled.sc (TODO: Actual name for class)
*
* Tests for the SuperCollider 3 lexer, parser, compiler
* Original author: Brian Heim 2017-02-19
* This code is licensed under the GPLv3
*
* Note: performance really matters for some of these things. I
* did a lot of intense benchmarking and came up with low-level
* functions and loops that are blazingly fast for SC. I left in
* my old code in some cases so you can benchmark yourself if
* you're so inclined. Also, there is the validation code I've
* left commented under some functions, try it out. - Brian
*/
StubTestLexerParserCompiler : UnitTest {
const compileErrorString = "!cErr";
const runtimeErrorString = "!rErr";
const validatedOutputFilenameSuffix = "_expected";
const maxline = 1024; // may need to update this, some strings like `0!99` can produce very long output
// TODO: (possible) add option for non-hex-string output (maybe you're just actually doing normal looking code sometimes)
// tests all possible combinations-with-replacement of members of a given alphabet,
// writes the results to a file, and checks it against validated results.
// The client may also specify an optional prefix and/or suffix to be added to each
// string before testing. Only strings of length `len` are tested.
*testAllPossibleStrings {
arg alphabet, // Array of strings to test
len, // String length
prefix = "", suffix = "",
testID = "", // unique filename ID
technique = \compile, // best
deleteOnFinish = true, // technique to use for testing
compressRepeatResults = true;
var alphabetSize = alphabet.size;
var filename, file;
var header; // intro content to be written to the file
var diffs; // holds a list (empty under sucessful execution) of differences between expected and actual output
var testString, fullTestString; // holds the string that will be tested with testOneString
var prevResult, repeatedResultCount = 0; // hold the previous result for compressing repeat results
var counter = 0!len; // [0, 0, ..., 0] which controls indexing into the alphabet to generate a string
var dataLine; // will hold the result stored to the file
var encoding;
// sort the alphabet to start
alphabet = alphabet.sort;
postln("testAllPossibleStrings: Generating header");
header = this.formatHeader(
alphabet,
prefix,
suffix,
technique,
getEncodingString(technique),
alphabetSize ** len // stringCount
);
postln("testAllPossibleStrings: Creating file");
if(File.exists(filename)) {
Error("testAllPossibleStrings: File % already exists: "
"please delete before continuing".format(filename.quote)).throw;
};
file = File.new(filename, "w");
if(file.isOpen.not) {
Error("testAllPossibleStrings: Failed to open file").throw;
};
protect {
postln("testAllPossibleStrings: Writing header");
file.write(header);
postln("testAllPossibleStrings: Writing data");
// we don't have your fancy do-whiles down in these parts
// TODO: benchmark to see if using a flag in the while loop is /really/ so bad
// TODO: benchmark to see if there's a better way than reduce
// TODO: refactor this for clarity
testString = alphabet[counter].reduce('++')?""; // reduce on an empty array returns nil :(
fullTestString = prefix ++ testString ++ suffix;
dataLine = this.testOneString(fullTestString, technique);
file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
prevResult = dataLine;
while {this.incrementAlphabetCount(counter, len, alphabetSize)} {
testString = alphabet[counter].reduce('++')?"";
fullTestString = prefix ++ testString ++ suffix;
dataLine = this.testOneString(fullTestString, technique);
if(compressRepeatResults) {
if(dataLine == prevResult) {
// if we just saw this result, don't print it, but keep track of it
repeatedResultCount = repeatedResultCount + 1;
} {
if(repeatedResultCount > 0) {
// give a count if it's more than 0. 0 is the assumed default
file.putChar($\t);
file.write(repeatedResultCount.asString);
repeatedResultCount = 0;
};
file.putChar($\n);
file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
prevResult = dataLine; // only update if it changed
}
} {
// just write the line normally
file.putChar($\n);
file.write((this.stringToHexString(testString) ++ "\t" ++ dataLine));
}
};
if(compressRepeatResults && (repeatedResultCount > 0)) {
// "flush the buffer"
file.putChar($\t);
file.write(repeatedResultCount.asString);
};
file.putChar($\n);
} {
file.close;
postln("testAllPossibleStrings: Validating against standard output");
//diffs = this.validate(filename);
if(deleteOnFinish) {
postln("testAllPossibleStrings: Deleting test file");
File.delete(filename);
}
};
postln("testAllPossibleStrings: Success!");
^diffs;
}
// TODO: add more encodings
getEncodingString {
arg technique;
^switch(technique,
\compile,
"<hex-format input string><tab>"
"<\"!cErr\" (compile error) | \"!rErr\" (runtime error) | hex-format output string>"
"<opt tab & class name of output>"
"<opt tab & count of consecutive repeats of this result (no number means no repetitions)>",
{Error("getEncodingString: unrecognized technique: %".format(technique.quote))}
);
}
// returns `false` if there is an overflow
// modifies the array in-place!
*incrementAlphabetCount {
arg ctr, len, n; // the counter array, string length, and alphabet size (n)
// I benchmarked this pretty hard, but there might be a better way!
// especially if you can avoid the repeated indexing into `ctr` - Brian
while {len > 0} {
len = len-1;
ctr[len] = ctr[len] + 1;
if(len == 0) {
"incrementAlphabetCount: at %\n".postf(ctr);
};
if(ctr[len] != n) {^true} {ctr[len] = 0};
};
^false;
}
*testOneString {
arg string, technique;
var r, rclass;
// string.postln;
// TODO: use this.clearAll (clears a,b,...,z)
// TODO: benchmark vs format
switch(technique,
\compile, {
r = string.compile;
if(r.isNil) {
// compile error
r = compileErrorString;
} {
// interpret using .value
try {
r = r.value;
rclass = r.class;
r = this.stringToHexString(r.asString)++"\t"++rclass.asString;
} {
r = runtimeErrorString;
}
}
},
\bytecode, {
r = string.compile;
if(r.isNil) {
// compile error
r = compileErrorString;
} {
r = this.bytecodeToHexString(r.def.code);
}
}, {
Error("testOneString: invalid technique option: %".format(technique)).throw;
}
);
^r;
// TODO: benchmark w/ format and switching around .asString
// TODO: write faster methods for when prefix/suffix/both are empty
}
// TODO: find better names than afile and efile
*validate {
arg filename;
var validated = filename ++ validatedOutputFilenameSuffix;
var efile, afile; // expected file, actual file
var diffs; // array of pairs of lines that are not identical
File.exists(validated).not.if {
Error("validate: couldn't find file for validation: %".format(validated.quote)).throw;
};
File.exists(filename).not.if {
Error("validate: couldn't find file for validation: %".format(filename.quote)).throw;
};
efile = File(validated, "r");
afile = File(filename, "r");
if(efile.isOpen.not) {
Error("validate: failed to open validation file: %".format(validated.quote)).throw;
};
if(afile.isOpen.not) {
Error("validate: failed to open validation file: %".format(filename.quote)).throw;
};
protect {
var eheader, aheader, keySet, areAlphabetsEqual;
eheader = this.parseHeader(this.readHeader(efile));
aheader = this.parseHeader(this.readHeader(afile));
// compare the headers, warn if there are issues
keySet = union(aheader.keys, eheader.keys);
keySet.do {
arg key;
if(aheader[key] != eheader[key]) {
"validate: headers differ on key %\n"
"\texpected header has: %\n"
"\tactual header has: %".format(
key, aheader[key], eheader[key]
).warn;
}
};
areAlphabetsEqual = aheader[\alphabet] == eheader[\alphabet];
diffs = areAlphabetsEqual.if {
"validate: alphabets are equal, using fast method".postln;
validateDataFast(afile, efile);
} {
"validate: alphabets are unequal, using slow method".postln;
validateDataSlow(afile, efile, aheader[\alphabet], eheader[\alphabet]);
};
// TODO: read data lines
// parse into input + output
// compare
// record
// also, handle state for unrolling repeated-line notation
} {
afile.close;
efile.close;
}
// TODO: benchmark with Pair vs Array
^diffs;
}
// TODO
// TODO: this is only for .compile -- write one for bytecode!
*validateDataSlow {
arg afile, efile, aset, eset;
var asize = aset.size, esize = eset.size;
var actr = 0!asize, ectr = 0!esize;
var diffs = []; // can't use a dict because some entries may be nil
}
// TODO
// TODO: this is only for .compile -- write one for bytecode!
*validateDataFast {
arg afile, efile;
var aline, eline;
var areps = 0, ereps = 0;
var diffs = []; // TODO: benchmark against array/set/collection of pairs, dictionary/set/etc. (less important)
while {
if(areps > 0) {
aline = afile.getLine(this.maxline);
if(aline.size >= (this.maxline-1)) {
"validateDataFast: maxline characters read from actual data file: %".format(aline.quote).warn;
};
};
if(ereps > 0) {
eline = efile.getLine(this.maxline);
if(eline.size >= (this.maxline-1)) {
"validateDataFast: maxline characters read from expected data file: %".format(eline.quote).warn;
};
};
((areps > 0) || aline.notNil) &&
((ereps > 0) || eline.notNil);
} {
// either pull next line or decrement current line.
// only one of these counters can ever be decremented in a single pass.
if(areps == 0) {
aline = this.parseCompileDataLine(aline);
areps = aline[\reps];
} {
areps = areps - 1;
};
if(ereps == 0) {
eline = this.parseCompileDataLine(eline);
ereps = aline[\reps];
} {
ereps = ereps - 1;
};
if(
(aline[\output] != eline[\output]) ||
(aline[\outputClass] != eline[\outputClass])
) {
diffs = diffs.add(this.mkCompileDataDiff(aline, eline));
};
// only bother if there's work to be done
// TODO: benchmark w/o this check
if((areps > 0) && (ereps > 0)) {
var minReps = min(areps, ereps);
areps = areps - minReps;
ereps = ereps - minReps;
};
};
if(aline.notNil || eline.notNil) {
"validateDataFast: not all data has been exhausted: actual file has %, expected file has %".format(aline.quote, eline.quote).warn;
};
^diffs;
}
*parseCompileDataLine {
arg line;
var result = Dictionary[];
line = line.split($\t);
result[\string] = line[0];
result[\output] = line[1];
result[\hadError] = this.isErrorString(result[\output]);
if(result[\hadError]) {
result[\reps] = line[2]?0;
} {
result[\outputClass] = line[2];
result[\reps] = line[3]?0;
};
^result;
}
*mkCompileDataDiff {
arg aline, eline;
^[aline[\string], [aline[\output], aline[\outputClass]], [eline[\output], eline[\outputClass]]];
}
*isErrorString {
arg str;
^(str == compileErrorString) || (str == runtimeErrorString);
}
// TODO: determine naming scheme
*mkFilename {
arg testID;
// this works for now; maybe it should really be this simple?
^testID.resolveRelative;
}
/////////////////////////////////////
///// SMALL CONVENIENCE METHODS /////
/////////////////////////////////////
*bytecodeToHexString {
arg bytes;
var hexString = String.newClear(bytes.size*2);
bytes.do {
arg byte, i;
hexString[i*2] = byte.mod(256).div(16).asDigit;
hexString[i*2+1] = byte.mod(16).asDigit;
};
^hexString;
}
*bytecodeFromHexString {
arg hexString;
var bytes = Int8Array.newClear(hexString.size.div(2));
// about 4/3 faster
bytes.size.do {
arg i;
bytes[i] = hexString[i*2].digit*16 + hexString[i*2+1].digit;
};
/*var i=0;
hexString.pairsDo {
arg hb, lb;
bytes[i] = hb.digit*16 + lb.digit;
i = i + 1;
};*/
^bytes;
}
/* validation:
a = Int8Array.newFrom((-256..256));
b = StubTestLexerParserCompiler.bytecodeToHexString(a);
c = StubTestLexerParserCompiler.bytecodeFromHexString(b);
a == c
*/
// converts an input String to a String representing its hex ASCII values.
// The output string is twice the length of the input.
*stringToHexString {
arg string;
// var hexString = "";
var hexString = String.newClear(string.size*2);
// I was originally using .asHexString; this is much faster (250+%) - Brian
string.do {
arg char, i;
var hx = char.ascii.mod(256);
hexString[i*2] = hx.div(16).asDigit;
hexString[i*2+1] = hx.mod(16).asDigit;
};
^hexString;
}
// inverse of stringToHexString
*stringFromHexString {
arg hexString;
var string = String.newClear(hexString.size.div(2));
string.size.do {
arg i;
string[i] = (hexString[i*2].digit*16 + hexString[i*2+1].digit).asAscii;
}
^string;
}
/* validation:
a = (-256..256).collect(_.asAscii).reduce('++');
b = StubTestLexerParserCompiler.stringToHexString(a);
c = StubTestLexerParserCompiler.stringFromHexString(b);
a == c
*/
///////////////////////////////////
///// FILE FORMATTING METHODS /////
///////////////////////////////////
// given proper input, creates a header for the test file format used in these tests
*formatHeader {
arg alphabetSize, alphabet, prefix, suffix, technique, encodingDesc, stringCount;
var alphabetString = "";
var tmp;
// data validation: BEGIN
if(alphabetSize != alphabet.size) {
Error("formatHeader: alphabet.size (%) and alphabetSize (%) should be equal".format(alphabet.size, alphabetSize)).throw;
};
if(alphabetSize <= 0) {
Error("formatHeader: alphabetSize (%) must be greater than 0".format(alphabetSize)).throw;
}
if(alphabet.isKindOf(Array).not) {
Error("formatHeader: alphabet should be an array").throw;
};
if(alphabet.every(_.isKindOf(String)).not) {
Error("formatHeader: alphabet should be composed of strings").throw;
};
if(prefix.isKindOf(String).not) {
Error("formatHeader: prefix should be a string").throw;
};
if(suffix.isKindOf(String).not) {
Error("formatHeader: suffix should be a string").throw;
};
if(technique.isKindOf(Symbol).not) {
Error("formatHeader: technique should be a symbol").throw;
};
if(encodingDesc.isKindOf(String).not) {
Error("formatHeader: encodingDesc should be a string").throw;
};
if(stringCount <= 0) {
Error("formatHeader: stringCount must be a positive number").throw;
};
// data validation: END
alphabetString = alphabet.collect(this.stringToHexString(_)).join(",");
^format(
"HEAD\n"
"alphabet size:%\n"
"alphabet:%\n"
"prefix:%\n"
"suffix:%\n"
"technique:%\n"
"encoding:%\n"
"DATA\n"
"%\n",
alphabetSize,
alphabetString,
this.stringToHexString(prefix),
this.stringToHexString(suffix),
technique,
encodingDesc,
stringCount
);
}
// does pretty hefty validation on the header format
// validation is performed in the parsing subroutines
// returns results in a Dictionary
*readHeader {
arg file;
var result = Dictionary[];
// assert that file is already readable
if(file.isOpen.not) {
Error("readHeader: given file is not open: %".format(file.path)).throw;
};
this.readHeader_parseBlockName(file.getLine(this.maxline), "HEAD");
result[\alphabetSize] = this.parseAlphabetSize(file.getLine(this.maxline));
result[\alphabet] = this.parseAlphabet(file.getLine(this.maxline));
result[\prefix] = this.parsePrefix(file.getLine(this.maxline));
result[\suffix] = this.parseSuffix(file.getLine(this.maxline));
result[\technique] = this.parseTechnique(file.getLine(this.maxline));
result[\encoding] = this.parseEncoding(file.getLine(this.maxline));
this.readHeader_parseBlockName(file.getLine(this.maxline), "DATA");
result[\stringCount] = this.parseStringCount(file.getLine(this.maxline));
^result;
}
////////////////////////////////////////////
/// HEADER READING HELPER METHODS: BEGIN ///
////////////////////////////////////////////
*readHeader_verifyFieldName {
arg str, expected;
var len = expected.size;
if(str.isNil) {
Error("parseHeader: unexpectedly reached end of document while parsing %".format(expected.quote)).throw;
};
if(str[..len-1] != expected) {
Error("parseHeader: expected %: got %".format(expected.quote, str[..len-1].quote)).throw;
};
^str[len..];
}
*readHeader_parseBlockName {
arg line, blockName;
if(line != blockName) {
Error("readHeader: expected % block, got %".format(blockName.quote, line.quote));
};
}
*parseAlphabetSize {
arg str;
var size;
str = this.parseHeader_verifyFieldName(str, "alphabet size:");
size = str.asInteger;
if(size <= 0) {
Error("parseHeader: alphabet size must be > 0: got %".format(size.quote)).throw;
};
^size;
}
*parseAlphabet {
arg str;
var alphabet;
str = this.parseHeader_verifyFieldName(str, "alphabet:");
alphabet = str.split($,);
if(alphabet.isEmpty) {
Error("parseHeader: alphabet is empty: got %".format(str.quote)).throw;
};
try {
^alphabet.collect(this.stringFromHexString(_));
} {
arg e;
Error("parseHeader: error while decoding alphabet %: %".format(alphabet, e.errorString)).throw;
};
}
*parsePrefix {
arg str;
str = this.parseHeader_verifyFieldName(str, "prefix:");
try {
^this.stringFromHexString(str);
} {
arg e;
Error("parseHeader: error while decoding prefix: %".format(e.errorString)).throw;
};
}
*parseSuffix {
arg str;
str = this.parseHeader_verifyFieldName(str, "suffix:");
try {
^this.stringFromHexString(str);
} {
arg e;
Error("parseHeader: error while decoding suffix: %".format(e.errorString)).throw;
};
}
*parseTechnique {
arg str;
str = this.parseHeader_verifyFieldName(str, "technique:");
if(str.isEmpty) {
Error("parseHeader: must provide a technique name").throw;
}
^str.asSymbol;
}
*parseEncoding {
arg str;
str = this.parseHeader_verifyFieldName(str, "encoding:");
^str;
}
*parseStringCount {
arg str;
var cnt = str.asInteger;
if(cnt <= 0) {
Error("parseHeader: string count must be > 0: got %".format(cnt)).throw;
};
^cnt;
}
//////////////////////////////////////////
/// HEADER READING HELPER METHODS: END ///
//////////////////////////////////////////
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment