Last active
January 15, 2018 10:19
-
-
Save Floofies/aca8d37ee348294a61d6af04e94a144b to your computer and use it in GitHub Desktop.
Basic Lexical Scanner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Character classes, each expressed as a list of inclusive [firstCode, lastCode]
// pairs of UTF-16 code values. `scanRange` compares character codes against
// `range[0]` and `range[1]` of every pair.
const SEMICOLON = [[59, 59]]; // ";"
const SPACE = [[32, 32]]; // " "
const NUMERIC = [[48, 57]]; // "0" through "9"
const ALPHA = [
  [65, 90], // "A" through "Z"
  [97, 122], // "a" through "z"
];
// Composite classes. Spreading copies only the outer list, so the inner
// [first, last] pairs are the same array objects as in the base classes above;
// mutating one pair is visible through every class that contains it.
const ALPHA_SPACE = [...SPACE, ...ALPHA];
const ALPHA_NUMERIC = [...NUMERIC, ...ALPHA];
const ALPHA_NUMERIC_SPACE = [...SPACE, ...ALPHA_NUMERIC];
/**
 * Returns the substring of `string` that begins at `start` and ends just
 * before the first occurrence of any terminator listed in `nt`. When no
 * terminator occurs, the rest of `string` is returned.
 *
 * @param {string} string - Text to scan.
 * @param {number} [start=0] - Index to begin scanning at. Negative values are
 *   treated as 0.
 * @param {Iterable<string>} [nt=[" "]] - Terminator strings. Each may be longer
 *   than one character; empty terminators never match.
 * @returns {string} The scanned text, excluding the terminator.
 */
function scanToken(string, start = 0, nt = [" "]) {
  // Clamp so a negative start scans from the beginning instead of indexing
  // before the string (which reads `undefined`).
  const begin = Math.max(0, start);
  // `startsWith("")` is true at every position, so empty terminators are
  // dropped up front to keep them from ending the scan immediately.
  const terminators = Array.from(nt).filter((ntString) => ntString.length > 0);
  for (let loc = begin; loc < string.length; loc++) {
    if (terminators.some((ntString) => string.startsWith(ntString, loc))) {
      return string.slice(begin, loc);
    }
  }
  return string.slice(begin);
}
/**
 * Returns the substring of `string` that begins at `start` and ends just
 * before the first character whose UTF-16 code lies inside any of `ranges`.
 * When no character matches, the rest of `string` is returned.
 *
 * @param {string} string - Text to scan.
 * @param {number} [start=0] - Index to begin scanning at. Negative values are
 *   treated as 0.
 * @param {Iterable<number[]>|string} [ranges=SPACE] - Inclusive
 *   [firstCode, lastCode] pairs that stop the scan. A string is converted to
 *   ranges with `getRanges`.
 * @returns {string} The scanned text, excluding the stopping character.
 */
function scanRange(string, start = 0, ranges = SPACE) {
  const stopRanges = typeof ranges === "string" ? getRanges(ranges) : ranges;
  // Clamp so a negative start scans from the beginning instead of reading an
  // out-of-bounds character.
  const begin = Math.max(0, start);
  for (let loc = begin; loc < string.length; loc++) {
    const code = string.charCodeAt(loc);
    for (const range of stopRanges) {
      if (code >= range[0] && code <= range[1]) {
        return string.slice(begin, loc);
      }
    }
  }
  return string.slice(begin);
}
/**
 * Returns the unique character codes of the symbols in `string`, sorted in
 * ascending numeric order.
 *
 * Symbols are taken by iterating `string`, and each contributes
 * `charCodeAt(0)`, i.e. the code of its first UTF-16 unit.
 *
 * @param {string} string - Text whose alphabet is wanted.
 * @returns {number[]} Sorted, duplicate-free character codes.
 */
function getAlphabet(string) {
  const codes = Array.from(string, (symbol) => symbol.charCodeAt(0));
  // The comparator must return a negative/zero/positive number; a boolean
  // (`a > b`) never signals "less than" and leaves the array unsorted.
  return [...new Set(codes)].sort((a, b) => a - b);
}
/**
 * Converts the alphabet of `string` (as returned by `getAlphabet`) into a list
 * of inclusive [firstCode, lastCode] pairs, merging each run of consecutive
 * codes into a single pair.
 *
 * @param {string} string - Text whose characters define the ranges.
 * @returns {number[][]} One [firstCode, lastCode] pair per run; a lone code
 *   yields a pair with both entries equal.
 */
function getRanges(string) {
  const codeRanges = [];
  for (const code of getAlphabet(string)) {
    const current = codeRanges[codeRanges.length - 1];
    if (current !== undefined && code === current[1] + 1) {
      // Extend the open run by moving its upper bound. Appending instead would
      // leave index 1 holding the second code of a longer run, not the last.
      current[1] = code;
    } else {
      codeRanges.push([code, code]);
    }
  }
  return codeRanges;
}
/**
 * Finds the position of the `closer` that balances an already-open `opener`.
 *
 * Scanning starts at `start` with a nesting depth of 1, so `start` is expected
 * to point just past the opener being matched. Each further `opener` raises
 * the depth and each `closer` lowers it; the index at which the depth reaches
 * 0 is returned.
 *
 * @param {string} string - Text to scan.
 * @param {number} [start=0] - Index to begin scanning at.
 * @param {string} [opener="{"] - Opening delimiter; may be longer than one
 *   character.
 * @param {string} [closer="}"] - Closing delimiter; may be longer than one
 *   character.
 * @returns {number|null} Index of the first character of the balancing
 *   closer, or `null` if the text ends before the depth returns to 0.
 */
function findClosingPos(string, start = 0, opener = "{", closer = "}") {
  let depth = 1;
  let loc = Math.max(0, start);
  while (loc < string.length) {
    // The closer is tested first, so when opener and closer are the same text
    // (e.g. a quote mark) the first occurrence closes the pair.
    // Empty delimiters are skipped: `startsWith("")` is true everywhere.
    if (closer.length > 0 && string.startsWith(closer, loc)) {
      depth--;
      if (depth === 0) {
        return loc;
      }
      loc += closer.length;
    } else if (opener.length > 0 && string.startsWith(opener, loc)) {
      depth++;
      loc += opener.length;
    } else {
      loc++;
    }
  }
  return null;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment