Skip to content

Instantly share code, notes, and snippets.

@matutter
Created August 30, 2014 21:58
Show Gist options
  • Select an option

  • Save matutter/39ba130bdb5ca9a68981 to your computer and use it in GitHub Desktop.

Select an option

Save matutter/39ba130bdb5ca9a68981 to your computer and use it in GitHub Desktop.
Common Language parser idea
// Splits `s` into tokens using the language definition `l` and prints every
// token to cout as "<lexeme> is <identity name>", then prints "done".
//
// A window [cursor, cursor + offset) slides over `s`:
//   * while ctxNoMatch reports that no identity matches the window, the
//     window grows by one character;
//   * when an identity matches and l.hasTokenMatch accepts the window, a
//     token is emitted and the cursor jumps past the lexeme;
//   * when an identity matches but l.hasTokenMatch rejects the window, the
//     cursor advances by one character (the window length is kept).
//
// Parameters:
//   s - text to tokenize.
//   l - language definition, consulted through ctxNoMatch, SecondChanceMatch
//       and l.hasTokenMatch.
void Tokenize(string s, langDef l) {
    unsigned int cursor = 0;
    unsigned int offset = 0;

    // `<=` rather than `<`: the window must be allowed to end exactly at
    // s.length(), otherwise the last character of the input can never be
    // part of a chunk and the final token is silently lost.
    while (cursor + offset <= s.length()) {
        Token token("");
        Identity hint;
        string chunk = s.substr(cursor, offset);

        if (ctxNoMatch(chunk, l, cursor, &hint)) {
            // Nothing recognises the window yet: widen it.
            offset += 1;
        }
        else if (l.hasTokenMatch(chunk)) {
            if (hint.SharesAffinity) {
                // May replace `chunk` with a longer lexeme and `hint` with the
                // identity that matched it; use that refined identity as the
                // token context instead of discarding it.
                SecondChanceMatch(s, l, cursor, &hint, &chunk);
                token.context = hint;
            }
            else if (hint.variable_length) {
                token.context = hint;
            }
            // NOTE(review): identities with neither flag set leave
            // token.context as constructed by Token("") - confirm intended.
            token.partial = chunk;

            if (token.partial.length() == 0) {
                // An empty lexeme is not a token. Widen the window instead of
                // retrying the identical state, which would never terminate.
                offset += 1;
                continue;
            }

            cout << token.partial << " is " << token.context.Name << endl;
            cursor += chunk.length();
            offset = 0;
        }
        else {
            cursor++;
        }
    }
    cout << "done" << endl;
}
bool SecondChanceMatch(string s, langDef l, unsigned int cursor, Identity * hint, string * chunk) {
Identity best = *hint;
int offset = 0;
for (vector<Identity>::iterator it = hint->AffinityPairs.begin(); it != hint->AffinityPairs.end(); ++it) {
offset = 0;
while (cursor + offset < s.length()) {
if (it->match(s.substr(cursor, offset))) {
if (s.substr(cursor, offset).length() > chunk->length()) {
*chunk = s.substr(cursor, offset);
best = *it;
}
}
offset++;
}
}
*hint = best;
return true;
}
bool ctxNoMatch(string s, langDef l, unsigned int cursor, Identity * hint) {
for (vector<Identity>::iterator it = l.Identities.begin(); it != l.Identities.end(); ++it)
if (regex_match(s, it->reg_match))
{
*hint = *it;
return false;
}
return true;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment