Created
August 21, 2019 11:40
-
-
Save Islidius/e036a9f261b18f9132cf26ef5a9a06c9 to your computer and use it in GitHub Desktop.
Small piece of code to generate FIRST and FOLLOW sets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 'ε' symbol for empty symbol | |
const EPSILON = 'ε'; | |
// this is a set that tracks the fact it contains 'ε' seperatly | |
class CustomSet { | |
constructor() { | |
this.set = new Set(); | |
this.eps = false; | |
} | |
addTerminal(terminal) { | |
this.set.add(terminal); | |
} | |
union(other) { | |
for(let k of other.set) { | |
this.set.add(k); | |
} | |
} | |
setEpsilon() { | |
this.eps = true; | |
} | |
hasEpsilon() { | |
return this.eps; | |
} | |
asArray() { | |
if(this.hasEpsilon()) return [...this.set, EPSILON]; | |
return [...this.set]; | |
} | |
} | |
// simple graph wrapper | |
class Graph { | |
constructor() { | |
this.graph = {}; | |
} | |
addLink(from, to) { | |
if(this.graph[from] === undefined) { | |
this.graph[from] = []; | |
} | |
this.graph[from].push(to); | |
} | |
getLink(from) { | |
return this.graph[from] ? this.graph[from] : []; | |
} | |
} | |
const isTerminal = (symbol) => !/[A-Z]/.test(symbol); | |
const isNonTerminal = (symbol) => /[A-Z]/.test(symbol); | |
const isEpsilon = (symbol) => symbol === EPSILON; | |
const getLHS = (production) => | |
production.split('->')[0].replace(/\s+/g, ''); | |
const getRHS = (production) => | |
production.split('->')[1].replace(/\s+/g, ''); | |
const getProductionsForSymbol = (symbol, grammar) => | |
grammar.filter((p) => getLHS(p) === symbol); | |
const getProductionsWithSymbol = (symbol, grammar) => | |
grammar.filter((p) => getRHS(p).indexOf(symbol) !== -1); | |
const generateFirst = (symbol, firstSets, grammar) => { | |
if(firstSets[symbol]) return; | |
let first = firstSets[symbol] = new CustomSet; | |
// the first set of a terminal symbol is just the terminal | |
if(isTerminal(symbol)) { | |
first.addTerminal(symbol); | |
return; | |
} | |
for(let production of getProductionsForSymbol(symbol, grammar)) { | |
let rhs = getRHS(production); | |
let hasAtLeastOneTerminal = false; | |
for(let prodSymbol of rhs) { | |
// if production is 'ε' then the first also contains it | |
if(prodSymbol === EPSILON) { | |
first.setEpsilon(); | |
break; | |
} | |
// get the first of the current symbol | |
generateFirst(prodSymbol, firstSets, grammar); | |
let firstOfNonTerminal = firstSets[prodSymbol]; | |
first.union(firstOfNonTerminal); | |
// if the first of the current symbol does not contain 'ε' break | |
if(!firstOfNonTerminal.hasEpsilon()) { | |
hasAtLeastOneTerminal = true; | |
break; | |
} | |
} | |
// remember that if all non-terminals contain 'ε' first also contains 'ε' | |
if(!hasAtLeastOneTerminal) { | |
first.setEpsilon(); | |
} | |
} | |
}; | |
const buildFirst = (grammar) => { | |
let firstSets = {}; | |
for(let prod of grammar) { | |
generateFirst(getLHS(prod), firstSets, grammar); | |
} | |
return firstSets; | |
}; | |
// this follow pass does only copy in the first sets and builds the follow graph | |
const generateFollow = (symbol, followSets, firstSets, graph, grammar) => { | |
if(followSets[symbol]) return; | |
let follow = followSets[symbol] = new CustomSet; | |
for(let production of getProductionsWithSymbol(symbol, grammar)) { | |
let rhs = getRHS(production); | |
let symbolIndex = 0; | |
while(true) { | |
// find the location of the symbol in question | |
symbolIndex = rhs.indexOf(symbol, symbolIndex); | |
// was not found | |
if(symbolIndex === -1) break; | |
// symbol after the symbol | |
symbolIndex += 1; | |
while(true) { | |
// symbol is the last one. This means FOLLOW(symbol) = FOLLOW(getLHS(production)) | |
// if we copy here we run in caching problems, so just add it | |
// to the graph and later propagate it | |
if(symbolIndex === rhs.length) { | |
graph.addLink(symbol, getLHS(production)); | |
break; | |
} | |
let followSymbol = rhs[symbolIndex]; | |
generateFirst(followSymbol, firstSets, grammar); | |
let firstOfFollow = firstSets[followSymbol]; | |
follow.union(firstOfFollow); | |
// break if the FIRST does not contain 'ε' | |
if(!firstOfFollow.hasEpsilon()) break; | |
symbolIndex += 1; | |
} | |
} | |
} | |
}; | |
// propagate the FOLLOW sets using the graph previously build | |
// this employs a simple breath first search of the graph and | |
// unions the sets. | |
const propagateFollow = (symbol, followSets, graph) => { | |
let visited = [symbol]; | |
let current = [symbol]; | |
while(current.length !== 0) { | |
let next = []; | |
for(let c of current) { | |
for(let n of graph.getLink(c)) { | |
if(!visited.includes(n)) { | |
visited.push(n); | |
next.push(n); | |
followSets[symbol].union(followSets[n]); | |
} | |
} | |
} | |
current = next; | |
} | |
}; | |
// first build the follow sets and graph, the propagate the follow | |
// set using the graph. | |
const buildFollow = (grammar, firstSets, startingSymbol) => { | |
let followSets = {}; | |
let graph = new Graph; | |
for(let prod of grammar) { | |
generateFollow(getLHS(prod), followSets, firstSets, graph, grammar); | |
} | |
// add the end of input to the starting symbol | |
followSets[startingSymbol].addTerminal('$'); | |
for(let prod of grammar) { | |
propagateFollow(getLHS(prod), followSets, graph); | |
} | |
return followSets; | |
}; | |
const printGrammar = (grammar) => { | |
for(let k of grammar) { | |
console.log(' ', k); | |
} | |
}; | |
const printSet = (set) => { | |
for(let k in set) { | |
console.log(' ', k, ':', set[k].asArray()); | |
} | |
}; | |
const buildAndPrint = (grammar, startingSymbol) => { | |
let firstSets = buildFirst(grammar); | |
let followSets = buildFollow(grammar, firstSets, startingSymbol); | |
console.log('Grammar:\n'); | |
printGrammar(grammar); | |
console.log(''); | |
console.log('First sets:\n'); | |
printSet(firstSets); | |
console.log(''); | |
console.log('Follow sets:\n'); | |
printSet(followSets); | |
console.log(''); | |
} | |
let grammar = [ | |
'S -> F', | |
'S -> (S + F)', | |
'F -> a' | |
]; | |
buildAndPrint(grammar, 'S'); | |
grammar = [ | |
'E -> TX', | |
'X -> +TX', | |
'X -> ε', | |
'T -> FY', | |
'Y -> *FY', | |
'Y -> ε', | |
'F -> a', | |
'F -> (E)', | |
]; | |
buildAndPrint(grammar, 'E'); | |
grammar = [ | |
'S -> AB', | |
'A -> ε', | |
'B -> ε' | |
]; | |
buildAndPrint(grammar, 'S'); | |
grammar = [ | |
'E -> aT', | |
'E -> ε', | |
'T -> bE', | |
'T -> ε', | |
'S -> Ec' | |
]; | |
buildAndPrint(grammar, 'S'); | |
grammar = [ | |
'E -> EE', | |
'E -> a' | |
]; | |
buildAndPrint(grammar, 'E'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment