Last active
May 23, 2016 04:28
-
-
Save vy-let/7a9814761bbbc64769f8850ecb589346 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
// | |
// Parsley | |
// a little sprig of a parser | |
// | |
// Violet Baddley | |
// | |
// Written for Swift 2.2 | |
// I don't yet expect all of this to work, | |
// but at the bottom it parses whitespace | |
// including comments. | |
// | |
enum ASTKind { | |
case Generic | |
case Terminal | |
case Sequence | |
case Nil | |
case Whitespace | |
case Commentary | |
} | |
enum ParsError: ErrorType { | |
case EmptyInput(origin: String) | |
case NoMatch(origin: String) | |
} | |
// Lovely little Ruby goodness: | |
func * (lhs: String, rhs: Int) -> String { | |
var produce = "" | |
produce.reserveCapacity(lhs.utf8.count * rhs) | |
for _ in 0..<rhs { | |
produce.appendContentsOf(lhs) | |
} | |
return produce | |
} | |
class ParsleyAST { | |
let kind: ASTKind | |
let text: String | |
let parentOffset: Int | |
let children: [ParsleyAST] | |
required init(kind: ASTKind, children: [ParsleyAST], text: String, parentOffset: Int) { | |
self.kind = kind | |
self.text = text | |
self.parentOffset = parentOffset | |
self.children = children | |
} | |
convenience init(children: [ParsleyAST], text: String, parentOffset: Int) { | |
self.init(kind: .Generic, children: children, text: text, parentOffset: parentOffset) | |
} | |
convenience init(children: [ParsleyAST], text: String) { | |
self.init(kind: .Generic, children: children, text: text, parentOffset: 0) | |
} | |
convenience init() { | |
self.init(kind: .Nil, children: [], text: "", parentOffset: 0) | |
} | |
func ofKind(imposed: ASTKind) -> Self { | |
return self.dynamicType.init( kind: imposed, | |
children: self.children, | |
text: self.text, | |
parentOffset: self.parentOffset ) | |
} | |
func sortaYAML(indentationLevel: Int = 0) -> String { | |
let indentation = " " * indentationLevel | |
let childrenMap = children.map { child in child.sortaYAML(indentationLevel + 1) } | |
.joinWithSeparator("\n") | |
switch kind { | |
case .Sequence: | |
return indentation + "Sequence:\n\(childrenMap)" | |
case .Nil: | |
return "" | |
case .Whitespace: | |
let iffyChildren = childrenMap.isEmpty ? "" : ":\n\(childrenMap)" | |
return indentation + "Whitespace (\(text.unicodeScalars.count) chars)\(iffyChildren)" | |
default: | |
let iffyChildren = childrenMap.isEmpty ? "" : ":\n\(childrenMap)" | |
return indentation + "\(kind) “\(text)”\(iffyChildren)" | |
} | |
} | |
} | |
protocol Parsley { | |
func pars(text: String, parentOffset: Int) throws -> ParsleyAST | |
func ofKind(imposed: ASTKind) -> Self | |
var kind: ASTKind { get } | |
} | |
class ParsleyTerminal: Parsley { | |
let sigChars: String.UnicodeScalarView | |
let inverse: Bool | |
var positive: Bool { return !inverse } | |
let kind: ASTKind | |
required init(with: String, kind: ASTKind = .Terminal) { | |
sigChars = with.unicodeScalars | |
inverse = false | |
self.kind = kind | |
} | |
required init(excepting: String, kind: ASTKind = .Terminal) { | |
sigChars = excepting.unicodeScalars | |
inverse = true | |
self.kind = kind | |
} | |
func ofKind(imposed: ASTKind) -> Self { | |
if inverse { | |
return self.dynamicType.init(excepting: String(sigChars), kind: imposed) | |
} else { | |
return self.dynamicType.init(with: String(sigChars), kind: imposed) | |
} | |
} | |
func not() -> Self { | |
if inverse { | |
return self.dynamicType.init(with: String(sigChars), kind: self.kind) | |
} else { | |
return self.dynamicType.init(excepting: String(sigChars), kind: self.kind) | |
} | |
} | |
func union(other: ParsleyTerminal) -> Self { | |
// The logical union of the character sets. Any character included | |
// in either operand will be included in the result. | |
let resultantKind = (other.kind == self.kind) ? self.kind : .Terminal | |
if self.positive && other.positive { | |
let combinedChars = String(self.sigChars) + String(other.sigChars) | |
return self.dynamicType.init(with: combinedChars, kind: resultantKind) | |
} else if self.inverse && other.inverse { | |
// Keep only the mutual exclusions: | |
var mutualChars = "" | |
for potential in other.sigChars { | |
if self.sigChars.contains(potential) { | |
mutualChars.append(potential) | |
} | |
} | |
return self.dynamicType.init(excepting: mutualChars, kind: resultantKind) | |
} else /* one positive and other inverse */ { | |
var excludedChars: String | |
let includedChars: String.UnicodeScalarView | |
if self.positive { | |
excludedChars = String(other.sigChars) | |
includedChars = self.sigChars | |
} else { // other is positive | |
excludedChars = String(self.sigChars) | |
includedChars = other.sigChars | |
} | |
// Same as the last one, but with roles reversed: | |
for includedChar in includedChars { | |
excludedChars = excludedChars.stringByReplacingOccurrencesOfString(String(includedChar), | |
withString: "") | |
} | |
return self.dynamicType.init(excepting: excludedChars, kind: resultantKind) | |
} | |
} | |
func intersect(other: ParsleyTerminal) -> Self { | |
// Invert both operands, union them, and invert the result back: | |
return self.not().union( other.not() ).not() | |
} | |
func pars(text: String, parentOffset: Int) throws -> ParsleyAST { | |
let text_chars = text.unicodeScalars | |
guard !text_chars.isEmpty | |
else { throw ParsError.EmptyInput(origin: "in Terminal \(inverse ? "excepting" : "with") \"\(String(sigChars))\"") } | |
// Surely there has to be a better way of doing this... | |
var match = "" | |
for text_char in text_chars { | |
if charMatch(text_char) { | |
match.append(text_char) | |
} else { | |
break | |
} | |
} | |
// Ensure we found something! This is different from the guard- | |
// check above, in that we need to distinguish no-input from no-match. | |
guard !match.isEmpty | |
else { throw ParsError.NoMatch(origin: "in Terminal \(inverse ? "excepting" : "with") \"\(String(sigChars))\"") } | |
return ParsleyAST( kind: kind, | |
children: [], | |
text: match, | |
parentOffset: parentOffset ) | |
} | |
private func charMatch(text_char: UnicodeScalar) -> Bool { | |
let is_match = sigChars.contains(text_char) | |
return is_match != inverse // logical xor | |
} | |
} | |
class ParsleySeq: Parsley { | |
let seq: [Parsley] | |
let kind: ASTKind | |
required init(fromArray: [Parsley], kind: ASTKind = .Sequence) { | |
self.seq = fromArray | |
self.kind = kind | |
} | |
convenience init(_ seq: Parsley..., kind: ASTKind = .Sequence) { | |
self.init(fromArray: seq, kind: kind) | |
} | |
func ofKind(imposed: ASTKind) -> Self { | |
return self.dynamicType.init(fromArray: self.seq, kind: imposed) | |
} | |
func pars(text: String, parentOffset: Int) throws -> ParsleyAST { | |
let textChars = text.unicodeScalars | |
var runningOffset = 0 | |
let initial: [ParsleyAST] = [] | |
let children = try seq.reduce(initial) { (memo, parsley) in | |
let textRest = String(textChars.dropFirst(runningOffset)) | |
let match = try parsley.pars(textRest, parentOffset: runningOffset) | |
runningOffset = match.parentOffset + match.text.unicodeScalars.count | |
return memo + [ match ] | |
} | |
let matchedText = String(text.unicodeScalars.prefix(runningOffset)) | |
return ParsleyAST( kind: kind, | |
children: children, | |
text: matchedText, | |
parentOffset: parentOffset ) | |
} | |
} | |
class ParsleyRun: Parsley { | |
let reps: Parsley | |
let kind: ASTKind | |
required init(_ reps: Parsley, kind: ASTKind = .Sequence) { | |
self.reps = reps | |
self.kind = kind | |
} | |
func ofKind(imposed: ASTKind) -> Self { | |
return self.dynamicType.init(reps, kind: imposed) | |
} | |
func pars(text: String, parentOffset: Int) throws -> ParsleyAST { | |
let textChars = text.unicodeScalars | |
var matches: [ParsleyAST] = [] | |
var runningOffset = 0 | |
while true { | |
do { | |
let textRest = String(textChars.dropFirst(runningOffset)) | |
let match = try reps.pars(textRest, parentOffset: runningOffset) | |
matches.append(match) | |
runningOffset = match.parentOffset + match.text.unicodeScalars.count | |
} catch ParsError.NoMatch { | |
break | |
} catch ParsError.EmptyInput { | |
// Woooot! We parsed the entire input! | |
break | |
} | |
} | |
// A run requires at least one match. | |
guard matches.count > 0 | |
else { throw ParsError.NoMatch(origin: "in Run of \(reps) -> \(kind)") } | |
let matchedText = String(text.unicodeScalars.prefix(runningOffset)) | |
return ParsleyAST(kind: kind, children: matches, text: matchedText, parentOffset: parentOffset) | |
} | |
} | |
class ParsleyHopper: Parsley { | |
let seq: [Parsley] | |
let kind: ASTKind | |
required init(fromArray: [Parsley], imposedKind: ASTKind = .Nil) { | |
assert(fromArray.count > 0, "A Hopper must have at least one alternative to try.") | |
self.seq = fromArray | |
self.kind = imposedKind | |
} | |
convenience init(_ seq: Parsley...) { | |
self.init(fromArray: seq) | |
} | |
func ofKind(imposed: ASTKind) -> Self { | |
return self.dynamicType.init(fromArray: seq, imposedKind: imposed) | |
} | |
func pars(text: String, parentOffset: Int) throws -> ParsleyAST { | |
var firstMatch: ParsleyAST? = nil | |
for hop in seq { | |
do { | |
firstMatch = try hop.pars(text, parentOffset: parentOffset) | |
break | |
} catch ParsError.NoMatch { | |
// Carry on | |
} | |
} | |
// If none of them stuck, propagate the no-match: | |
guard firstMatch != nil | |
else { throw ParsError.NoMatch(origin: "in Hopper of \(seq)") } | |
if self.kind != .Nil { | |
// We have an explicit kind to impose upon the resulting AST: | |
firstMatch = firstMatch!.ofKind(self.kind) | |
} | |
return firstMatch! | |
} | |
} | |
class ParsleyOptional: Parsley { | |
let sub: Parsley | |
let kind: ASTKind | |
required init(_ sub: Parsley) { | |
self.sub = sub | |
self.kind = sub.kind | |
} | |
func ofKind(imposed: ASTKind) -> Self { | |
// Instead of trying to impose upon output, as the Hopper does, | |
// we'll just impose the kind on the wrapped sprig: | |
return self.dynamicType.init( sub.ofKind(imposed) ) | |
} | |
func pars(text: String, parentOffset: Int) throws -> ParsleyAST { | |
do { | |
return try sub.pars(text, parentOffset: parentOffset) | |
} catch ParsError.NoMatch { | |
return ParsleyAST() // nil AST | |
} | |
} | |
} | |
postfix operator ... {} | |
postfix func ... (reps: Parsley) -> ParsleyRun { | |
return ParsleyRun(reps) | |
} | |
func + (lhs: Parsley, rhs: Parsley) -> ParsleySeq { | |
let lhs_elements, rhs_elements: [Parsley] | |
if let lhs_seq = lhs as? ParsleySeq { | |
lhs_elements = lhs_seq.seq | |
} else { | |
lhs_elements = [lhs] | |
} | |
if let rhs_seq = rhs as? ParsleySeq { | |
rhs_elements = rhs_seq.seq | |
} else { | |
rhs_elements = [rhs] | |
} | |
return ParsleySeq(fromArray: lhs_elements + rhs_elements) | |
} | |
func || (lhs: Parsley, rhs: Parsley) -> ParsleyHopper { | |
let lhs_elements, rhs_elements: [Parsley] | |
if let lhs_hopper = lhs as? ParsleyHopper { | |
lhs_elements = lhs_hopper.seq | |
} else { | |
lhs_elements = [lhs] | |
} | |
if let rhs_hopper = rhs as? ParsleyHopper { | |
rhs_elements = rhs_hopper.seq | |
} else { | |
rhs_elements = [rhs] | |
} | |
return ParsleyHopper(fromArray: lhs_elements + rhs_elements) | |
} | |
prefix func ! (term: ParsleyTerminal) -> ParsleyTerminal { | |
return term.not() | |
} | |
prefix operator ~ {} | |
prefix func ~ (terminalChars: String) -> ParsleyTerminal { | |
return ParsleyTerminal(with: terminalChars) | |
} | |
prefix operator ~! {} | |
prefix func ~! (nonterminalChars: String) -> ParsleyTerminal { | |
return ParsleyTerminal(excepting: nonterminalChars) | |
} | |
// Lower precedence than logical or; higher than ternary conditional: | |
infix operator -- { associativity left precedence 105 } | |
func -- <ParsleyType: Parsley>(lhs: ParsleyType, rhs: ASTKind) -> ParsleyType { | |
return lhs.ofKind(rhs) | |
} | |
let literalWhitespace = ParsleyTerminal(with: " \n\r\t" + | |
"\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}" + | |
"\u{2008}\u{2009}\u{200a}\u{202f}\u{205f}\u{3000}") -- .Whitespace | |
let commentStarter = ~";" -- .Commentary | |
let eol = ~"\n" -- .Whitespace | |
let commentBody = !eol -- .Commentary | |
let commentary = commentStarter + commentBody -- .Commentary | |
let ws = (literalWhitespace || commentary)... -- .Whitespace | |
let inputs = "" + | |
" ; placeholder-words => “hoge fuga piyo”: reversed." + | |
"\n ; more commentary \n But then you have some stuff which is not commentary or whitespace." | |
do { | |
let ws_match = try ws.pars(inputs, parentOffset: 0) | |
print(ws_match.sortaYAML()) | |
} catch let to_err as ParsError { | |
print("Oh noes!!", to_err) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment