Skip to content

Instantly share code, notes, and snippets.

@wildthink
Last active April 19, 2020 04:12
Show Gist options
  • Save wildthink/4b58eaf4caa7935e5872469972a144d1 to your computer and use it in GitHub Desktop.
Save wildthink/4b58eaf4caa7935e5872469972a144d1 to your computer and use it in GitHub Desktop.
Simple Tokenizer with a read() option that builds Arrays of tokens delimited by parens, brackets, and braces.
//
// Tokenizer.swift
//
// Created by Jason Jobe on 4/15/20.
// Copyright © 2020 Jason Jobe. All rights reserved.
//
/// A small tokenizer built on Foundation's `Scanner`.
///
/// `next()` hands out one token at a time. `read()` additionally folds
/// bracketed runs -- `( )`, `[ ]`, `{ }` -- into nested `[Any]` arrays.
///
/// Token kinds produced by `next()`:
/// - `Character` for any single bracket,
/// - `NSNumber` for input that starts with a numeric character and scans as a double,
/// - `String` for a run of characters up to the next delimiter, or for a run
///   of operator characters.
public struct Tokenizer {
    /// Thrown by `read()` when an opening bracket is never closed.
    public struct ReadError: Error {
        /// Up to ten characters of input, starting at the unclosed bracket.
        public var str: String
        /// Character offset of the unclosed bracket from the start of the input.
        public var pos: Int
        /// The opening bracket that was left unclosed.
        public var ch: Character
    }

    /// Scanner over the input. `Scanner` is a class, so the non-mutating
    /// methods below can still advance it.
    let scanr: Scanner
    /// Characters that terminate a plain text token.
    let dels = CharacterSet(charactersIn: "{}[]():|,~=+-<>!")
    /// Delimiters that are returned as operator tokens (everything in `dels`
    /// except the brackets).
    let ops = CharacterSet(charactersIn: ":|,~=+-<>!")

    /// Creates a tokenizer over `str`. Only the space character is skipped
    /// between tokens.
    public init (_ str: String) {
        scanr = Scanner(string: str)
        scanr.charactersToBeSkipped = CharacterSet(charactersIn: " ")
    }

    /// Returns the next token, or `nil` when no further token can be produced.
    public func next() -> Any? {
        // A single pass is enough: every successful scan returns immediately,
        // and a pass in which nothing was scanned would only repeat itself
        // (the original `while`/`continue` form could spin forever here).
        guard !scanr.isAtEnd, let ch = scanr.peekChar else { return nil }

        switch ch {
        case "{", "}", "(", ")", "[", "]":
            return scanr.scanCharacter()
        default:
            if ch.isNumber, let d = scanr.scanDouble() {
                return NSNumber(value: d)
            }
        }

        if let text = scanr.scanUpToCharacters(from: dels) {
            return text
        }
        if let op = scanr.scanCharacters(from: ops) {
            return op
        }
        // Nothing matched, so the scanner did not advance; give up rather
        // than retry the same position.
        return nil
    }

    /// Maps an opening bracket to its closing counterpart; `nil` for any
    /// other character.
    func ender(_ ch: Character) -> Character? {
        switch ch {
        case "(": return ")"
        case "{": return "}"
        case "[": return "]"
        default: return nil
        }
    }

    /// Reads the next value: a plain token from `next()`, or -- when that
    /// token is an opening bracket -- an `[Any]` holding everything up to the
    /// matching closer, with nested brackets read recursively.
    ///
    /// Closing brackets met outside a collection are returned as-is.
    /// NOTE(review): a closer that does not match the current opener is
    /// appended to the collection like any other element; confirm whether
    /// that leniency is intended.
    ///
    /// - Returns: the value read, or `nil` at end of input.
    /// - Throws: `ReadError` if the input ends before a collection is closed.
    public func read() throws -> Any? {
        let it = next()
        // Only an opening bracket starts a collection; everything else
        // (including stray closers) passes straight through.
        guard let ch = it as? Character, let closer = ender(ch) else {
            return it
        }

        var list: [Any] = []
        let str = scanr.string
        // The opener was just consumed, so it sits one position back.
        let cur = str.index(before: scanr.currentIndex)

        while let r = try read() {
            if (r as? Character) == closer {
                return list
            }
            list.append(r)
        }

        // Error: Unclosed Collection. Gather some context
        // to help identify the source of the error
        let pos = str.distance(from: str.startIndex, to: cur)
        let count = min(str.distance(from: cur, to: scanr.currentIndex), 10)
        let end = str.index(cur, offsetBy: count)
        throw ReadError(str: String(str[cur..<end]), pos: pos, ch: ch)
    }
}
public extension Scanner {
    /// The first character at or after `currentIndex` that has at least one
    /// unicode scalar outside `charactersToBeSkipped`, without consuming
    /// anything. Returns `nil` when the scanner is at its end.
    ///
    /// When `charactersToBeSkipped` is `nil` nothing is skipped, so this is
    /// simply the character at `currentIndex` (the original returned `nil`
    /// in that configuration because the skip-set binding never succeeded).
    var peekChar: Character? {
        guard !isAtEnd else { return nil }
        let rest = string[currentIndex...]
        guard let skips = charactersToBeSkipped else { return rest.first }
        return rest.first(where: { ch in
            ch.unicodeScalars.contains(where: { !skips.contains($0) })
        })
    }

    /// The character at `currentIndex` with no skipping applied, or `nil`
    /// when `isAtEnd` is true.
    var rawPeekChar: Character? {
        isAtEnd ? nil : string[currentIndex]
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment