Last active
April 19, 2020 04:12
-
-
Save wildthink/4b58eaf4caa7935e5872469972a144d1 to your computer and use it in GitHub Desktop.
Simple Tokenizer with a read() option to create Arrays of tokens delimited by parentheses, brackets, and braces.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// Tokenizer.swift
//
// Created by Jason Jobe on 4/15/20.
// Copyright © 2020 Jason Jobe. All rights reserved.
//

import Foundation

/// A small tokenizer built on Foundation's `Scanner`.
///
/// `next()` yields one token at a time; `read()` additionally groups the
/// tokens found between `(…)`, `[…]` and `{…}` into nested `[Any]` arrays.
///
/// Token representation (all returned as `Any`):
/// - `Character` for a single bracket: `{ } ( ) [ ]`
/// - `NSNumber` for a numeric literal accepted by `Scanner.scanDouble()`
/// - `String` for a word (everything up to the next delimiter) or for a
///   run of operator characters
///
/// NOTE(review): only the space character is skipped *before* a token, and
/// space is not a delimiter, so a word token may contain interior or
/// trailing spaces (e.g. `"a b"` is one token). Confirm this is intended.
public struct Tokenizer {

    /// Thrown by `read()` when a collection opened with `(`, `[` or `{` is
    /// not terminated by its matching closing delimiter.
    public struct ReadError: Error {
        /// Up to ten characters of the input, starting at the opening delimiter.
        var str: String
        /// Character offset of the opening delimiter from the start of the input.
        var pos: Int
        /// The opening delimiter of the collection that was left unclosed.
        var ch: Character
    }

    /// The underlying scanner; it carries the current read position.
    let scanr: Scanner
    /// Every character that terminates a word token.
    let dels = CharacterSet(charactersIn: "{}[]():|,~=+-<>!")
    /// The subset of `dels` that is returned as operator-run `String` tokens.
    let ops = CharacterSet(charactersIn: ":|,~=+-<>!")

    /// Creates a tokenizer over `str`. Only the space character is skipped
    /// between tokens.
    init(_ str: String) {
        scanr = Scanner(string: str)
        scanr.charactersToBeSkipped = CharacterSet(charactersIn: " ")
    }

    /// Scans and returns the next token, or `nil` when the input is exhausted.
    ///
    /// - Returns: A `Character` (bracket), `NSNumber` (number) or `String`
    ///   (word or operator run); `nil` at end of input.
    func next() -> Any? {
        // Returning nil when nothing can be scanned guarantees termination;
        // retrying without consuming input could only spin forever.
        guard !scanr.isAtEnd, let ch = scanr.peekChar else { return nil }

        switch ch {
        case "{", "}", "(", ")", "[", "]":
            return scanr.scanCharacter()
        default:
            // A numeric first character does not guarantee a parsable number;
            // when scanDouble() fails the text is scanned as a word below.
            if ch.isNumber, let d = scanr.scanDouble() {
                return NSNumber(value: d)
            }
        }

        // A word runs up to the next delimiter. If the scanner is sitting on
        // a delimiter, it must be an operator (brackets were handled above).
        return scanr.scanUpToCharacters(from: dels) ?? scanr.scanCharacters(from: ops)
    }

    /// Maps an opening bracket to its closing counterpart.
    ///
    /// - Returns: The matching closer, or `nil` if `ch` is not `(`, `{` or `[`.
    func ender(_ ch: Character) -> Character? {
        switch ch {
        case "(": return ")"
        case "{": return "}"
        case "[": return "]"
        default: return nil
        }
    }

    /// Reads the next value: a single token, or, when the token is an
    /// opening bracket, an `[Any]` holding everything up to the matching
    /// closer (nested collections become nested arrays).
    ///
    /// A closing bracket encountered outside of any collection is returned
    /// as a plain `Character` token.
    ///
    /// - Returns: The token or array, or `nil` at end of input.
    /// - Throws: `ReadError` when a collection reaches end of input without
    ///   its closer, or is interrupted by a closer of a different kind.
    func read() throws -> Any? {
        let item = next()
        guard let opener = item as? Character, let closer = ender(opener) else {
            return item
        }

        // The opener was just consumed, so it sits immediately before the
        // scanner's current position.
        let start = scanr.string.index(before: scanr.currentIndex)
        var list: [Any] = []

        while let element = try read() {
            if let bracket = element as? Character {
                if bracket == closer { return list }
                // A closer of another kind cannot belong to this collection;
                // report it instead of silently storing it as an element.
                if isCloser(bracket) {
                    throw unclosedError(opener: opener, at: start)
                }
            }
            list.append(element)
        }

        // Ran out of input before the closer appeared.
        throw unclosedError(opener: opener, at: start)
    }

    /// Returns `true` when `ch` is one of the closing brackets `)`, `]`, `}`.
    private func isCloser(_ ch: Character) -> Bool {
        switch ch {
        case ")", "]", "}": return true
        default: return false
        }
    }

    /// Builds the error for a collection opened by `opener` at `start`,
    /// capturing at most ten characters of context between the opener and
    /// the scanner's current position.
    private func unclosedError(opener: Character, at start: String.Index) -> ReadError {
        let source = scanr.string
        let limit = scanr.currentIndex
        let end = source.index(start, offsetBy: 10, limitedBy: limit) ?? limit
        return ReadError(
            str: String(source[start..<end]),
            pos: source.distance(from: source.startIndex, to: start),
            ch: opener
        )
    }
}
public extension Scanner {

    /// The next character the scanner would treat as significant, without
    /// consuming it.
    ///
    /// Characters made up entirely of scalars in `charactersToBeSkipped` are
    /// passed over. When `charactersToBeSkipped` is `nil` nothing is skipped,
    /// so the character at the current position is returned.
    ///
    /// - Returns: The upcoming significant character, or `nil` when the
    ///   scanner is at the end or only skippable characters remain.
    var peekChar: Character? {
        guard !isAtEnd else { return nil }
        let skips = charactersToBeSkipped
        return string[currentIndex...].first { ch in
            // With no skip set every character is significant.
            guard let skips = skips else { return true }
            // Significant as soon as any of its scalars is not skippable.
            return ch.unicodeScalars.contains(where: { !skips.contains($0) })
        }
    }

    /// The character at the scanner's current position, without applying
    /// `charactersToBeSkipped`; `nil` once `isAtEnd` is true.
    var rawPeekChar: Character? {
        isAtEnd ? nil : string[currentIndex]
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment