-
-
Save alemar11/030a2cbd7146a9fc777fec13363bd9cc to your computer and use it in GitHub Desktop.
Scanning Sequences
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
// Alternatives to `Scanner` (before: `NSScanner`). | |
// A scanner only needs a way to peek and to move to the next token. | |
protocol ScannerProtocol { | |
associatedtype Token: Equatable | |
var peek: Token? { get } | |
mutating func moveToNextToken() | |
} | |
// Scanning sequences is nice, but possibly inefficient (and destructive!) | |
struct SequenceScanner<S> where S: Sequence { | |
typealias Element = S.Element | |
var peek: Element? | |
var iterator: S.Iterator | |
init(_ s: S) { | |
iterator = s.makeIterator() | |
peek = iterator.next() | |
} | |
} | |
// Some scanners also allow you to read (and set!) the location. | |
// This is useful for "undoing" a scan, or moving to a different location. | |
protocol RandomAccessScannerProtocol: ScannerProtocol { | |
associatedtype Location | |
var location: Location { get set } | |
} | |
// Collection scanners are a little bit more interesting | |
struct CollectionScanner<C>: RandomAccessScannerProtocol where C: Collection, C.Element: Equatable { | |
var location: C.Index // note: clients are allowed to *set* this as well | |
let collection: C | |
init(_ c: C) { | |
location = c.startIndex | |
collection = c | |
} | |
mutating func moveToNextToken() { | |
collection.formIndex(after: &location) | |
} | |
var peek: C.Element? { | |
guard location < collection.endIndex else { return nil } | |
return collection[location] | |
} | |
} | |
// We can define a whole bunch of methods on `ScannerProtocol` | |
extension ScannerProtocol { | |
mutating func scan() -> Token? { | |
defer { moveToNextToken() } | |
return peek | |
} | |
mutating func scan(_ literal: Token) -> Bool { | |
guard peek == literal else { | |
return false | |
} | |
moveToNextToken() | |
return true | |
} | |
mutating func scan(_ condition: (Token) -> Bool) -> Token? { | |
guard let p = peek, condition(p) else { | |
return nil | |
} | |
moveToNextToken() | |
return p | |
} | |
mutating func scanMany<Result>(_ condition: (Token) -> Bool, initial: Result, _ combine: (inout Result, Token) -> ()) -> Result { | |
var result = initial | |
while let p = peek, condition(p) { | |
combine(&result, p) | |
moveToNextToken() | |
} | |
return result | |
} | |
mutating func scanMany(_ condition: (Token) -> Bool) -> [Token] { | |
return scanMany(condition, initial: [], { $0.append($1) }) | |
} | |
mutating func scan(count: Int) -> [Token] { | |
var result: [Token] = [] | |
while result.count < count, let p = scan() { | |
result.append(p) | |
} | |
return result | |
} | |
var eof: Bool { | |
return peek == nil | |
} | |
} | |
// We can also define methods for Character scanners. | |
extension ScannerProtocol where Token == Character { | |
mutating func scanInt() -> Int? { | |
let digits = ("0" as Character)..."9" | |
let arr = scanMany { digits.contains($0) } | |
guard !arr.isEmpty else { return nil } | |
return Int(String(arr)) | |
} | |
} | |
// If we can "backtrack", we can also try scanning a prefix | |
extension RandomAccessScannerProtocol { | |
mutating func scan<S>(prefix: S) -> Bool where S: Sequence, S.Element == Token { | |
let startLocation = location | |
for token in prefix { | |
guard scan(token) else { | |
location = startLocation | |
return false | |
} | |
} | |
return true | |
} | |
} | |
// Scanning into a subsequence | |
extension CollectionScanner { | |
mutating func scan(count: C.IndexDistance) -> C.SubSequence? { | |
let endLocation: C.Index = collection.index(location, offsetBy: count) | |
guard endLocation <= collection.endIndex else { return nil } | |
defer { location = endLocation } | |
return collection[location..<endLocation] | |
} | |
} | |
// In the current version of Swift, data[i..j] is broken for i > 0. | |
extension CollectionScanner where C == Data { | |
mutating func scan(count: Int) -> Data? { | |
let endLocation: C.Index = collection.index(location, offsetBy: count) | |
guard endLocation <= collection.endIndex else { return nil } | |
defer { location = endLocation } | |
return collection.subdata(in: location..<endLocation) | |
} | |
// Scans two bytes into a little-endian encoded int16 | |
mutating func scanUInt16() -> UInt16? { | |
let data: Data? = scan(count: 2) | |
return data.map(UInt16.init) | |
} | |
} | |
// A convenience method | |
extension Character { | |
/// Only call this on UInt8's that you know are ASCII. | |
var asciiValue: UInt8 { | |
return UInt8(self.unicodeScalars.first!.value) | |
} | |
} | |
// Another convenience method | |
extension UInt16 { | |
init(littleEndian data: Data) { | |
assert(data.count == 2) | |
self = data.withUnsafeBytes { | |
return $0.pointee | |
} | |
} | |
} | |
// We can make `Byte` a random access collection! | |
struct Byte: RandomAccessCollection { | |
let value: UInt8 | |
init(_ value: UInt8) { self.value = value } | |
var startIndex: UInt8 { return 0 } | |
var endIndex: UInt8 { return 8 } | |
subscript(_ index: UInt8) -> Bool { | |
let mask: UInt8 = 128 >> index | |
return (mask & value) > 0 | |
} | |
} | |
// Now off to the real stuff: parsing a GIF header. First, we'll define the data structures | |
// From http://www.matthewflickinger.com/lab/whatsinagif/bits_and_bytes.asp | |
struct GIFHeader { | |
var width: UInt16 | |
var height: UInt16 | |
var globalColorTable: Bool | |
var colorResolution: (Bool,Bool,Bool) | |
var sort: Bool | |
var globalColorTableSizeRaw: (Bool,Bool,Bool) | |
var pixelAspectRatio: UInt16 | |
var globalColorTableSize: UInt8 { | |
var result: UInt8 = 0 | |
if globalColorTableSizeRaw.0 { | |
result |= 1 << 2 | |
} | |
if globalColorTableSizeRaw.1 { | |
result |= 1 << 1 | |
} | |
if globalColorTableSizeRaw.2 { | |
result |= 1 << 0 | |
} | |
return result | |
} | |
} | |
struct Color { | |
let r: UInt8 | |
let g: UInt8 | |
let b: UInt8 | |
} | |
// A convenience extension | |
extension ScannerProtocol where Token == UInt8 { | |
mutating func scanColor() -> Color? { | |
guard let r = scan(), | |
let g = scan(), | |
let b = scan() else { return nil } | |
return Color(r: r, g: g, b: b) | |
} | |
} | |
// Here is the parser: | |
struct GifParser { | |
var scanner: CollectionScanner<Data> | |
init(_ data: Data) { | |
scanner = CollectionScanner(data) | |
} | |
private mutating func scanHeader() -> GIFHeader? { | |
guard scanner.scan(prefix: "GIF".characters.map { $0.asciiValue }) else { return nil } | |
let version = String(scanner.scan(count: 3).map { Character(UnicodeScalar($0)) }) | |
guard version == "89a" else { return nil } | |
guard let width = scanner.scanUInt16(), | |
let height = scanner.scanUInt16(), | |
let packed = scanner.scan(), | |
let pixelAspectRatio = scanner.scanUInt16() | |
else { return nil } | |
let packedByte = Byte(packed) | |
return GIFHeader(width: width, height: height, globalColorTable: packedByte[0], colorResolution: (packedByte[1],packedByte[2],packedByte[3]), sort: packedByte[4], globalColorTableSizeRaw: (packedByte[5],packedByte[6],packedByte[7]), pixelAspectRatio: pixelAspectRatio) | |
} | |
private mutating func colorTable(globalColorTableSize: UInt8) -> [Color]? { | |
// numberOfColors = 2^(n+1) | |
let numberOfColors: UInt8 = 1 << (globalColorTableSize+1) | |
var result: [Color] = [] | |
for _ in 0..<numberOfColors { | |
guard let color = scanner.scanColor() else { return nil } | |
result.append(color) | |
} | |
return result | |
} | |
mutating func parse() -> (GIFHeader, [Color])? { | |
guard let header = scanHeader() else { return nil } | |
if header.globalColorTable { | |
guard let table = colorTable(globalColorTableSize: header.globalColorTableSize) else { return nil } | |
return (header, table) | |
} else { | |
return (header, []) | |
} | |
} | |
} | |
// http://www.matthewflickinger.com/lab/whatsinagif/images/sample_1.gif | |
let file = Bundle.main.url(forResource: "sample_1", withExtension: "gif")! | |
let data = try! Data(contentsOf: file) | |
var parser = GifParser(data) | |
dump(parser.parse()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://www.objc.io/blog/2019/02/05/a-scanner-alternative/