Last active
February 24, 2018 18:10
-
-
Save alejandro-isaza/7b5b9b2bafe5052828f6 to your computer and use it in GitHub Desktop.
Swift Character and String extensions for parsing.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
public extension Character {
    /// Determine if the character is a space, tab or line break.
    ///
    /// A carriage return followed by a line feed forms a single extended grapheme cluster, so it reaches
    /// this method as one `Character` that is equal to neither "\r" nor "\n". It is matched explicitly so
    /// that text with CR LF line endings is recognised as whitespace.
    public func isSpace() -> Bool {
        switch self {
        case " ", "\t", "\n", "\r", "\r\n":
            return true
        default:
            return false
        }
    }

    /// The first UTF-16 code unit of the character.
    ///
    /// Characters that need more than one code unit (surrogate pairs, base characters followed by
    /// combining marks) are truncated to their leading unit.
    public var utf16: UInt16 {
        get {
            let units = String(self).utf16
            return units[units.startIndex]
        }
    }

    /// Convert the character to lowercase.
    ///
    /// If the lowercase form of the character is more than one character long, only the first one is returned.
    public var lowercaseCharacter: Character {
        get {
            let lowered = String(self).lowercaseString
            return lowered[lowered.startIndex]
        }
    }

    /// Convert the character to uppercase.
    ///
    /// If the uppercase form of the character is more than one character long, only the first one is returned.
    public var uppercaseCharacter: Character {
        get {
            let raised = String(self).uppercaseString
            return raised[raised.startIndex]
        }
    }

    /// The character's UTF-16 code unit when it is encoded as exactly one unit, otherwise nil.
    ///
    /// The digit helpers use this instead of `utf16` so that a digit followed by combining marks (a single
    /// `Character` whose first unit is an ASCII digit) is not mistaken for the plain digit.
    private var singleCodeUnit: Int? {
        let units = String(self).utf16
        let first = units.startIndex
        if first.successor() != units.endIndex {
            return nil
        }
        return Int(units[first])
    }

    /// If the character is a decimal digit ("0" to "9") return its integer value, otherwise return nil.
    internal func decimalDigitValue() -> Int? {
        let zero = 0x30
        let nine = 0x39

        if let value = singleCodeUnit {
            switch value {
            case zero...nine:
                return value - zero
            default:
                break
            }
        }
        return nil
    }

    /// If the character is a hexadecimal digit ("0" to "9", "a" to "f" or "A" to "F") return its integer
    /// value, otherwise return nil.
    internal func hexadecimalDigitValue() -> Int? {
        let zero = 0x30
        let nine = 0x39
        let lowerA = 0x61
        let lowerF = 0x66
        let upperA = 0x41
        let upperF = 0x46

        if let value = singleCodeUnit {
            switch value {
            case zero...nine:
                return value - zero
            case lowerA...lowerF:
                return value - lowerA + 10
            case upperA...upperF:
                return value - upperA + 10
            default:
                break
            }
        }
        return nil
    }
}
public extension String {
    /**
    Skip whitespace characters. Updates the range and returns true if characters were skipped.
    */
    public func skipWhitespace(inout range: Range<Index>) -> Bool {
        let start = range.startIndex
        while range.startIndex != range.endIndex && self[range.startIndex].isSpace() {
            range.startIndex = range.startIndex.successor()
        }
        return range.startIndex != start
    }

    /**
    Skip a particular character. Updates the range and returns true if the character was found at the start of
    the range.
    */
    public func skipCharacter(inout range: Range<Index>, skip: Character) -> Bool {
        if range.startIndex != range.endIndex && self[range.startIndex] == skip {
            range.startIndex = range.startIndex.successor()
            return true
        }
        return false
    }

    /**
    Skip a particular string. Updates the range and returns true if the whole string was found at the start of
    the range. The range is left untouched when the string does not match, including when the range ends before
    all of `skip` has been seen.
    */
    public func skipString(inout range: Range<Index>, skip: String) -> Bool {
        return skipPrefix(&range, skip: skip, matches: { $0 == $1 })
    }

    /**
    Skip a particular string ignoring case. Updates the range and returns true if the whole string was found at
    the start of the range. The range is left untouched when the string does not match, including when the range
    ends before all of `skip` has been seen.
    */
    public func skipCaseInsensitiveString(inout range: Range<Index>, skip: String) -> Bool {
        return skipPrefix(&range, skip: skip, matches: { $0.lowercaseCharacter == $1.lowercaseCharacter })
    }

    /// Shared implementation of the string skipping methods: advances `range` past `skip` only when every
    /// character of `skip` is matched, in order, according to `matches`.
    private func skipPrefix(inout range: Range<Index>, skip: String, matches: (Character, Character) -> Bool) -> Bool {
        // Work on a local cursor so the caller's range is only modified once the match is complete.
        var index = range.startIndex
        var skipIndex = skip.startIndex
        while skipIndex != skip.endIndex {
            // Running out of input before the end of `skip` is a mismatch, not a match.
            if index == range.endIndex || !matches(self[index], skip[skipIndex]) {
                return false
            }
            index = index.successor()
            skipIndex = skipIndex.successor()
        }
        range.startIndex = index
        return true
    }
}
public extension String {
    /**
    Collect the leading characters of the string up to, but not including, the first whitespace character.
    */
    public func collectWord() -> String {
        var range = startIndex..<endIndex
        return collectWord(&range)
    }

    /**
    Collect the characters at the start of the range up to, but not including, the first whitespace character.
    Updates the range to begin after the collected characters and returns them as a string.
    */
    public func collectWord(inout range: Range<Index>) -> String {
        return collectCharacters(&range) { !$0.isSpace() }
    }

    /**
    Collect the leading characters of the string up to, but not including, the stop character. The whole string
    is returned when the stop character never appears.
    */
    public func collect(#stop: Character) -> String {
        var range = startIndex..<endIndex
        return collect(&range, stop: stop)
    }

    /**
    Collect the characters at the start of the range up to, but not including, the stop character or the end of
    the range. Updates the range to begin after the collected characters and returns them as a string.
    */
    public func collect(inout range: Range<Index>, stop: Character) -> String {
        return collectCharacters(&range) { $0 != stop }
    }

    /**
    Collect the leading characters of the string up to, but not including, the first occurrence of any of the
    stop characters.
    */
    public func collect(#stop: [Character]) -> String {
        var range = startIndex..<endIndex
        return collect(&range, stop: stop)
    }

    /**
    Collect the characters at the start of the range up to, but not including, the first occurrence of any of
    the stop characters or the end of the range. Updates the range to begin after the collected characters and
    returns them as a string.
    */
    public func collect(inout range: Range<Index>, stop: [Character]) -> String {
        return collectCharacters(&range) { !contains(stop, $0) }
    }

    /// Advance the start of `range` while `accepts` holds for the character under it, then return the
    /// characters that were passed over.
    private func collectCharacters(inout range: Range<Index>, accepts: Character -> Bool) -> String {
        let first = range.startIndex
        var cursor = first
        while cursor != range.endIndex && accepts(self[cursor]) {
            cursor = cursor.successor()
        }
        range.startIndex = cursor
        return self[first..<cursor]
    }
}
public extension String {
    /// Parse an integer value from the start of the string.
    public func parseInteger() -> Int? {
        var range = startIndex..<endIndex
        return parseInteger(&range)
    }

    /**
    Parse an optionally signed decimal integer at the start of the range. On success the range is updated to
    begin at the first character that is not part of the integer and the parsed value is returned. On failure
    the range is left untouched and nil is returned. A value that does not fit in `Int` counts as a failure.
    */
    public func parseInteger(inout range: Range<Index>) -> Int? {
        let originalRange = range

        let sign = parseSign(&range)
        if let magnitude = parseDigits(&range) {
            return sign * magnitude
        }

        range = originalRange
        return nil
    }

    /// Skip an optional leading "-" or "+". Returns -1 when a minus sign was skipped and 1 otherwise.
    internal func parseSign(inout range: Range<Index>) -> Int {
        if skipCharacter(&range, skip: "-") {
            return -1
        }
        skipCharacter(&range, skip: "+")
        return 1
    }

    /// Parse a run of decimal digits at the start of the range. Returns nil, leaving the range untouched, when
    /// the range does not start with a digit or the value does not fit in `Int`.
    internal func parseDigits(inout range: Range<Index>) -> Int? {
        return accumulateDigits(&range, radix: 10, digitValue: { $0.decimalDigitValue() })
    }

    /// Parse a hexadecimal integer value from the start of the string.
    public func parseHexadecimalInteger() -> Int? {
        var range = startIndex..<endIndex
        return parseHexadecimalInteger(&range)
    }

    /**
    Parse an optionally signed hexadecimal integer (without any "0x" prefix) at the start of the range. On
    success the range is updated to begin at the first character that is not part of the integer and the parsed
    value is returned. On failure the range is left untouched and nil is returned. A value that does not fit in
    `Int` counts as a failure.
    */
    public func parseHexadecimalInteger(inout range: Range<Index>) -> Int? {
        let originalRange = range

        let sign = parseSign(&range)
        if let magnitude = parseHexadecimalDigits(&range) {
            return sign * magnitude
        }

        // Undo the sign that may already have been consumed, matching parseInteger.
        range = originalRange
        return nil
    }

    /// Parse a run of hexadecimal digits at the start of the range. Returns nil, leaving the range untouched,
    /// when the range does not start with a hexadecimal digit or the value does not fit in `Int`.
    internal func parseHexadecimalDigits(inout range: Range<Index>) -> Int? {
        return accumulateDigits(&range, radix: 16, digitValue: { $0.hexadecimalDigitValue() })
    }

    /// Shared digit accumulation for the decimal and hexadecimal parsers. Consumes characters for as long as
    /// `digitValue` returns a value and folds them into an integer in the given radix.
    private func accumulateDigits(inout range: Range<Index>, radix: Int, digitValue: Character -> Int?) -> Int? {
        let originalRange = range
        var result: Int?

        while range.startIndex != range.endIndex {
            if let value = digitValue(self[range.startIndex]) {
                // Plain `*` and `+` trap on overflow, which would crash on a long enough run of digits.
                let (scaled, scaleOverflow) = Int.multiplyWithOverflow(result ?? 0, radix)
                let (total, addOverflow) = Int.addWithOverflow(scaled, value)
                if scaleOverflow || addOverflow {
                    range = originalRange
                    return nil
                }
                result = total
                range.startIndex = range.startIndex.successor()
            } else {
                break
            }
        }

        return result
    }

    /// Parse a floating point value from the start of the string.
    public func parseFloat() -> Double? {
        var range = startIndex..<endIndex
        return parseFloat(&range)
    }

    /**
    Parse a floating point value at the start of the range. The accepted form is an optional sign, digits with
    an optional decimal point (at least one digit is required on either side of the point), and an optional
    exponent introduced by "e" or "E" followed by an integer. On success the range is updated to begin at the
    first character that is not part of the number and the parsed value is returned. On failure, including an
    exponent marker that is not followed by an integer, the range is left untouched and nil is returned.
    */
    public func parseFloat(inout range: Range<Index>) -> Double? {
        let originalRange = range

        let sign = parseSign(&range)
        let integerPart = parseDigits(&range)

        let hasDecimalPoint = skipCharacter(&range, skip: ".")
        var decimalPart: Double?
        if hasDecimalPoint {
            decimalPart = parseDecimalDigits(&range)
        }

        // Without a decimal point there is never a decimal part, so this covers both "no digits at all" and
        // "a lone decimal point".
        if integerPart == nil && decimalPart == nil {
            range = originalRange
            return nil
        }

        var exponent: Int?
        if skipCharacter(&range, skip: "e") || skipCharacter(&range, skip: "E") {
            exponent = parseInteger(&range)
            if exponent == nil {
                range = originalRange
                return nil
            }
        }

        var result = 0.0
        if let v = integerPart {
            result += Double(v)
        }
        if let v = decimalPart {
            result += v
        }
        result *= Double(sign)
        if let v = exponent {
            result *= pow(10.0, Double(v))
        }
        return result
    }

    /// Parse the digits that follow a decimal point and return their fractional value (for example "25" gives
    /// 0.25). Returns nil when the range does not start with a decimal digit.
    internal func parseDecimalDigits(inout range: Range<Index>) -> Double? {
        // Weight of the digit being read: 1/10 for the first, 1/100 for the second, and so on.
        var denominator = 1.0 / 10.0
        var result: Double?

        while range.startIndex != range.endIndex {
            if let value = self[range.startIndex].decimalDigitValue() {
                result = (result ?? 0.0) + Double(value) * denominator
                denominator /= 10.0
                range.startIndex = range.startIndex.successor()
            } else {
                break
            }
        }

        return result
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
import XCTest | |
import C4iOS | |
/// Tests for the String skipping, collecting and number parsing extensions.
///
/// Parsed optionals are unwrapped with `if let` and reported through `XCTFail` instead of being force
/// unwrapped, so that a parser returning nil shows up as a test failure rather than crashing the test run.
class StringParsingTests : XCTestCase {
    func testSkipWhitespace() {
        let testString = " \t\nabc\n"
        var range = testString.startIndex..<testString.endIndex
        let skipped = testString.skipWhitespace(&range)
        XCTAssertTrue(skipped)
        XCTAssertEqual(range.startIndex, advance(testString.startIndex, 4))
    }

    func testSkipCharacter() {
        let testString = "abc"
        var range = testString.startIndex..<testString.endIndex
        let skipped = testString.skipCharacter(&range, skip: "a")
        XCTAssertTrue(skipped)
        XCTAssertEqual(range.startIndex, advance(testString.startIndex, 1))
    }

    func testSkipCharacterFail() {
        let testString = "abc"
        var range = testString.startIndex..<testString.endIndex
        let skipped = testString.skipCharacter(&range, skip: "b")
        XCTAssertFalse(skipped)
        XCTAssertEqual(range.startIndex, testString.startIndex)
    }

    func testSkipString() {
        let testString = "abc123"
        var range = testString.startIndex..<testString.endIndex
        let skipped = testString.skipString(&range, skip: "abc")
        XCTAssertTrue(skipped)
        XCTAssertEqual(range.startIndex, advance(testString.startIndex, 3))
    }

    func testSkipStringFail() {
        let testString = "abc123"
        var range = testString.startIndex..<testString.endIndex
        let skipped = testString.skipString(&range, skip: "123")
        XCTAssertFalse(skipped)
        XCTAssertEqual(range.startIndex, testString.startIndex)
    }

    func testCollect() {
        let testString = "abc,123"
        let output = testString.collect(stop: ",")
        XCTAssertEqual(output, "abc")
    }

    func testCollectNoStop() {
        let testString = "abc"
        let output = testString.collect(stop: ",")
        XCTAssertEqual(output, "abc")
    }

    func testParseInteger() {
        let testString = "234"
        if let value = testString.parseInteger() {
            XCTAssertEqual(value, 234)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as an integer")
        }
    }

    func testParseNegativeInteger() {
        let testString = "-234"
        if let value = testString.parseInteger() {
            XCTAssertEqual(value, -234)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as an integer")
        }
    }

    func testParseIntegerFollowedByLetters() {
        let testString = "321abc"
        var range = testString.startIndex..<testString.endIndex
        if let value = testString.parseInteger(&range) {
            XCTAssertEqual(value, 321)
        } else {
            XCTFail("Expected \"\(testString)\" to start with an integer")
        }
        XCTAssertEqual(range.startIndex, advance(testString.startIndex, 3))
    }

    func testParseInvalidInteger() {
        let testString = "-p23"
        var range = testString.startIndex..<testString.endIndex
        let value = testString.parseInteger(&range)
        XCTAssertNil(value)
        XCTAssertEqual(range.startIndex, testString.startIndex)
    }

    func testParseHexInteger() {
        let testString = "E40b"
        if let value = testString.parseHexadecimalInteger() {
            XCTAssertEqual(value, 0xE40b)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as a hexadecimal integer")
        }
    }

    func testParseFloatDecimalOnly() {
        let testString = ".902"
        if let value = testString.parseFloat() {
            XCTAssertEqualWithAccuracy(value, 0.902, 0.001)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as a floating point number")
        }
    }

    func testParseFloat() {
        let testString = "345.024"
        if let value = testString.parseFloat() {
            XCTAssertEqualWithAccuracy(value, 345.024, 0.001)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as a floating point number")
        }
    }

    func testParseFloatInvalid() {
        // Only the return value is checked: the overload without a range argument works on its own local
        // range, so there is no caller-visible range whose position could be asserted here.
        let testString = "-."
        let value = testString.parseFloat()
        XCTAssertNil(value)
    }

    func testParseFloatScientificNotation() {
        let testString = "12e1"
        if let value = testString.parseFloat() {
            XCTAssertEqualWithAccuracy(value, 12e1, 0.001)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as a floating point number")
        }
    }

    func testParseFloatScientificNotationWithNegativeExponent() {
        let testString = "345.24e-12"
        if let value = testString.parseFloat() {
            XCTAssertEqualWithAccuracy(value, 345.24e-12, 1e-14)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as a floating point number")
        }
    }

    func testParseFloatScientificNotationWithNoDecimalPart() {
        let testString = "43.e-12"
        if let value = testString.parseFloat() {
            XCTAssertEqualWithAccuracy(value, 43.0e-12, 1e-14)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as a floating point number")
        }
    }

    func testParseFloatScientificNotationLarge() {
        let testString = "1023.2382e35"
        if let value = testString.parseFloat() {
            XCTAssertEqualWithAccuracy(value, 1023.2382e35, 1e27)
        } else {
            XCTFail("Expected \"\(testString)\" to parse as a floating point number")
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment