Created
October 10, 2022 17:44
-
-
Save VaslD/601447cc1cfe39de65c2f9dde1061230 to your computer and use it in GitHub Desktop.
Working with old style (ASCII) plists in Swift...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
import TabularData | |
/// Utilities for writing OpenStep-format property lists.
///
/// The OpenStep format is also known as the ASCII property list or the "old-style" property list.
public enum OldStylePropertyList {
    // MARK: Property List

    /// Converts a `String` (`NSString`) to its OpenStep property list representation.
    ///
    /// The most conservative encoding is used: every non-ASCII character is written as one or two
    /// `\UXXXX` UTF-16 escapes, and every line-break character is normalised to `\n`.
    /// ASCII control characters without a short escape are also written as a full four-digit `\UXXXX`
    /// escape so that a following hexadecimal or octal digit cannot be absorbed into the escape.
    ///
    /// - Parameter string: The string to convert.
    /// - Returns: Text for an OpenStep property list. The result is meant to be written to a file using
    ///   ASCII encoding; it is not meant for display. Strings made only of ASCII letters and digits are
    ///   returned without quotation marks; everything else (including the empty string) is quoted.
    public static func encode(_ string: String) -> String {
        guard !string.isEmpty else {
            return "\"\""
        }
        var escaped = String()
        var alphanumeric = true
        for character in string {
            if character.isASCII, character.isLetter || character.isNumber {
                escaped.append(character)
                continue
            }
            alphanumeric = false
            if character.isNewline {
                // Any kind of line break (CR, LF, CR-LF, U+2028, ...) is written as "\n".
                escaped.append("\\n")
                continue
            }
            for scalar in character.unicodeScalars {
                escaped.append(Self.escape(scalar))
            }
        }
        /* https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html
         *
         * The quotation marks can be omitted if the string is composed strictly of alphanumeric characters
         * and contains no white space (numbers are handled as strings in property lists).
         */
        return alphanumeric ? escaped : "\"\(escaped)\""
    }

    /// Escapes a single Unicode scalar for use inside a quoted OpenStep string.
    ///
    /// Printable ASCII is returned unchanged; quotes, the backslash and common white space get their
    /// short escapes; everything else is written as UTF-16 code units in `\UXXXX` form (exactly four
    /// upper-case hex digits each, a surrogate pair for scalars above U+FFFF).
    private static func escape(_ scalar: Unicode.Scalar) -> String {
        switch scalar {
        case "\\": return "\\\\"
        case "\"": return "\\\""
        case "'": return "\\'"
        case "\n": return "\\n"
        case "\r": return "\\r"
        case "\t": return "\\t"
        default: break
        }
        if scalar.value >= 0x20, scalar.value < 0x7F {
            return String(scalar)
        }
        /* https://pewpewthespells.com/blog/dangers_of_ascii_plists.html
         *
         * Escapes longer than 4 hex digits are not understood, so scalars outside the BMP
         * must be written as a UTF-16 surrogate pair: "🐼" → "\UD83D\UDC3C".
         */
        return scalar.utf16.map { String(format: "\\U%04X", $0) }.joined()
    }

    /// Converts `Data` (`NSData`) to its OpenStep property list representation.
    ///
    /// Bytes are written as lower-case hexadecimal in groups of four bytes separated by a space.
    ///
    /// - Parameter data: The binary data to convert.
    /// - Returns: Text for an OpenStep property list, e.g. `<deadbeef 01>`.
    public static func encode(_ data: Data) -> String {
        let bytes = [UInt8](data)
        let groups = stride(from: 0, to: bytes.count, by: 4).map { offset -> String in
            bytes[offset..<min(offset + 4, bytes.count)]
                .map { String(format: "%02x", $0) }
                .joined()
        }
        /* https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html
         *
         * Binary data is enclosed in angle brackets and encoded in hexadecimal ASCII. Spaces are ignored.
         */
        return "<\(groups.joined(separator: " "))>"
    }

    /// Converts `[Any]` (`NSArray`) to its OpenStep property list representation.
    ///
    /// Every element must be a string, binary data, an array or a dictionary.
    ///
    /// The human-readable layout is used: one element per line, nested lines indented.
    ///
    /// - Parameter array: The array to convert.
    /// - Returns: Text for an OpenStep property list.
    /// - Throws: `CocoaError(.propertyListWriteInvalid)` if an element has an unsupported type.
    public static func encode(_ array: [Any]) throws -> String {
        let lines = try array
            .map { try Self.encode(item: $0) + "," }
            // A nested container spans several lines; indent each of them.
            .flatMap { $0.split(separator: "\n") }
            .map { " \($0)" }
        /* https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html
         *
         * An array is enclosed in parentheses, with the elements separated by commas.
         */
        return "(\n\(lines.joined(separator: "\n"))\n)"
    }

    /// Converts `[AnyHashable: Any]` (`NSDictionary`) to its OpenStep property list representation.
    ///
    /// Every key and value must be a string, binary data, an array or a dictionary.
    ///
    /// The human-readable layout is used: one pair per line, nested lines indented. Pairs are written
    /// in ascending order of their encoded key so that the output is the same on every run.
    ///
    /// - Parameter dictionary: The dictionary to convert.
    /// - Returns: Text for an OpenStep property list.
    /// - Throws: `CocoaError(.propertyListWriteInvalid)` if a key or value has an unsupported type.
    public static func encode(_ dictionary: [AnyHashable: Any]) throws -> String {
        var entries = [(key: String, value: String)]()
        entries.reserveCapacity(dictionary.count)
        for (key, value) in dictionary {
            try entries.append((key: Self.encode(item: key), value: Self.encode(item: value)))
        }
        // Dictionary iteration order is unspecified; sort for reproducible files.
        entries.sort { $0.key < $1.key }
        let lines = entries
            .flatMap { "\($0.key) = \($0.value);".split(separator: "\n") }
            .map { " \($0)" }
        /* https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html
         *
         * A dictionary is enclosed in curly braces, and contains a list of keys with their values.
         * Each key-value pair ends with a semicolon.
         */
        return "{\n\(lines.joined(separator: "\n"))\n}"
    }

    /// Converts any supported object to its OpenStep property list representation.
    ///
    /// `item` must be a string, binary data, an array or a dictionary.
    ///
    /// - Parameter item: The object to convert.
    /// - Returns: Text for an OpenStep property list.
    /// - Throws: `CocoaError(.propertyListWriteInvalid)` if `item` has an unsupported type.
    public static func encode(item: Any) throws -> String {
        switch item {
        case let string as String:
            return Self.encode(string)
        case let data as Data:
            return Self.encode(data)
        case let array as [Any]:
            return try Self.encode(array)
        case let dictionary as [AnyHashable: Any]:
            return try Self.encode(dictionary)
        default:
            throw CocoaError(.propertyListWriteInvalid)
        }
    }

    // MARK: Strings

    /// Converts one key-value pair to a `*.strings` file line.
    ///
    /// - Parameter pair: The key and its value.
    /// - Returns: A line of the form `key = value;`.
    public static func encode(strings pair: (String, String)) -> String {
        "\(Self.encode(pair.0)) = \(Self.encode(pair.1));"
    }

    /// Converts a dictionary to the `*.strings` file format.
    ///
    /// Lines are written in ascending key order so that the output is the same on every run.
    ///
    /// - Parameter dictionary: The string dictionary.
    /// - Returns: One `key = value;` line per entry, separated by line feeds.
    public static func encode(strings dictionary: [String: String]) -> String {
        dictionary
            .sorted { $0.key < $1.key }
            .map { Self.encode(strings: ($0.key, $0.value)) }
            .joined(separator: "\n")
    }

    /// Converts an array of key-value pairs to the `*.strings` file format.
    ///
    /// - Parameter pairs: The key-value pairs, written in the given order.
    /// - Returns: One `key = value;` line per pair, separated by line feeds.
    public static func encode(strings pairs: [(String, String)]) -> String {
        pairs.map { Self.encode(strings: $0) }.joined(separator: "\n")
    }

    /// Converts a two-column data frame to the `*.strings` file format.
    ///
    /// Rows that do not have exactly two columns, or whose cells are not both strings, are skipped.
    ///
    /// - Parameter frame: A TabularData data frame.
    /// - Returns: One `key = value;` line per accepted row, separated by line feeds.
    public static func encode(strings frame: DataFrame) -> String {
        var pairs = [(String, String)]()
        for row in frame.rows {
            guard row.count == 2, let key = row[0] as? String, let value = row[1] as? String else { continue }
            pairs.append((key, value))
        }
        return Self.encode(strings: pairs)
    }
}
// MARK: - Decoding | |
public extension OldStylePropertyList {
    /// Parses ASCII-encoded `*.strings` content into key-value pairs.
    ///
    /// Every non-empty line must have the form `key = value;` where key and value are either quoted
    /// strings or bare (unquoted) words. The `=` separator is searched for outside quoted strings, so
    /// quoted keys and values may themselves contain `=`. Text after the closing quote of a quoted
    /// token (for example the terminating `;`) is ignored.
    ///
    /// - Parameter data: The file contents. Must be valid ASCII.
    /// - Returns: The pairs in file order, or `nil` if the data is not ASCII or any line is malformed
    ///   (no separator, unterminated quote, or an empty unquoted token).
    static func decode(strings data: Data) -> [(String, String)]? {
        guard let text = String(data: data, encoding: .ascii) else {
            return nil
        }
        var pairs = [(String, String)]()
        for line in text.split(whereSeparator: \.isNewline) {
            guard let separator = Self.separatorIndex(in: line),
                  let key = Self.rawToken(in: line[..<separator], alsoTrimming: ""),
                  let value = Self.rawToken(in: line[line.index(after: separator)...], alsoTrimming: ";")
            else {
                return nil
            }
            // All escape kinds are resolved in ONE pass so that the output of one escape
            // (e.g. the backslash produced by "\\\\") is never re-read as the start of another.
            pairs.append((
                Self.translate(key, simple: true, basic: true, pairs: true),
                Self.translate(value, simple: true, basic: true, pairs: true)
            ))
        }
        return pairs
    }

    /// Replaces the short backslash escapes `\n`, `\r`, `\t`, `\0`, `\"`, `\'` and `\\` in place.
    ///
    /// The string is scanned once from left to right, so an escaped backslash followed by a letter
    /// (`\\n`) yields a backslash and that letter, never a line feed. `\UXXXX` escapes are left
    /// untouched; use `decode(_:)` for those.
    ///
    /// - Parameter string: The string to unescape in place.
    static func unescape(_ string: inout String) {
        string = Self.translate(string, simple: false || true, basic: false, pairs: false)
    }

    /// Resolves `\UXXXX` UTF-16 escapes in a string.
    ///
    /// `CFPropertyList`, `PropertyListSerialization` and `PropertyListDecoder` resolve escaped strings
    /// in property lists automatically. This method exists only for manual, special-purpose processing.
    ///
    /// Each escape must consist of exactly four upper-case hexadecimal digits. A lead surrogate
    /// immediately followed by a trail surrogate is decoded as one character; unpaired surrogates
    /// are left as written.
    ///
    /// - Parameter string: The escaped string.
    /// - Returns: The string with escapes resolved.
    static func decode(_ string: String) -> String {
        Self.translate(string, simple: false, basic: true, pairs: true)
    }

    /// Resolves only `\UXXXX\UXXXX` surrogate-pair escapes in a string.
    ///
    /// A pair is decoded when a lead surrogate escape is immediately followed by a trail surrogate
    /// escape. All other text, including single `\UXXXX` escapes, is left as written.
    ///
    /// - Parameter string: The escaped string.
    /// - Returns: The string with surrogate pairs resolved.
    static func decode(surrogates string: String) -> String {
        Self.translate(string, simple: false, basic: false, pairs: true)
    }

    // MARK: Helpers

    /// Finds the first `=` in `line` that is not inside a quoted string.
    private static func separatorIndex(in line: Substring) -> Substring.Index? {
        var insideQuotes = false
        var skipNext = false
        for index in line.indices {
            if skipNext {
                // The character after a backslash inside quotes is never a delimiter.
                skipNext = false
                continue
            }
            switch line[index] {
            case "\\" where insideQuotes:
                skipNext = true
            case "\"":
                insideQuotes.toggle()
            case "=" where !insideQuotes:
                return index
            default:
                break
            }
        }
        return nil
    }

    /// Extracts the still-escaped text of the key or value contained in `part`.
    ///
    /// If `part` contains a quotation mark, the text between it and the matching closing mark is
    /// returned (`nil` if the quote is never closed). Otherwise `part` is trimmed of white space and
    /// of the characters in `extra`; an empty result is rejected.
    private static func rawToken(in part: Substring, alsoTrimming extra: String) -> String? {
        guard let open = part.firstIndex(of: "\"") else {
            let trimSet = CharacterSet.whitespacesAndNewlines.union(CharacterSet(charactersIn: extra))
            let bare = part.trimmingCharacters(in: trimSet)
            return bare.isEmpty ? nil : bare
        }
        let start = part.index(after: open)
        var index = start
        while index < part.endIndex {
            switch part[index] {
            case "\\":
                // Step onto the escaped character; the increment below steps over it.
                index = part.index(after: index)
                guard index < part.endIndex else {
                    return nil
                }
            case "\"":
                return String(part[start..<index])
            default:
                break
            }
            index = part.index(after: index)
        }
        return nil
    }

    /// Resolves the selected kinds of escapes in a single left-to-right pass.
    ///
    /// - Parameters:
    ///   - string: The escaped text.
    ///   - simple: Resolve short escapes such as `\n` and `\\`.
    ///   - basic: Resolve a single `\UXXXX` that is not a surrogate.
    ///   - pairs: Resolve a lead surrogate escape immediately followed by a trail surrogate escape.
    /// - Returns: The text with the selected escapes replaced. Anything unrecognised is copied verbatim.
    private static func translate(_ string: String, simple: Bool, basic: Bool, pairs: Bool) -> String {
        let scalars = Array(string.unicodeScalars)
        var output = String.UnicodeScalarView()
        var index = 0
        while index < scalars.count {
            let current = scalars[index]
            guard current == "\\", index + 1 < scalars.count else {
                output.append(current)
                index += 1
                continue
            }
            let marker = scalars[index + 1]
            if simple, let replacement = Self.simpleEscape(marker) {
                output.append(replacement)
                index += 2
                continue
            }
            if marker == "U", let unit = Self.codeUnit(in: scalars, at: index + 2) {
                if Unicode.UTF16.isLeadSurrogate(unit) {
                    if pairs, let scalar = Self.pairedScalar(lead: unit, in: scalars, at: index + 6) {
                        output.append(scalar)
                        index += 12 // two escapes of six characters each
                        continue
                    }
                } else if !Unicode.UTF16.isTrailSurrogate(unit) {
                    if basic, let scalar = Unicode.Scalar(unit) {
                        output.append(scalar)
                        index += 6
                        continue
                    }
                }
            }
            // Not an escape handled in this mode: keep the backslash and move on.
            output.append(current)
            index += 1
        }
        return String(output)
    }

    /// Maps the character following a backslash to the scalar it stands for, if it is a short escape.
    private static func simpleEscape(_ marker: Unicode.Scalar) -> Unicode.Scalar? {
        switch marker {
        case "n": return "\n"
        case "r": return "\r"
        case "t": return "\t"
        case "0": return "\0"
        case "\"": return "\""
        case "'": return "'"
        case "\\": return "\\"
        default: return nil
        }
    }

    /// Reads exactly four upper-case hexadecimal digits starting at `start` as a UTF-16 code unit.
    private static func codeUnit(in scalars: [Unicode.Scalar], at start: Int) -> UInt16? {
        let end = start + 4
        guard end <= scalars.count else {
            return nil
        }
        var value: UInt16 = 0
        for scalar in scalars[start..<end] {
            let digit: UInt32
            switch scalar.value {
            case 0x30...0x39: // "0"..."9"
                digit = scalar.value - 0x30
            case 0x41...0x46: // "A"..."F"
                digit = scalar.value - 0x41 + 10
            default:
                return nil
            }
            value = (value << 4) | UInt16(digit)
        }
        return value
    }

    /// Combines `lead` with the trail surrogate escape expected at `start` into one scalar.
    ///
    /// Returns `nil` when the text at `start` is not a `\UXXXX` escape holding a trail surrogate.
    private static func pairedScalar(lead: UInt16, in scalars: [Unicode.Scalar], at start: Int) -> Unicode.Scalar? {
        guard start + 1 < scalars.count,
              scalars[start] == "\\",
              scalars[start + 1] == "U",
              let trail = Self.codeUnit(in: scalars, at: start + 2),
              Unicode.UTF16.isTrailSurrogate(trail)
        else {
            return nil
        }
        let value = 0x10000 + ((UInt32(lead) - 0xD800) << 10) + (UInt32(trail) - 0xDC00)
        return Unicode.Scalar(value)
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment