Skip to content

Instantly share code, notes, and snippets.

@VaslD
Created October 10, 2022 17:44
Show Gist options
  • Save VaslD/601447cc1cfe39de65c2f9dde1061230 to your computer and use it in GitHub Desktop.
Save VaslD/601447cc1cfe39de65c2f9dde1061230 to your computer and use it in GitHub Desktop.
Working with old style (ASCII) plists in Swift...
import Foundation
import TabularData
/// OpenStep 格式 Property List 工具集。
///
/// OpenStep 格式 Property List 也称 ASCII Property List 或旧版 Property List。
public enum OldStylePropertyList {
// MARK: Property List
/// Converts a `String` (`NSString`) into its OpenStep property list representation.
///
/// To stay compatible with Core Foundation the most conservative `NSString` encoding is used: every
/// non-ASCII character is written as UTF-16 `\UXXXX` escapes, even though recent systems and toolchains
/// can read most Unicode characters directly.
///
/// - Parameter string: The string to convert.
/// - Returns: The text to emit into an OpenStep property list. It is guaranteed to produce a well-formed
///   file when written using ASCII encoding; how it looks when printed from code is not guaranteed.
public static func encode(_ string: String) -> String {
    guard !string.isEmpty else {
        return "\"\""
    }
    var escaped = String()
    var alphanumeric = true
    for var character in string {
        if character.isASCII,
           character.isLetter || character.isNumber {
            escaped.append(character)
            continue
        }
        alphanumeric = false
        if character.isNewline {
            // Every kind of line break is normalized to a single line feed.
            character = "\n"
        }
        for scalar in character.unicodeScalars {
            switch scalar.value {
            case 0x00:
                // `\0` would start an octal escape and merge with any digit that follows it,
                // so NUL is written as a fixed-width escape instead.
                escaped.append("\\U0000")
            case 0x09, 0x0A, 0x0D, 0x20...0x7E:
                // Printable ASCII stays as-is (with `"`, `'` and `\` backslash-escaped);
                // tab, line feed and carriage return become `\t`, `\n` and `\r`.
                escaped.append(scalar.escaped(asASCII: true))
            default:
                /* https://pewpewthespells.com/blog/dangers_of_ascii_plists.html
                 *
                 * The panda emoji is not going to display from a NeXTSTEP plist because it doesn't know
                 * how to interpret escaped unicode characters that are longer than 4 hex digits.
                 */
                /* https://developer.apple.com/documentation/foundation/nsstring
                 *
                 * An NSString object encodes a Unicode-compliant text string,
                 * represented as a sequence of UTF–16 code units. All lengths, character indexes,
                 * and ranges are expressed in terms of 16-bit platform-endian values,
                 * with index values starting at 0.
                 */
                // One `\U` escape per UTF-16 code unit, always padded to four hex digits so that
                // hexadecimal text following the escape cannot be absorbed into it:
                // U+0001 → "\U0001", "你" → "\U4F60", "🐼" → "\UD83D\UDC3C"
                for unit in scalar.utf16 {
                    escaped.append("\\U" + String(format: "%04X", unit))
                }
            }
        }
    }
    /* https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html
     *
     * The quotation marks can be omitted if the string is composed strictly of alphanumeric characters
     * and contains no white space (numbers are handled as strings in property lists).
     */
    if alphanumeric {
        return escaped
    }
    return "\"\(escaped)\""
}
/// Converts `Data` (`NSData`) into its OpenStep property list representation.
///
/// LLDB prints `NSData` in this same format, so for identical bytes the debugger description should be
/// equal to the value returned here.
///
/// - Parameter data: The binary data to convert.
/// - Returns: The text to emit into an OpenStep property list. It is guaranteed to produce a well-formed
///   file when written using ASCII encoding; how it looks when printed from code is not guaranteed.
public static func encode(_ data: Data) -> String {
    // Bytes are rendered as lowercase hex in groups of four; the last group may be shorter.
    var groups = [String]()
    var cursor = data.startIndex
    while cursor < data.endIndex {
        let next = data.index(cursor, offsetBy: 4, limitedBy: data.endIndex) ?? data.endIndex
        var group = String()
        for byte in data[cursor..<next] {
            group += String(format: "%02x", byte)
        }
        groups.append(group)
        cursor = next
    }
    /* https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html
     *
     * Binary data is enclosed in angle brackets and encoded in hexadecimal ASCII. Spaces are ignored.
     */
    return "<" + groups.joined(separator: " ") + ">"
}
/// Converts `[Any]` (`NSArray`) into its OpenStep property list representation.
///
/// Every element must be a string, binary data, an array or a dictionary.
///
/// The human-readable form is produced: compared with the most compact form it carries extra line
/// breaks and spaces.
///
/// - Parameter array: The array to convert.
/// - Returns: The text to emit into an OpenStep property list. It is guaranteed to produce a well-formed
///   file when written using ASCII encoding; how it looks when printed from code is not guaranteed.
/// - Throws: Whatever `encode(item:)` throws for an element it cannot convert.
public static func encode(_ array: [Any]) throws -> String {
    // Each element is followed by a comma, including the last one.
    let terminated = try array.map { element -> String in
        try Self.encode(item: element) + ","
    }
    // Nested containers span several lines, so the text is re-split to indent every single line.
    let indented = terminated
        .joined(separator: "\n")
        .split(separator: "\n")
        .map { line in " " + String(line) }
    /* https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html
     *
     * An array is enclosed in parentheses, with the elements separated by commas.
     */
    return "(\n" + indented.joined(separator: "\n") + "\n)"
}
/// Converts `[AnyHashable: Any]` (`NSDictionary`) into its OpenStep property list representation.
///
/// Every key and every value must be a string, binary data, an array or a dictionary.
///
/// The human-readable form is produced: compared with the most compact form it carries extra line
/// breaks and spaces. Entries appear in the dictionary's own iteration order.
///
/// - Parameter dictionary: The dictionary to convert.
/// - Returns: The text to emit into an OpenStep property list. It is guaranteed to produce a well-formed
///   file when written using ASCII encoding; how it looks when printed from code is not guaranteed.
/// - Throws: Whatever `encode(item:)` throws for a key or value it cannot convert.
public static func encode(_ dictionary: [AnyHashable: Any]) throws -> String {
    let statements = try dictionary.map { entry -> String in
        let key = try Self.encode(item: entry.key)
        let value = try Self.encode(item: entry.value)
        return key + " = " + value + ";"
    }
    // Nested containers span several lines, so the text is re-split to indent every single line.
    let indented = statements
        .joined(separator: "\n")
        .split(separator: "\n")
        .map { line in " " + String(line) }
    /* https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html
     *
     * A dictionary is enclosed in curly braces, and contains a list of keys with their values.
     * Each key-value pair ends with a semicolon.
     */
    return "{\n" + indented.joined(separator: "\n") + "\n}"
}
/// Converts an arbitrary value (`NSObject`) into its OpenStep property list representation.
///
/// The value must be a string, binary data, an array or a dictionary.
///
/// The human-readable form is produced: compared with the most compact form it carries extra line
/// breaks and spaces.
///
/// - Parameter item: The value to convert.
/// - Returns: The text to emit into an OpenStep property list. It is guaranteed to produce a well-formed
///   file when written using ASCII encoding; how it looks when printed from code is not guaranteed.
/// - Throws: `CocoaError(.propertyListWriteInvalid)` when `item` is none of the supported types.
public static func encode(item: Any) throws -> String {
    // The casts are tried in a fixed order: string, data, array, dictionary.
    if let string = item as? String {
        return Self.encode(string)
    }
    if let data = item as? Data {
        return Self.encode(data)
    }
    if let array = item as? [Any] {
        return try Self.encode(array)
    }
    if let dictionary = item as? [AnyHashable: Any] {
        return try Self.encode(dictionary)
    }
    throw CocoaError(.propertyListWriteInvalid)
}
// MARK: Strings
/// Renders one key–value pair as a `*.strings` file statement.
///
/// - Parameter pair: The key and the value, both strings.
/// - Returns: The text to emit into a `*.strings` file. It is guaranteed to produce a well-formed file
///   when written using ASCII encoding; how it looks when printed from code is not guaranteed.
public static func encode(strings pair: (String, String)) -> String {
    let (key, value) = pair
    return Self.encode(key) + " = " + Self.encode(value) + ";"
}
/// Renders a dictionary as the contents of a `*.strings` file, one statement per line.
///
/// Statements appear in the dictionary's own iteration order.
///
/// - Parameter dictionary: The string-to-string dictionary.
/// - Returns: The text to emit into a `*.strings` file. It is guaranteed to produce a well-formed file
///   when written using ASCII encoding; how it looks when printed from code is not guaranteed.
public static func encode(strings dictionary: [String: String]) -> String {
    var statements = [String]()
    for (key, value) in dictionary {
        statements.append(Self.encode(strings: (key, value)))
    }
    return statements.joined(separator: "\n")
}
/// Renders an array of key–value pairs as the contents of a `*.strings` file, one statement per line.
///
/// - Parameter pairs: The key–value pairs, in output order.
/// - Returns: The text to emit into a `*.strings` file. It is guaranteed to produce a well-formed file
///   when written using ASCII encoding; how it looks when printed from code is not guaranteed.
public static func encode(strings pairs: [(String, String)]) -> String {
    var statements = [String]()
    for pair in pairs {
        statements.append(Self.encode(strings: pair))
    }
    return statements.joined(separator: "\n")
}
/// Renders a two-column data table as the contents of a `*.strings` file.
///
/// Rows that do not have exactly two columns, or whose two cells are not both `String`, are skipped.
///
/// - Parameter frame: A data table from the TabularData framework.
/// - Returns: The text to emit into a `*.strings` file. It is guaranteed to produce a well-formed file
///   when written using ASCII encoding; how it looks when printed from code is not guaranteed.
public static func encode(strings frame: DataFrame) -> String {
    let pairs = frame.rows.compactMap { row -> (String, String)? in
        guard row.count == 2 else { return nil }
        guard let key = row[0] as? String, let value = row[1] as? String else { return nil }
        return (key, value)
    }
    return Self.encode(strings: pairs)
}
}
// MARK: - Decoding
public extension OldStylePropertyList {
/// Parses the contents of a `*.strings` file into key–value pairs.
///
/// The data is read as ASCII and handled line by line. Every line that is not blank must hold one
/// `key = value` statement; keys and values may be bare words or double-quoted strings. An `=` inside
/// a quoted string belongs to that string and does not separate key from value.
///
/// Comments are not recognized: a line that does not split into exactly a key and a value makes the
/// whole parse fail.
///
/// - Parameter data: The raw file contents.
/// - Returns: The decoded pairs in file order, or `nil` when the data is not ASCII or a line is malformed.
static func decode(strings data: Data) -> [(String, String)]? {
    guard let text = String(data: data, encoding: .ascii) else {
        return nil
    }
    let bareValueTrim = CharacterSet.whitespacesAndNewlines.union(CharacterSet(charactersIn: ";"))
    var pairs = [(String, String)]()
    for line in text.split(whereSeparator: \.isNewline) {
        // Lines made of nothing but whitespace carry no statement.
        if line.allSatisfy(\.isWhitespace) {
            continue
        }
        let fields = Self.splitAtUnquotedEquals(line)
        guard fields.count == 2 else {
            return nil
        }
        guard var key = Self.tokenContent(of: fields[0], trimming: .whitespacesAndNewlines),
              var value = Self.tokenContent(of: fields[1], trimming: bareValueTrim) else {
            return nil
        }
        // NOTE(review): C escapes are resolved before `\U` escapes, so an escaped backslash followed by
        // `U` and four hex digits ends up decoded as a code unit. Telling the two apart would need a
        // single combined pass.
        Self.unescape(&key)
        Self.unescape(&value)
        pairs.append((Self.decode(key), Self.decode(value)))
    }
    return pairs
}

/// Splits `line` at every `=` that lies outside a double-quoted string, dropping empty pieces.
private static func splitAtUnquotedEquals(_ line: Substring) -> [Substring] {
    var fields = [Substring]()
    var fieldStart = line.startIndex
    var insideQuotes = false
    var escaped = false
    for index in line.indices {
        let character = line[index]
        if escaped {
            // The character after a backslash never opens or closes a string.
            escaped = false
        } else if insideQuotes, character == "\\" {
            escaped = true
        } else if character == "\"" {
            insideQuotes.toggle()
        } else if character == "=", !insideQuotes {
            if fieldStart < index {
                fields.append(line[fieldStart..<index])
            }
            fieldStart = line.index(after: index)
        }
    }
    if fieldStart < line.endIndex {
        fields.append(line[fieldStart...])
    }
    return fields
}

/// Extracts the still-escaped text of one key or value field.
///
/// A field without any quotation mark is a bare word and is returned with `bareCharacters` trimmed.
/// Otherwise the text between the first quotation mark and the last unescaped one is returned;
/// `nil` means the string is never closed.
private static func tokenContent(of field: Substring, trimming bareCharacters: CharacterSet) -> String? {
    guard let opening = field.firstIndex(of: "\"") else {
        return field.trimmingCharacters(in: bareCharacters)
    }
    let contentStart = field.index(after: opening)
    var closing: Substring.Index?
    var escaped = false
    var index = contentStart
    while index < field.endIndex {
        let character = field[index]
        if escaped {
            escaped = false
        } else if character == "\\" {
            // Tracking the escape state (instead of peeking one character back) lets a string end
            // in an escaped backslash, e.g. "a\\".
            escaped = true
        } else if character == "\"" {
            closing = index
        }
        index = field.index(after: index)
    }
    guard let contentEnd = closing else {
        return nil
    }
    return String(field[contentStart..<contentEnd])
}
/// Replaces C-style escape sequences in `string` with the characters they stand for.
///
/// Recognized sequences are `\n`, `\r`, `\t`, `\0`, `\"`, `\'` and `\\`. Any other backslash sequence
/// (for example `\UXXXX`) is left untouched.
///
/// The string is scanned once from left to right, so the character produced by one escape can never
/// be mistaken for the start of another: `\\n` becomes a backslash followed by `n`, not a line break.
///
/// - Parameter string: The string to rewrite in place.
static func unescape(_ string: inout String) {
    guard string.contains("\\") else {
        return
    }
    var result = String()
    var iterator = string.makeIterator()
    while let character = iterator.next() {
        guard character == "\\" else {
            result.append(character)
            continue
        }
        guard let escapee = iterator.next() else {
            // A lone trailing backslash is kept as it is.
            result.append(character)
            break
        }
        switch escapee {
        case "n": result.append("\n")
        case "r": result.append("\r")
        case "t": result.append("\t")
        case "0": result.append("\0")
        case "\"": result.append("\"")
        case "'": result.append("'")
        case "\\": result.append("\\")
        default:
            // Not a C escape: keep both characters.
            result.append(character)
            result.append(escapee)
        }
    }
    string = result
}
/// Resolves UTF-16 `\UXXXX` escapes in a string.
///
/// `CFPropertyList`, `PropertyListSerialization` and `PropertyListDecoder` resolve escaped strings in
/// property lists on their own. This method only exists for implementing special behavior by hand.
///
/// - Parameter string: The escaped string.
/// - Returns: The string with its escapes removed.
static func decode(_ string: String) -> String {
    let pattern = try! NSRegularExpression(pattern: #"\\U([A-F0-9]{4})"#)
    var result = string
    let everything = NSRange(result.startIndex..<result.endIndex, in: result)
    // Matches are handled back to front, so a replacement never shifts the offsets of the matches
    // that are still waiting.
    for match in pattern.matches(in: result, range: everything).reversed() {
        let digits = Range<String.Index>(match.range(at: 1), in: result)!
        let unit = UInt16(result[digits], radix: 16)!
        // Surrogates only make sense in pairs; this first pass resolves the escapes that stand alone.
        if Unicode.UTF16.isSurrogate(unit) {
            continue
        }
        let whole = Range<String.Index>(match.range, in: result)!
        result.replaceSubrange(whole, with: String(Unicode.Scalar(unit)!))
    }
    // A second scan resolves the surrogate pairs.
    return Self.decode(surrogates: result)
}
/// Resolves escaped UTF-16 surrogate pairs such as `\UD83D\UDC3C` into the characters they encode.
///
/// Only a lead surrogate escape directly followed by a trail surrogate escape is replaced; every other
/// escape is left untouched.
///
/// - Parameter string: The string that may contain escaped surrogate pairs.
/// - Returns: The string with every well-formed escaped surrogate pair replaced.
static func decode(surrogates string: String) -> String {
    var unescaped = string
    // The pattern itself demands lead (D800–DBFF) then trail (DC00–DFFF). Matching any two escapes
    // would let a stray surrogate consume the lead of the valid pair that follows it, because regex
    // matches never overlap.
    // `try!` is safe here: the pattern is a constant.
    let regex = try! NSRegularExpression(pattern: #"\\U(D[89AB][0-9A-F]{2})\\U(D[C-F][0-9A-F]{2})"#)
    let fullRange = NSRange(unescaped.startIndex..<unescaped.endIndex, in: unescaped)
    // Matches are handled back to front, so a replacement never shifts the offsets of the matches
    // that are still waiting.
    for match in regex.matches(in: unescaped, range: fullRange).reversed() {
        guard let highRange = Range<String.Index>(match.range(at: 1), in: unescaped),
              let lowRange = Range<String.Index>(match.range(at: 2), in: unescaped),
              let wholeRange = Range<String.Index>(match.range, in: unescaped),
              let high = UInt16(unescaped[highRange], radix: 16),
              let low = UInt16(unescaped[lowRange], radix: 16),
              Unicode.UTF16.isLeadSurrogate(high),
              Unicode.UTF16.isTrailSurrogate(low) else {
            continue
        }
        let scalar = Unicode.UTF16.decode(Unicode.UTF16.EncodedScalar([high, low]))
        unescaped.replaceSubrange(wholeRange, with: String(scalar))
    }
    return unescaped
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment