Skip to content

Instantly share code, notes, and snippets.

@marcoarment
Created September 29, 2024 16:23
Show Gist options
  • Save marcoarment/85600b7fdc73fada7c666b82dd81be2b to your computer and use it in GitHub Desktop.
Save marcoarment/85600b7fdc73fada7c666b82dd81be2b to your computer and use it in GitHub Desktop.
//
// OCXML.swift
// Created by Marco Arment on 9/23/24.
//
// Released into the public domain. Do whatever you'd like with this.
// No guarantees that it'll do anything, or do it correctly. Good luck!
//
import Foundation
/// A node of an XML tree that can render itself as XML text.
///
/// Conforming types are required to be `Sendable`.
public protocol OCXMLNode: Sendable {
/// This node serialized as XML text.
var XML: String { get }
}
/// An XML node that exposes a list of child nodes.
public protocol OCXMLContainer: OCXMLNode {
/// The node's direct children.
var childNodes: [any OCXMLNode] { get }
}
/// A whole XML document, represented by its single root element.
public struct OCXMLDocument: OCXMLContainer {
    /// The root element of the document.
    public let documentElement: OCXMLElement

    /// Creates a document with `rootElement` as its root.
    public init(_ rootElement: OCXMLElement) {
        self.documentElement = rootElement
    }

    /// The root element, as the document's only child node.
    public var childNodes: [any OCXMLNode] {
        return [documentElement]
    }

    /// The root element's `documentXML`, i.e. its XML preceded by the XML declaration.
    public var XML: String {
        return documentElement.documentXML
    }
}
/// An XML element: a tag name, a set of attributes, and an ordered list of child nodes.
///
/// Attribute names are lowercased when stored, so `attribute(_:)` is
/// case-insensitive and attributes are serialized with lowercased names.
/// The element name is serialized exactly as given; a lowercased copy is kept
/// for name matching.
public struct OCXMLElement: OCXMLNode, OCXMLContainer {
    /// The tag name exactly as passed to the initializer.
    public let name: String
    /// The element's direct children, in document order.
    public let childNodes: [any OCXMLNode]
    /// Attribute values keyed by lowercased attribute name.
    private let attributes: [String: String]
    /// Lowercased copy of `name`, used for case-insensitive matching.
    fileprivate let normalizedName: String

    /// Creates an element whose children are produced by a result-builder closure.
    ///
    /// - Parameters:
    ///   - name: The tag name.
    ///   - attributes: Attribute values keyed by name; names are lowercased when stored.
    ///   - childNodes: Builder closure producing the children. Defaults to no children.
    public init(_ name: String, attributes: [String: String] = [:], @OCXMLBuilder childNodes: () -> [any OCXMLNode] = { [] }) {
        self.init(name, attributes: attributes, childNodes: childNodes())
    }

    /// Creates an element from an already-built array of children.
    ///
    /// - Parameters:
    ///   - name: The tag name.
    ///   - attributes: Attribute values keyed by name; names are lowercased when stored.
    ///   - childNodes: The element's children, in order.
    public init(_ name: String, attributes: [String: String] = [:], childNodes: [any OCXMLNode]) {
        self.name = name
        self.normalizedName = Self.normalizeName(name)
        self.attributes = Self.normalizeAttributes(attributes)
        self.childNodes = childNodes
    }

    /// Returns the value of the named attribute (matched case-insensitively), or `nil` if absent.
    public func attribute(_ attributeName: String) -> String? {
        attributes[Self.normalizeName(attributeName)]
    }

    /// This element's XML preceded by an XML 1.0 / UTF-8 declaration line.
    public var documentXML: String { "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\(XML)" }

    /// This element serialized as XML.
    ///
    /// Attribute names and values are entity-encoded. An element whose children
    /// serialize to an empty string is written as a self-closing tag.
    public var XML: String {
        var xml = "<\(name)"

        // Dictionary iteration order is not stable across runs; emitting the
        // attributes sorted by name keeps the serialized output reproducible.
        for (attributeName, value) in attributes.sorted(by: { $0.key < $1.key }) {
            xml += " \(attributeName.ocXMLEntityEncoded)=\"\(value.ocXMLEntityEncoded)\""
        }

        // content
        let encodedChildren = childNodes.map { $0.XML }.joined()
        if encodedChildren.isEmpty {
            xml += "/>"
        } else {
            xml += ">\(encodedChildren)</\(name)>"
        }
        return xml
    }

    /// Canonical form used for case-insensitive name comparison (lowercased).
    fileprivate static func normalizeName(_ name: String) -> String { name.lowercased() }

    /// Returns `attributes` re-keyed by normalized (lowercased) name.
    ///
    /// If two names differ only by case they collapse to one key, and the
    /// surviving value depends on the input dictionary's iteration order.
    private static func normalizeAttributes(_ attributes: [String: String]) -> [String: String] {
        // `reduce(into:)` mutates one accumulator instead of copying the dictionary per entry.
        attributes.reduce(into: [:]) { normalized, attribute in
            normalized[normalizeName(attribute.key)] = attribute.value
        }
    }
}
fileprivate extension String {
    /// A copy of the string with `&`, `<`, `>`, `"`, `'` and newline replaced by XML entity references.
    var ocXMLEntityEncoded: String {
        // Applied in order. "&" must come first so the ampersands introduced
        // by the later replacements are not escaped a second time.
        let substitutions: [(target: String, entity: String)] = [
            ("&", "&amp;"),
            ("<", "&lt;"),
            (">", "&gt;"),
            ("\"", "&quot;"),
            ("'", "&apos;"),
            ("\n", "&#10;"),
        ]
        return substitutions.reduce(self) { partial, substitution in
            partial.replacingOccurrences(of: substitution.target, with: substitution.entity, options: .literal)
        }
    }
}
// Strings are used as children directly for text (instead of having separate Text nodes)
extension String: OCXMLNode {
/// The string as XML text content, with reserved characters entity-encoded.
public var XML: String { ocXMLEntityEncoded }
/// The string itself, unencoded.
public var textValue: String { self }
}
/// A group of sibling nodes with no enclosing tag of its own.
public struct OCXMLDocumentFragment: OCXMLContainer {
    /// The nodes grouped by this fragment.
    public var childNodes: [any OCXMLNode]

    /// The children's XML concatenated in order, with nothing added around it.
    public var XML: String {
        var output = ""
        for node in childNodes {
            output += node.XML
        }
        return output
    }
}
/// Result builder that turns a closure of XML nodes into a `[any OCXMLNode]` child list.
///
/// Intermediate results (single expressions, arrays, `if`/`else` branches,
/// optionals and `for` loops) are carried as `OCXMLDocumentFragment` values.
/// `buildBlock` unwraps those fragments again, so the final child list holds
/// the actual elements and strings. Casts such as `as? OCXMLElement` and
/// `as? String` on the child list then find them.
@resultBuilder
public struct OCXMLBuilder {
    /// Combines the statements of a block into one flat child list.
    public static func buildBlock(_ components: any OCXMLNode...) -> [any OCXMLNode] {
        components.flatMap { flattened($0) }
    }

    /// Wraps the `if` branch of an `if`/`else`.
    public static func buildEither(first components: [any OCXMLNode]) -> any OCXMLNode { OCXMLDocumentFragment(childNodes: components) }

    /// Wraps the `else` branch of an `if`/`else`.
    public static func buildEither(second components: [any OCXMLNode]) -> any OCXMLNode { OCXMLDocumentFragment(childNodes: components) }

    /// Wraps the concatenated results of every iteration of a `for` loop.
    public static func buildArray(_ components: [[any OCXMLNode]]) -> any OCXMLNode { OCXMLDocumentFragment(childNodes: components.flatMap { $0 }) }

    /// Wraps a single node expression.
    public static func buildExpression(_ expression: any OCXMLNode) -> any OCXMLNode { OCXMLDocumentFragment(childNodes: [expression]) }

    /// Wraps an array-of-nodes expression so it can be spliced into the block.
    public static func buildExpression(_ expression: [any OCXMLNode]) -> any OCXMLNode { OCXMLDocumentFragment(childNodes: expression) }

    /// Wraps the body of an `if` without `else`; a skipped body yields an empty fragment.
    public static func buildOptional(_ components: [any OCXMLNode]?) -> any OCXMLNode { OCXMLDocumentFragment(childNodes: components ?? []) }

    /// Replaces a fragment by its contents, recursively; any other node is returned as-is.
    ///
    /// A fragment serializes to exactly the concatenation of its children, so
    /// the XML produced from the flattened list is unchanged.
    private static func flattened(_ node: any OCXMLNode) -> [any OCXMLNode] {
        guard let fragment = node as? OCXMLDocumentFragment else { return [node] }
        return fragment.childNodes.flatMap { flattened($0) }
    }
}
// MARK: - Optional add-on: Querying
public extension OCXMLNode {
    /// The node's text.
    ///
    /// A string node returns itself. A container returns its direct string
    /// children joined together, without descending into child elements.
    /// Any other node returns an empty string.
    var textValue: String {
        if let text = self as? String {
            return text
        }
        guard let container = self as? OCXMLContainer else {
            return ""
        }
        return container.childNodes.compactMap { $0 as? String }.joined()
    }
}
public extension OCXMLElement {
/// Looks up an attribute using `normalizedAttributeName` directly as the storage key.
/// No normalization is applied here, so the caller must pass an already-normalized name.
fileprivate func attribute(normalizedAttributeName: String) -> String? { attributes[normalizedAttributeName] }
}
public extension OCXMLContainer {
    /// The direct children that are elements; other children (such as text) are skipped.
    var childElements: [OCXMLElement] { childNodes.compactMap { $0 as? OCXMLElement } }

    /// Every element below this node: the direct child elements first,
    /// followed by the descendants of each child in turn.
    var allDescendants: [OCXMLElement] {
        childElements + childElements.flatMap { $0.allDescendants }
    }

    /// Direct child elements whose name matches `elementName`, compared case-insensitively.
    func childElements(named elementName: String) -> [OCXMLElement] {
        let normalizedName = OCXMLElement.normalizeName(elementName)
        return childElements.filter { $0.normalizedName == normalizedName }
    }

    /// The first direct child element named `elementName` (case-insensitive), or `nil` if there is none.
    func firstChild(named elementName: String) -> OCXMLElement? { childElements(named: elementName).first }

    /// All descendant elements whose name matches `elementName`, compared case-insensitively.
    func allDescendants(named elementName: String) -> [OCXMLElement] {
        let normalizedName = OCXMLElement.normalizeName(elementName)
        // Compare against the normalized name. Comparing against the raw
        // argument would never match a name that contains uppercase letters.
        return allDescendants.filter { $0.normalizedName == normalizedName }
    }

    /// The values of the attribute `attributeName` (case-insensitive) on every descendant element that has it.
    func allDescendantAttributeValues(named attributeName: String) -> [String] {
        let normalizedName = OCXMLElement.normalizeName(attributeName)
        return allDescendants.compactMap { $0.attribute(normalizedAttributeName: normalizedName) }
    }

    /// The direct string children joined together; non-string children contribute nothing.
    var textValue: String { childNodes.map { ($0 as? String) ?? "" }.joined() }

    // Supports VERY basic XPath queries, e.g. "//item/enclosure/@url"

    /// Runs `query(_:)` and keeps only the results that are elements.
    func queryElements(_ xPath: String) -> [OCXMLElement] { query(xPath).compactMap { $0 as? OCXMLElement } }

    /// Runs `query(_:)` and returns the `textValue` of every result.
    func queryStrings(_ xPath: String) -> [String] { query(xPath).map { $0.textValue } }

    /// Evaluates a very small XPath subset against this node.
    ///
    /// The path is a sequence of steps, each starting with `/` (direct
    /// children) or `//` (all descendants) and followed by either an element
    /// name or `@attributeName`. Names are matched case-insensitively.
    /// A path that does not start each step with `/` stops the program with
    /// `fatalError`.
    ///
    /// - Parameter xPath: The query, e.g. `"//item/enclosure/@url"`.
    /// - Returns: The matching nodes: elements for name steps, strings for attribute steps.
    func query(_ xPath: String) -> [any OCXMLNode] {
        let scanner = Scanner(string: xPath)
        var contexts: [any OCXMLNode] = [self]

        while !scanner.isAtEnd {
            // A step with nothing after its slash leaves this empty, which ends the query with no results.
            var nextContexts: [any OCXMLNode] = []

            // "//" must be tested before "/" because the single slash is a prefix of it.
            if scanner.scanString("//") != nil {
                if scanner.scanString("@") != nil, let attributeName = scanner.scanUpToString("/") {
                    nextContexts = contexts.compactMap { $0 as? OCXMLContainer }.flatMap { $0.allDescendantAttributeValues(named: attributeName) }
                } else if let name = scanner.scanUpToString("/") {
                    nextContexts = contexts.compactMap { $0 as? OCXMLContainer }.flatMap { $0.allDescendants(named: name) }
                }
            } else if scanner.scanString("/") != nil {
                if scanner.scanString("@") != nil, let attributeName = scanner.scanUpToString("/") {
                    // compactMap drops elements that lack the attribute
                    // (replaces the deprecated optional-returning flatMap).
                    nextContexts = contexts.compactMap { $0 as? OCXMLElement }.compactMap { $0.attribute(attributeName) }
                } else if let name = scanner.scanUpToString("/") {
                    nextContexts = contexts.compactMap { $0 as? OCXMLContainer }.flatMap { $0.childElements(named: name) }
                }
            } else {
                fatalError("Unrecognized XPath syntax: \(xPath)")
            }

            contexts = nextContexts
        }

        return contexts
    }
}
// MARK: - Optional add-on: XML parsing
// A VERY simple parser that doesn't support namespaces and performs no real error handling.
public extension OCXMLDocument {
    /// Parses `xml` into a document.
    ///
    /// The result of `XMLParser.parse()` is not inspected. The initializer
    /// fails only when the string cannot be encoded as UTF-8 or when parsing
    /// produced no root element.
    ///
    /// - Parameter xml: The XML source text.
    init?(xml: String) {
        guard let xmlData = xml.data(using: .utf8) else { return nil }

        // The parser does not own its delegate; this local keeps it alive through the parse.
        let treeBuilder = OCXMLParserDelegate()
        let xmlParser = XMLParser(data: xmlData)
        xmlParser.delegate = treeBuilder
        xmlParser.parse()

        guard let parsedRoot = treeBuilder.xmlNode as? OCXMLElement else { return nil }
        self.init(parsedRoot)
    }
}
/// Mutable, reference-typed tree node used while parsing; converted to the
/// immutable `OCXMLNode` values through `xmlNode`.
fileprivate class OCXMLReadingNode {
    /// What the node represents.
    fileprivate enum NodeType {
        case element(name: String, attributes: [String: String])
        case text(content: String)
    }

    let type: NodeType
    var childNodes: [OCXMLReadingNode] = []
    // Weak so that a child pointing back at its parent does not form a retain cycle.
    weak var parentNode: OCXMLReadingNode? = nil

    /// Creates a text node.
    init(text: String) {
        type = .text(content: text)
    }

    /// Creates an element node with no children yet.
    init(name: String, attributes: [String: String]) {
        type = .element(name: name, attributes: attributes)
    }

    /// The immutable equivalent of this node: the string itself for text,
    /// or an `OCXMLElement` with recursively converted children for an element.
    var xmlNode: any OCXMLNode {
        switch type {
        case .text(let content):
            return content
        case .element(let name, let attributes):
            let convertedChildren = childNodes.map { $0.xmlNode }
            return OCXMLElement(name, attributes: attributes, childNodes: convertedChildren)
        }
    }
}
/// `XMLParser` delegate that assembles a tree of `OCXMLReadingNode`s from parser callbacks.
fileprivate class OCXMLParserDelegate : NSObject, XMLParserDelegate {
    /// The first element the parser reported.
    private var rootElement: OCXMLReadingNode? = nil
    /// The element currently open, i.e. the one that receives new children.
    private var inProgressElement: OCXMLReadingNode? = nil

    /// The parsed tree converted to immutable nodes, or `nil` if no element was seen.
    var xmlNode: (any OCXMLNode)? {
        return rootElement?.xmlNode
    }

    /// Opens a new element under the current one and makes it the current element.
    func parser(_ parser: XMLParser, didStartElement elementName: String, namespaceURI: String?, qualifiedName qName: String?, attributes attributeDict: [String : String] = [:]) {
        let openedNode = OCXMLReadingNode(name: elementName, attributes: attributeDict)
        let parent = inProgressElement

        openedNode.parentNode = parent
        parent?.childNodes.append(openedNode)

        // Only the first element ever seen becomes the root.
        rootElement = rootElement ?? openedNode
        inProgressElement = openedNode
    }

    /// Closes the current element by stepping back to its parent.
    func parser(_ parser: XMLParser, didEndElement elementName: String, namespaceURI: String?, qualifiedName qName: String?) {
        inProgressElement = inProgressElement?.parentNode
    }

    /// Adds a text child to the current element; text reported while no element is open is dropped.
    func parser(_ parser: XMLParser, foundCharacters string: String) {
        guard let openElement = inProgressElement else { return }
        openElement.childNodes.append(OCXMLReadingNode(text: string))
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment