Created
September 29, 2024 16:23
-
-
Save marcoarment/85600b7fdc73fada7c666b82dd81be2b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  OCXML.swift
//  Created by Marco Arment on 9/23/24.
//
//  Released into the public domain. Do whatever you'd like with this.
//  No guarantees that it'll do anything, or do it correctly. Good luck!
//
import Foundation | |
/// A node of an XML tree that can render itself as markup.
public protocol OCXMLNode: Sendable {
    /// This node serialized as an XML string.
    var XML: String { get }
}
/// A node that holds an ordered list of child nodes.
public protocol OCXMLContainer: OCXMLNode {
    /// The nodes directly contained in this node.
    var childNodes: [any OCXMLNode] { get }
}
/// An XML document: a thin wrapper around a single root element.
public struct OCXMLDocument: OCXMLContainer {
    /// The root element of the document.
    public let documentElement: OCXMLElement

    /// Creates a document whose root is `rootElement`.
    public init(_ rootElement: OCXMLElement) {
        self.documentElement = rootElement
    }

    /// The document's only child: its root element.
    public var childNodes: [any OCXMLNode] {
        return [documentElement]
    }

    /// The whole document as a string, i.e. the root element's `documentXML`.
    public var XML: String {
        return documentElement.documentXML
    }
}
/// An XML element: a name, a set of attributes and an ordered list of child nodes.
///
/// Attribute names are lowercased when the element is created, so attribute lookup is
/// case-insensitive and attribute names are written lowercased by `XML`. The element
/// name is kept as given for output; a lowercased copy is stored for name matching.
public struct OCXMLElement: OCXMLNode, OCXMLContainer {
    /// The element name exactly as passed to the initializer.
    public let name: String
    /// The element's direct children (elements, strings, fragments, ...).
    public let childNodes: [any OCXMLNode]
    /// Attributes keyed by their normalized (lowercased) name.
    private let attributes: [String: String]
    /// Lowercased copy of `name`, used for case-insensitive comparisons.
    fileprivate let normalizedName: String

    /// Creates an element whose children are produced by a result-builder closure.
    ///
    /// - Parameters:
    ///   - name: The element name.
    ///   - attributes: Attribute names and values; names are lowercased on storage.
    ///   - childNodes: Builder closure, evaluated immediately, that yields the children.
    public init(_ name: String, attributes: [String: String] = [:], @OCXMLBuilder childNodes: () -> [any OCXMLNode] = { [] }) {
        self.init(name, attributes: attributes, childNodes: childNodes())
    }

    /// Creates an element from an already-built array of children.
    ///
    /// - Parameters:
    ///   - name: The element name.
    ///   - attributes: Attribute names and values; names are lowercased on storage.
    ///   - childNodes: The element's children, in order.
    public init(_ name: String, attributes: [String: String] = [:], childNodes: [any OCXMLNode]) {
        self.name = name
        self.normalizedName = Self.normalizeName(name)
        self.attributes = Self.normalizeAttributes(attributes)
        self.childNodes = childNodes
    }

    /// Returns the value of the attribute called `attributeName` (compared
    /// case-insensitively), or `nil` if the element has no such attribute.
    public func attribute(_ attributeName: String) -> String? {
        attributes[Self.normalizeName(attributeName)]
    }

    /// The element serialized as a complete document: an XML declaration, a newline,
    /// then the element's own `XML`.
    public var documentXML: String { "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n\(XML)" }

    /// The element serialized as markup.
    ///
    /// Attributes are written in the dictionary's iteration order. When the children
    /// serialize to an empty string the element is written self-closing (`<name/>`).
    public var XML: String {
        var xml = "<\(name)"
        if !attributes.isEmpty {
            xml += " " + attributes.map { "\($0.ocXMLEntityEncoded)=\"\($1.ocXMLEntityEncoded)\"" }.joined(separator: " ")
        }

        // content
        let encodedChildren = childNodes.map { $0.XML }.joined()
        if !encodedChildren.isEmpty {
            xml += ">\(encodedChildren)</\(name)>"
        } else {
            xml += "/>"
        }
        return xml
    }

    /// Canonical form used to compare element and attribute names: lowercased.
    fileprivate static func normalizeName(_ name: String) -> String { name.lowercased() }

    /// Returns `attributes` re-keyed by normalized name.
    ///
    /// Keys that differ only by case collapse into one entry; which value survives
    /// depends on the dictionary's iteration order.
    private static func normalizeAttributes(_ attributes: [String: String]) -> [String: String] {
        // `reduce(into:)` mutates a single accumulator instead of copying the
        // dictionary on every step.
        attributes.reduce(into: [String: String]()) { normalized, attribute in
            normalized[normalizeName(attribute.key)] = attribute.value
        }
    }
}
fileprivate extension String {
    /// The string with XML-significant characters replaced by entity references, so it
    /// can be embedded in element content or in a double-quoted attribute value.
    ///
    /// `&`, `<`, `>`, `"` and `'` become the five predefined XML entities. Line feeds are
    /// written as the character reference `&#10;`, because a literal line break inside an
    /// attribute value is normalized to a space by a conforming XML parser.
    var ocXMLEntityEncoded: String {
        // Order matters: "&" must be handled first so the ampersands introduced by the
        // later replacements are not encoded a second time.
        let replacements: [(target: String, entity: String)] = [
            ("&", "&amp;"),
            ("<", "&lt;"),
            (">", "&gt;"),
            ("\"", "&quot;"),
            ("'", "&apos;"),
            ("\n", "&#10;"),
        ]
        return replacements.reduce(self) { partial, replacement in
            partial.replacingOccurrences(of: replacement.target, with: replacement.entity, options: .literal)
        }
    }
}
// Strings are used as children directly for text (instead of having separate Text nodes)
extension String: OCXMLNode {
    /// The string in its entity-encoded form, ready to be placed in XML output.
    public var XML: String {
        return self.ocXMLEntityEncoded
    }

    /// The string itself, without any encoding applied.
    public var textValue: String {
        return self
    }
}
/// A bare list of nodes with no enclosing element of its own.
public struct OCXMLDocumentFragment: OCXMLContainer {
    /// The nodes grouped by this fragment.
    public var childNodes: [any OCXMLNode]

    /// The children's XML concatenated in order, with nothing wrapped around them.
    public var XML: String {
        return childNodes.reduce(into: "") { markup, child in
            markup += child.XML
        }
    }
}
/// Result builder that lets an element's children be written declaratively.
///
/// Every intermediate result (expressions, `if`/`else` branches, optionals and loops)
/// is wrapped in an `OCXMLDocumentFragment`; `buildBlock` then gathers those wrapped
/// nodes into the final array.
@resultBuilder
public struct OCXMLBuilder {
    /// Gathers the components of a block into an array of nodes.
    public static func buildBlock(_ components: any OCXMLNode...) -> [any OCXMLNode] {
        return components
    }

    /// Wraps the nodes of the first branch of a conditional in a fragment.
    public static func buildEither(first components: [any OCXMLNode]) -> any OCXMLNode {
        return OCXMLDocumentFragment(childNodes: components)
    }

    /// Wraps the nodes of the second branch of a conditional in a fragment.
    public static func buildEither(second components: [any OCXMLNode]) -> any OCXMLNode {
        return OCXMLDocumentFragment(childNodes: components)
    }

    /// Flattens the per-iteration node arrays of a loop into one fragment.
    public static func buildArray(_ components: [[any OCXMLNode]]) -> any OCXMLNode {
        let flattened: [any OCXMLNode] = Array(components.joined())
        return OCXMLDocumentFragment(childNodes: flattened)
    }

    /// Wraps a single node expression in a fragment.
    public static func buildExpression(_ expression: any OCXMLNode) -> any OCXMLNode {
        return OCXMLDocumentFragment(childNodes: [expression])
    }

    /// Wraps an array-of-nodes expression in a fragment.
    public static func buildExpression(_ expression: [any OCXMLNode]) -> any OCXMLNode {
        return OCXMLDocumentFragment(childNodes: expression)
    }

    /// Wraps the nodes of an optional block in a fragment; the fragment is empty when
    /// the block did not run.
    public static func buildOptional(_ components: [any OCXMLNode]?) -> any OCXMLNode {
        return OCXMLDocumentFragment(childNodes: components ?? [])
    }
}
// MARK: - Optional add-on: Querying
public extension OCXMLNode {
    /// The node's text content.
    ///
    /// A string node yields itself. A container yields its direct string children joined
    /// together (text inside nested elements is not included). Any other node yields an
    /// empty string.
    var textValue: String {
        if let text = self as? String {
            return text
        }
        guard let container = self as? any OCXMLContainer else {
            return ""
        }
        // Non-string children contribute nothing to the joined text.
        return container.childNodes.compactMap { $0 as? String }.joined()
    }
}
fileprivate extension OCXMLElement {
    /// Looks up an attribute by a name that the caller has already normalized, skipping
    /// the per-call normalization done by `attribute(_:)`.
    func attribute(normalizedAttributeName: String) -> String? {
        return attributes[normalizedAttributeName]
    }
}
public extension OCXMLContainer {
    /// The direct children that are elements (strings and other node kinds are skipped).
    var childElements: [OCXMLElement] { childNodes.compactMap { $0 as? OCXMLElement } }

    /// Every element below this container: the direct child elements first, followed by
    /// the descendants of each child in turn.
    var allDescendants: [OCXMLElement] {
        let children = childElements
        return children + children.flatMap { $0.allDescendants }
    }

    /// The direct child elements whose name matches `elementName`, ignoring case.
    func childElements(named elementName: String) -> [OCXMLElement] {
        let normalizedName = OCXMLElement.normalizeName(elementName)
        return childElements.filter { $0.normalizedName == normalizedName }
    }

    /// The first direct child element whose name matches `elementName`, ignoring case.
    func firstChild(named elementName: String) -> OCXMLElement? { childElements(named: elementName).first }

    /// All descendant elements whose name matches `elementName`, ignoring case.
    func allDescendants(named elementName: String) -> [OCXMLElement] {
        let normalizedName = OCXMLElement.normalizeName(elementName)
        // Compare against the normalized name; comparing with the raw argument would
        // never match a query that contains uppercase characters.
        return allDescendants.filter { $0.normalizedName == normalizedName }
    }

    /// The values of the attribute `attributeName` (matched ignoring case) on every
    /// descendant element that has it.
    func allDescendantAttributeValues(named attributeName: String) -> [String] {
        let normalizedName = OCXMLElement.normalizeName(attributeName)
        return allDescendants.compactMap { $0.attribute(normalizedAttributeName: normalizedName) }
    }

    /// The container's direct string children joined together.
    var textValue: String { childNodes.map { ($0 as? String) ?? "" }.joined() }

    // Supports VERY basic XPath queries, e.g. "//item/enclosure/@url"

    /// Runs `xPath` and returns only the results that are elements.
    func queryElements(_ xPath: String) -> [OCXMLElement] { query(xPath).compactMap { $0 as? OCXMLElement } }

    /// Runs `xPath` and returns the `textValue` of every result.
    func queryStrings(_ xPath: String) -> [String] { query(xPath).map { $0.textValue } }

    /// Evaluates a very small subset of XPath against this container.
    ///
    /// The path is a sequence of steps, each introduced by `/` (search direct children)
    /// or `//` (search all descendants) and followed by an element name or by `@` and an
    /// attribute name. A `/@name` step reads the attribute from the current elements
    /// themselves. Names are matched ignoring case.
    ///
    /// - Parameter xPath: The query, e.g. `"//item/enclosure/@url"`.
    /// - Returns: The matched elements, or attribute values as strings.
    /// - Important: A step that does not begin with `/` (for example a relative path
    ///   such as `"item/title"`) stops the program with `fatalError`.
    func query(_ xPath: String) -> [any OCXMLNode] {
        let scanner = Scanner(string: xPath)
        var contexts: [any OCXMLNode] = [self]

        while !scanner.isAtEnd {
            // Each step must start with "//" (descendants) or "/" (children).
            let searchesDescendants: Bool
            if scanner.scanString("//") != nil {
                searchesDescendants = true
            } else if scanner.scanString("/") != nil {
                searchesDescendants = false
            } else {
                fatalError("Unrecognized XPath syntax: \(xPath)")
            }

            let selectsAttribute = scanner.scanString("@") != nil

            // A step without a name matches nothing.
            guard let name = scanner.scanUpToString("/") else {
                contexts = []
                continue
            }

            let containers = contexts.compactMap { $0 as? any OCXMLContainer }
            switch (searchesDescendants, selectsAttribute) {
            case (true, true):
                let values: [String] = containers.flatMap { $0.allDescendantAttributeValues(named: name) }
                contexts = values
            case (true, false):
                let elements: [OCXMLElement] = containers.flatMap { $0.allDescendants(named: name) }
                contexts = elements
            case (false, true):
                // Attribute of the current elements; elements lacking it are dropped.
                let values: [String] = contexts.compactMap { ($0 as? OCXMLElement)?.attribute(name) }
                contexts = values
            case (false, false):
                let elements: [OCXMLElement] = containers.flatMap { $0.childElements(named: name) }
                contexts = elements
            }
        }

        return contexts
    }
}
// MARK: - Optional add-on: XML parsing
// A VERY simple parser that doesn't support namespaces and performs no real error handling.
public extension OCXMLDocument {
    /// Builds a document by parsing `xml` with Foundation's `XMLParser`.
    ///
    /// - Parameter xml: The XML source text.
    /// - Returns: `nil` when the text cannot be encoded as UTF-8 or when parsing did not
    ///   produce a root element.
    init?(xml: String) {
        guard let xmlData = xml.data(using: .utf8) else { return nil }

        let treeBuilder = OCXMLParserDelegate()
        let xmlParser = XMLParser(data: xmlData)
        xmlParser.delegate = treeBuilder
        // The success flag returned by parse() is not inspected: whatever tree was
        // collected before the parser stopped is used.
        xmlParser.parse()

        guard let root = treeBuilder.xmlNode as? OCXMLElement else { return nil }
        self.init(root)
    }
}
/// Mutable scratch node used while parsing; converted to immutable `OCXMLNode` values
/// through `xmlNode`.
fileprivate class OCXMLReadingNode {
    /// What the node represents.
    fileprivate enum NodeType {
        case element(name: String, attributes: [String: String])
        case text(content: String)
    }

    let type: NodeType
    /// Children in the order they were appended.
    var childNodes: [OCXMLReadingNode] = []
    /// Back-reference to the enclosing node; weak so parent and child do not retain
    /// each other.
    weak var parentNode: OCXMLReadingNode? = nil

    /// Creates a text node.
    init(text: String) {
        self.type = .text(content: text)
    }

    /// Creates an element node.
    init(name: String, attributes: [String: String]) {
        self.type = .element(name: name, attributes: attributes)
    }

    /// The immutable counterpart of this node: a `String` for text, or an
    /// `OCXMLElement` whose children are converted recursively.
    var xmlNode: any OCXMLNode {
        switch type {
        case .text(let content):
            return content
        case .element(let name, let attributes):
            let convertedChildren: [any OCXMLNode] = childNodes.map { $0.xmlNode }
            return OCXMLElement(name, attributes: attributes, childNodes: convertedChildren)
        }
    }
}
/// `XMLParser` delegate that assembles a tree of `OCXMLReadingNode`s from parse events.
fileprivate class OCXMLParserDelegate : NSObject, XMLParserDelegate {
    /// The first element seen; the root of the tree being built.
    private var rootElement: OCXMLReadingNode? = nil
    /// The element currently open, i.e. the one receiving new children.
    private var inProgressElement: OCXMLReadingNode? = nil

    /// The parsed tree converted to immutable nodes, or `nil` if no element was seen.
    var xmlNode: (any OCXMLNode)? {
        return rootElement?.xmlNode
    }

    /// Opens a new element under the current one and makes it the current element.
    func parser(_ parser: XMLParser, didStartElement elementName: String, namespaceURI: String?, qualifiedName qName: String?, attributes attributeDict: [String : String] = [:]) {
        let openedNode = OCXMLReadingNode(name: elementName, attributes: attributeDict)
        if let enclosing = inProgressElement {
            openedNode.parentNode = enclosing
            enclosing.childNodes.append(openedNode)
        }
        if rootElement == nil {
            rootElement = openedNode
        }
        inProgressElement = openedNode
    }

    /// Closes the current element by stepping back to its parent.
    func parser(_ parser: XMLParser, didEndElement elementName: String, namespaceURI: String?, qualifiedName qName: String?) {
        inProgressElement = inProgressElement?.parentNode
    }

    /// Appends a text node to the current element; characters reported while no
    /// element is open are dropped.
    func parser(_ parser: XMLParser, foundCharacters string: String) {
        guard let enclosing = inProgressElement else { return }
        enclosing.childNodes.append(OCXMLReadingNode(text: string))
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment