Created
May 11, 2025 16:18
-
-
Save Crydust/12305b5c183ca02e2fc15b1cc1a18b04 to your computer and use it in GitHub Desktop.
Read and write xml with whitespace in an attribute.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.w3c.dom.Document; | |
import org.w3c.dom.Element; | |
import javax.xml.parsers.DocumentBuilder; | |
import javax.xml.parsers.DocumentBuilderFactory; | |
import javax.xml.stream.XMLInputFactory; | |
import javax.xml.stream.XMLOutputFactory; | |
import javax.xml.stream.XMLStreamReader; | |
import javax.xml.stream.XMLStreamWriter; | |
import javax.xml.transform.Transformer; | |
import javax.xml.transform.TransformerFactory; | |
import javax.xml.transform.dom.DOMSource; | |
import javax.xml.transform.stream.StreamResult; | |
import java.io.InputStream; | |
import java.io.OutputStream; | |
import java.io.Writer; | |
import java.nio.charset.StandardCharsets; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
/**
 * Demonstrates how the StAX and DOM APIs write (and read back) an XML attribute
 * whose value contains a newline.
 * <p>
 * See <a href="https://stackoverflow.com/q/79614120">Why do the stax and the dom api write xml
 * attributes with newlines differently?</a>
 * <p>
 * The XOM parts (commented out below) require this dependency:
 *
 * <pre>{@code
 * <dependency>
 *     <groupId>xom</groupId>
 *     <artifactId>xom</artifactId>
 *     <version>1.3.9</version>
 *     <exclusions>
 *         <exclusion>
 *             <groupId>xerces</groupId>
 *             <artifactId>xercesImpl</artifactId>
 *         </exclusion>
 *     </exclusions>
 * </dependency>
 * }</pre>
 */
public class WriteXml {

    /**
     * Writes the value {@code "foo\nbar"} into an attribute with each writer, prints the
     * resulting file, and prints the attribute value as read back by each reader.
     *
     * @param args ignored
     * @throws Exception if writing or parsing any of the temporary files fails
     */
    public static void main(String[] args) throws Exception {
        // The active writeWithStax escapes the newline to "&#10;" via its whitespace-preserving
        // hack, so the value reads back as "foo NEWLINE bar". The plain writeAttribute variant
        // (commented out below) returns "foo SPACE bar" because it literally writes the newline.
        Path staxFile = writeWithStax("foo\nbar");
        System.out.println("staxFile = " + Files.readString(staxFile));
        System.out.println("attribute read with stax = " + readWithStax(staxFile));
        System.out.println("attribute read with dom = " + readWithDom(staxFile));
        // System.out.println("attribute read with xom = " + readWithXom(staxFile));
        System.out.println();

        // returns "foo NEWLINE bar" because writeWithDom escapes the newline to "&#10;"
        Path domFile = writeWithDom("foo\nbar");
        System.out.println("domFile = " + Files.readString(domFile));
        System.out.println("attribute read with stax = " + readWithStax(domFile));
        System.out.println("attribute read with dom = " + readWithDom(domFile));
        // System.out.println("attribute read with xom = " + readWithXom(domFile));
        System.out.println();

        // // returns "foo NEWLINE bar" because writeWithXom escapes the newline to "&#xA;"
        // Path xomFile = writeWithXom("foo\nbar");
        // System.out.println("xomFile = " + Files.readString(xomFile));
        // System.out.println("attribute read with stax = " + readWithStax(xomFile));
        // System.out.println("attribute read with dom = " + readWithDom(xomFile));
        // System.out.println("attribute read with xom = " + readWithXom(xomFile));
        // System.out.println();
    }

    // Plain StAX variant, kept for comparison: writeAttribute writes the newline literally.
    //
    // private static Path writeWithStax(String text) throws Exception {
    //     Path file = Files.createTempFile("stax", ".xml");
    //     try (OutputStream out = Files.newOutputStream(file)) {
    //         XMLOutputFactory factory = XMLOutputFactory.newInstance();
    //         XMLStreamWriter writer = factory.createXMLStreamWriter(out);
    //         writer.writeStartDocument();
    //         writer.writeStartElement("element");
    //         writer.writeAttribute("attribute", text);
    //         writer.writeEndElement();
    //         writer.writeEndDocument();
    //         writer.flush();
    //         writer.close();
    //     }
    //     return file;
    // }

    /**
     * Writes {@code <element attribute="..."/>} to a new temporary file using StAX, escaping
     * tab, newline and carriage return in the attribute value as character references.
     *
     * @param text the attribute value
     * @return the temporary file that was written (it is not deleted by this class)
     * @throws Exception if the file cannot be created or written
     */
    private static Path writeWithStax(String text) throws Exception {
        Path file = Files.createTempFile("stax", ".xml");
        try (Writer out = Files.newBufferedWriter(file, StandardCharsets.UTF_8)) {
            XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(out);
            try {
                writer.writeStartDocument();
                writer.writeStartElement("element");
                // hack to preserve whitespace
                writeAttributePreservingWhitespace(out, writer, "attribute", text);
                writer.writeEndElement();
                writer.writeEndDocument();
                writer.flush();
            } finally {
                // Closed even when a write fails; the Writer itself is closed by try-with-resources.
                writer.close();
            }
        }
        return file;
    }

    /**
     * Writes one attribute on the element most recently started on {@code writer}.
     * <p>
     * When the value contains a tab, newline or carriage return, the attribute is written
     * straight to {@code out} with those characters escaped as character references, bypassing
     * {@code writer}. Otherwise it is delegated to {@link XMLStreamWriter#writeAttribute(String, String)}.
     * See <a href="https://www.w3.org/TR/xml/#AVNormalize">attribute-value normalization</a>.
     *
     * @param out    the underlying writer; must be the same one {@code writer} was created on,
     *               since raw text is interleaved with the stream writer's output
     * @param writer the stream writer, positioned directly after {@code writeStartElement}
     * @param name   the attribute name
     * @param value  the attribute value
     * @throws Exception if writing fails
     */
    private static void writeAttributePreservingWhitespace(Writer out, XMLStreamWriter writer, String name, String value) throws Exception {
        boolean needsEscaping = value.contains("\t") || value.contains("\n") || value.contains("\r");
        if (!needsEscaping) {
            writer.writeAttribute(name, value);
            return;
        }
        // Flush whatever the stream writer has produced so far, so the raw attribute text
        // lands inside the still-open start tag.
        writer.flush();
        out.write(" ");
        writeXMLContent(out, name);
        out.write("=\"");
        writeXMLContent(out, value);
        out.write("\"");
    }

    /**
     * Writes {@code text} to {@code out}, replacing XML markup characters with predefined
     * entities and tab, newline and carriage return with numeric character references.
     *
     * @param out  destination
     * @param text text to escape and write
     * @throws Exception if writing fails
     */
    private static void writeXMLContent(Writer out, String text) throws Exception {
        for (char ch : text.toCharArray()) {
            switch (ch) {
                case '<' -> out.write("&lt;");
                case '&' -> out.write("&amp;");
                case '>' -> out.write("&gt;");
                case '"' -> out.write("&quot;");
                case '\t' -> out.write("&#9;");
                case '\n' -> out.write("&#10;");
                case '\r' -> out.write("&#13;");
                default -> out.write(ch);
            }
        }
    }

    /**
     * Writes {@code <element attribute="..."/>} to a new temporary file by building a DOM
     * document and serializing it with a default {@link Transformer}.
     *
     * @param text the attribute value
     * @return the temporary file that was written (it is not deleted by this class)
     * @throws Exception if the document cannot be built or written
     */
    private static Path writeWithDom(String text) throws Exception {
        Path file = Files.createTempFile("dom", ".xml");
        try (OutputStream out = Files.newOutputStream(file)) {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.newDocument();
            Element element = document.createElement("element");
            element.setAttribute("attribute", text);
            document.appendChild(element);
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            transformer.transform(new DOMSource(document), new StreamResult(out));
        }
        return file;
    }

    // private static Path writeWithXom(String text) throws Exception {
    //     Path file = Files.createTempFile("xom", ".xml");
    //     var element = new nu.xom.Element("element");
    //     var attribute = new nu.xom.Attribute("attribute", text);
    //     element.addAttribute(attribute);
    //     var doc = new nu.xom.Document(element);
    //     String result = doc.toXML();
    //     Files.writeString(file, result);
    //     return file;
    // }

    /**
     * Reads the {@code attribute} attribute of the first {@code element} start tag using StAX.
     *
     * @param file the XML file to read
     * @return the attribute value; {@code null} if there is no {@code element} start tag or
     *         it has no such attribute
     * @throws Exception if the file cannot be read or parsed
     */
    private static String readWithStax(Path file) throws Exception {
        try (InputStream in = Files.newInputStream(file)) {
            XMLStreamReader reader = XMLInputFactory.newInstance().createXMLStreamReader(in);
            try {
                while (reader.hasNext()) {
                    int event = reader.next();
                    if (event == XMLStreamReader.START_ELEMENT
                            && "element".equals(reader.getLocalName())) {
                        return reader.getAttributeValue(null, "attribute");
                    }
                }
            } finally {
                // XMLStreamReader is not AutoCloseable, so release it explicitly.
                reader.close();
            }
        }
        return null;
    }

    /**
     * Reads the {@code attribute} attribute of the document element using the DOM API.
     *
     * @param file the XML file to read
     * @return the attribute value as returned by {@link Element#getAttribute(String)}
     * @throws Exception if the file cannot be read or parsed
     */
    private static String readWithDom(Path file) throws Exception {
        try (InputStream in = Files.newInputStream(file)) {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(in);
            return document.getDocumentElement()
                    .getAttribute("attribute");
        }
    }

    // private static String readWithXom(Path file) throws Exception {
    //     try (InputStream in = Files.newInputStream(file)) {
    //         var parser = new nu.xom.Builder();
    //         var document = parser.build(in);
    //         return document.getRootElement()
    //                 .getAttribute("attribute")
    //                 .getValue();
    //     }
    // }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment