Skip to content

Instantly share code, notes, and snippets.

@Crydust
Created May 11, 2025 16:18
Show Gist options
  • Save Crydust/12305b5c183ca02e2fc15b1cc1a18b04 to your computer and use it in GitHub Desktop.
Save Crydust/12305b5c183ca02e2fc15b1cc1a18b04 to your computer and use it in GitHub Desktop.
Read and write XML with whitespace in an attribute.
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Demonstrates writing and reading an XML attribute whose value contains a
 * newline, using the StAX and DOM APIs (and, optionally, XOM).
 * <p>
 * See <a href="https://stackoverflow.com/q/79614120">Why do the stax and the dom api write xml attributes with newlines differently?</a>
 * <p>
 * The XOM parts (commented out below) require this dependency:
 *
 * <pre>
 * &lt;dependency&gt;
 *     &lt;groupId&gt;xom&lt;/groupId&gt;
 *     &lt;artifactId&gt;xom&lt;/artifactId&gt;
 *     &lt;version&gt;1.3.9&lt;/version&gt;
 *     &lt;exclusions&gt;
 *         &lt;exclusion&gt;
 *             &lt;groupId&gt;xerces&lt;/groupId&gt;
 *             &lt;artifactId&gt;xercesImpl&lt;/artifactId&gt;
 *         &lt;/exclusion&gt;
 *     &lt;/exclusions&gt;
 * &lt;/dependency&gt;
 * </pre>
 */
public class WriteXml {

    /**
     * Writes the value {@code "foo\nbar"} into an attribute with each writer
     * and prints the file content plus the value each reader gets back.
     *
     * @param args ignored
     * @throws Exception if writing or parsing any of the temporary files fails
     */
    public static void main(String[] args) throws Exception {
        // writeWithStax escapes the newline as a character reference (see
        // writeAttributePreservingWhitespace), so readers return
        // "foo NEWLINE bar". With the naive, commented-out variant below the
        // newline is written literally and readers return "foo SPACE bar",
        // because of attribute-value normalization.
        Path staxFile = writeWithStax("foo\nbar");
        System.out.println("staxFile = " + Files.readString(staxFile));
        System.out.println("attribute read with stax = " + readWithStax(staxFile));
        System.out.println("attribute read with dom = " + readWithDom(staxFile));
        // System.out.println("attribute read with xom = " + readWithXom(staxFile));
        System.out.println();

        // returns "foo NEWLINE bar" because writeWithDom escapes the newline to "&#10;"
        Path domFile = writeWithDom("foo\nbar");
        System.out.println("domFile = " + Files.readString(domFile));
        System.out.println("attribute read with stax = " + readWithStax(domFile));
        System.out.println("attribute read with dom = " + readWithDom(domFile));
        // System.out.println("attribute read with xom = " + readWithXom(domFile));
        System.out.println();

        // // returns "foo NEWLINE bar" because writeWithXom escapes the newline to "&#x0A;"
        // Path xomFile = writeWithXom("foo\nbar");
        // System.out.println("xomFile = " + Files.readString(xomFile));
        // System.out.println("attribute read with stax = " + readWithStax(xomFile));
        // System.out.println("attribute read with dom = " + readWithDom(xomFile));
        // System.out.println("attribute read with xom = " + readWithXom(xomFile));
        // System.out.println();
    }

    // Naive StAX variant, kept for comparison: the attribute value is handed
    // straight to XMLStreamWriter.writeAttribute.
    //
    // private static Path writeWithStax(String text) throws Exception {
    //     Path file = Files.createTempFile("stax", ".xml");
    //     try (OutputStream out = Files.newOutputStream(file)) {
    //         XMLOutputFactory factory = XMLOutputFactory.newInstance();
    //         XMLStreamWriter writer = factory.createXMLStreamWriter(out);
    //         writer.writeStartDocument();
    //         writer.writeStartElement("element");
    //         writer.writeAttribute("attribute", text);
    //         writer.writeEndElement();
    //         writer.writeEndDocument();
    //         writer.flush();
    //         writer.close();
    //     }
    //     return file;
    // }

    /**
     * Writes {@code <element attribute="..."/>} to a new temporary file using
     * StAX, escaping tab, newline and carriage return in the attribute value.
     *
     * @param text the attribute value
     * @return the temporary file that was written
     * @throws Exception if the file cannot be created or written
     */
    private static Path writeWithStax(String text) throws Exception {
        Path file = Files.createTempFile("stax", ".xml");
        try (Writer out = Files.newBufferedWriter(file, StandardCharsets.UTF_8)) {
            XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(out);
            try {
                writer.writeStartDocument();
                writer.writeStartElement("element");
                // hack to preserve whitespace
                writeAttributePreservingWhitespace(out, writer, "attribute", text);
                writer.writeEndElement();
                writer.writeEndDocument();
                writer.flush();
            } finally {
                // XMLStreamWriter is not AutoCloseable, so close it by hand
                // even when one of the write calls above fails.
                writer.close();
            }
        }
        return file;
    }

    /**
     * Writes one attribute on the currently open start tag. If the value
     * contains a tab, newline or carriage return, the attribute is written
     * directly to {@code out} with those characters as character references;
     * otherwise it is delegated to {@link XMLStreamWriter#writeAttribute(String, String)}.
     * <p>
     * See https://www.w3.org/TR/xml/#AVNormalize
     *
     * @param out    the writer underneath {@code writer}; it must be the same
     *               instance the stream writer was created on, otherwise the
     *               raw attribute text lands in the wrong place
     * @param writer the stream writer, positioned directly after a
     *               {@code writeStartElement} call (or a previous attribute)
     * @param name   the attribute name
     * @param value  the attribute value, must not be {@code null}
     * @throws Exception if writing fails
     */
    private static void writeAttributePreservingWhitespace(Writer out, XMLStreamWriter writer, String name, String value) throws Exception {
        boolean hasWhitespaceToEscape =
                value.contains("\t") || value.contains("\n") || value.contains("\r");
        if (!hasWhitespaceToEscape) {
            writer.writeAttribute(name, value);
            return;
        }
        // Push out whatever the stream writer has produced so far so the raw
        // attribute text is appended behind the element name.
        writer.flush();
        out.write(" ");
        writeXMLContent(out, name);
        out.write("=\"");
        writeXMLContent(out, value);
        out.write("\"");
    }

    /**
     * Writes {@code text} to {@code out}, replacing {@code < & > "} with
     * entity references and tab, newline and carriage return with
     * hexadecimal character references.
     *
     * @param out  the destination
     * @param text the text to escape and write
     * @throws Exception if writing fails
     */
    private static void writeXMLContent(Writer out, String text) throws Exception {
        for (char ch : text.toCharArray()) {
            switch (ch) {
                case '<' -> out.write("&lt;");
                case '&' -> out.write("&amp;");
                case '>' -> out.write("&gt;");
                case '"' -> out.write("&quot;");
                case '\t' -> out.write("&#x9;");
                case '\n' -> out.write("&#xA;");
                case '\r' -> out.write("&#xD;");
                default -> out.write(ch);
            }
        }
    }

    /**
     * Writes {@code <element attribute="..."/>} to a new temporary file by
     * building a DOM document and serializing it with a default
     * {@link Transformer}.
     *
     * @param text the attribute value
     * @return the temporary file that was written
     * @throws Exception if the file cannot be created or written
     */
    private static Path writeWithDom(String text) throws Exception {
        Path file = Files.createTempFile("dom", ".xml");
        try (OutputStream out = Files.newOutputStream(file)) {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document document = builder.newDocument();
            Element element = document.createElement("element");
            element.setAttribute("attribute", text);
            document.appendChild(element);

            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform(new DOMSource(document), new StreamResult(out));
        }
        return file;
    }

    // private static Path writeWithXom(String text) throws Exception {
    //     Path file = Files.createTempFile("xom", ".xml");
    //     var element = new nu.xom.Element("element");
    //     var attribute = new nu.xom.Attribute("attribute", text);
    //     element.addAttribute(attribute);
    //     var doc = new nu.xom.Document(element);
    //     String result = doc.toXML();
    //     Files.writeString(file, result);
    //     return file;
    // }

    /**
     * Reads the {@code attribute} attribute of the first {@code element}
     * start tag in {@code file} using StAX.
     *
     * @param file the XML file to read
     * @return the attribute value, or {@code null} if no such element or
     *         attribute is present
     * @throws Exception if the file cannot be read or parsed
     */
    private static String readWithStax(Path file) throws Exception {
        try (InputStream in = Files.newInputStream(file)) {
            XMLStreamReader reader = XMLInputFactory.newInstance().createXMLStreamReader(in);
            try {
                while (reader.hasNext()) {
                    int event = reader.next();
                    if (event == XMLStreamReader.START_ELEMENT
                            && "element".equals(reader.getLocalName())) {
                        return reader.getAttributeValue(null, "attribute");
                    }
                }
            } finally {
                // XMLStreamReader is not AutoCloseable; release it explicitly.
                reader.close();
            }
        }
        return null;
    }

    /**
     * Reads the {@code attribute} attribute of the document element of
     * {@code file} using DOM.
     *
     * @param file the XML file to read
     * @return the attribute value as returned by {@link Element#getAttribute(String)}
     * @throws Exception if the file cannot be read or parsed
     */
    private static String readWithDom(Path file) throws Exception {
        try (InputStream in = Files.newInputStream(file)) {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document document = builder.parse(in);
            return document.getDocumentElement()
                    .getAttribute("attribute");
        }
    }

    // private static String readWithXom(Path file) throws Exception {
    //     try (InputStream in = Files.newInputStream(file)) {
    //         var parser = new nu.xom.Builder();
    //         var document = parser.build(in);
    //         return document.getRootElement()
    //                 .getAttribute("attribute")
    //                 .getValue();
    //     }
    // }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment