Created
February 15, 2015 15:55
-
-
Save Ferada/5bc34d20625dff72e15d to your computer and use it in GitHub Desktop.
CXML embedding DTD incorrectly, https://stackoverflow.com/questions/26738465/non-valid-output-of-broadcast-handler-in-common-lisp-closure-xml-package/28528117#28528117
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 991fac513dbd9b86628f99741a66d791552b1f02 Mon Sep 17 00:00:00 2001 | |
From: Olof-Joachim Frahm <[email protected]> | |
Date: Sun, 15 Feb 2015 15:56:21 +0000 | |
Subject: [PATCH] Fix DTD embedding. | |
--- | |
xml/xml-parse.lisp | 4 +++- | |
1 file changed, 3 insertions(+), 1 deletion(-) | |
diff --git a/xml/xml-parse.lisp b/xml/xml-parse.lisp | |
index cfbb441..dc31e3a 100644 | |
--- a/xml/xml-parse.lisp | |
+++ b/xml/xml-parse.lisp | |
@@ -2606,12 +2606,14 @@ (defun p/doctype-decl (input &optional dtd-extid) | |
(let ((xi2 (xstream-open-extid effective-extid))) | |
(with-zstream (zi2 :input-stack (list xi2)) | |
(ensure-dtd) | |
+ (sax:start-internal-subset (handler *ctx*)) | |
(p/ext-subset zi2) | |
(when (and fresh-dtd-p | |
*cache-all-dtds* | |
*validate* | |
(not (standalone-p *ctx*))) | |
- (setf (getdtd sysid *dtd-cache*) (dtd *ctx*))))))))) | |
+ (setf (getdtd sysid *dtd-cache*) (dtd *ctx*))) | |
+ (sax:end-internal-subset (handler *ctx*)))))))) | |
(sax:end-dtd (handler *ctx*)) | |
(let ((dtd (dtd *ctx*))) | |
(sax:entity-resolver | |
-- | |
1.7.10.4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!ELEMENT EM #PCDATA> | |
<!ATTLIST EM ID CDATA #REQUIRED> | |
<!ATTLIST EM CATEG CDATA #IMPLIED> | |
<!ATTLIST EM TIPO CDATA #IMPLIED> | |
<!ATTLIST EM COMENT CDATA #IMPLIED> | |
<!ATTLIST EM SUBTIPO CDATA #IMPLIED> | |
<!ELEMENT ALT (#PCDATA|EM)*> | |
<!ELEMENT OMITIDO (#PCDATA|EM|ALT|p)*> | |
<!ELEMENT colHAREM (DOC)*> | |
<!ATTLIST colHAREM versao CDATA #REQUIRED> | |
<!ELEMENT p (#PCDATA|EM|OMITIDO|ALT)*> | |
<!ATTLIST p xml:space (default|preserve) "default"> | |
<!ELEMENT DOC (#PCDATA|p|OMITIDO)*> | |
<!ATTLIST DOC DOCID CDATA #REQUIRED> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE colHAREM SYSTEM "harem.dtd"> | |
<colHAREM versao="Segundo_dourada_com_relacoes_14Abril2010"> | |
<DOC DOCID="H2-dftre765"> | |
<p></p> | |
</DOC> | |
</colHAREM> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE colHAREM SYSTEM "harem.dtd" [ | |
<!ELEMENT EM #PCDATA> | |
<!ATTLIST EM ID CDATA #REQUIRED> | |
<!ATTLIST EM CATEG CDATA #IMPLIED> | |
<!ATTLIST EM TIPO CDATA #IMPLIED> | |
<!ATTLIST EM COMENT CDATA #IMPLIED> | |
<!ATTLIST EM SUBTIPO CDATA #IMPLIED> | |
<!ELEMENT ALT (#PCDATA|EM)*> | |
<!ELEMENT OMITIDO (#PCDATA|EM|ALT|p)*> | |
<!ELEMENT colHAREM (DOC)*> | |
<!ATTLIST colHAREM versao CDATA #REQUIRED> | |
<!ELEMENT p (#PCDATA|EM|OMITIDO|ALT)*> | |
<!ATTLIST p xml:space (default|preserve) "default"> | |
<!ELEMENT DOC (#PCDATA|p|OMITIDO)*> | |
<!ATTLIST DOC DOCID CDATA #REQUIRED> | |
]> | |
<colHAREM versao="Segundo_dourada_com_relacoes_14Abril2010"> | |
<DOC DOCID="H2-dftre765"> | |
<p xml:space="default"/> | |
</DOC> | |
</colHAREM> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defclass preproc (cxml:sax-proxy)
  ()
  (:documentation
   "SAX handler derived from CXML:SAX-PROXY that adds no slots of its own.
It exists so that individual SAX events (see the SAX:CHARACTERS method
specialised on this class) can be intercepted before they reach the
chained handler."))
(defmethod sax:characters ((handler preproc) data)
  "Forward DATA to the next handler, replacing every | character with an
empty `bar' element.

DATA is the character data of the event; it is scanned with POSITION, so
it is assumed to be a string (TODO confirm for Lisps where CXML rods are
not strings).

The text is cut at each | by hand rather than with CL-PPCRE:SPLIT,
because SPLIT without a :LIMIT discards trailing empty fields.  That
made a trailing | (as in a|, or a lone |) lose its `bar' element, and
made empty DATA forward NIL instead of a string.  Here every | yields
exactly one `bar' element and every forwarded segment is a string
(possibly empty)."
  (loop with start = 0
        ;; BAR is the index of the next separator, or NIL when none is left.
        for bar = (position #\| data :start start)
        ;; Forward the text up to the separator (or up to the end of DATA).
        do (call-next-method handler (subseq data start bar))
        while bar
        ;; Emit the empty element through HANDLER itself, as the original
        ;; code did, so that it travels down the normal handler chain.
        do (sax:start-element handler nil nil "bar" '())
           (sax:end-element handler nil nil "bar")
           (setf start (1+ bar))))
(defun foo ()
  "Parse harem.xml with validation switched on, sending the SAX events
through a PREPROC handler into a character stream sink that writes
teste.xml (superseding any existing file of that name)."
  (with-open-file (out #P"teste.xml"
                       :direction :output
                       :if-exists :supersede)
    (let* ((sink (cxml:make-character-stream-sink out))
           (preprocessor (make-instance 'preproc :chained-handler sink)))
      (cxml:parse #P"harem.xml" preprocessor :validate t))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment