Skip to content

Instantly share code, notes, and snippets.

@bblfish
Created January 31, 2012 22:23
Show Gist options
  • Save bblfish/1713430 to your computer and use it in GitHub Desktop.
Save bblfish/1713430 to your computer and use it in GitHub Desktop.
Work on adapting Jena's StAX2SAX to a non-blocking parser
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package patch;
import com.fasterxml.aalto.AsyncXMLStreamReader;
import com.fasterxml.aalto.stax.InputFactoryImpl;
import java.util.Iterator;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.Comment;
import javax.xml.stream.events.DTD;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.EntityDeclaration;
import javax.xml.stream.events.EntityReference;
import javax.xml.stream.events.Namespace;
import javax.xml.stream.events.ProcessingInstruction;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
/**
*
* Bridge StAX and SAX parsing, with support for asynchronous parsing
*
* @author Henry Story adapted from Damian Steer's jena.rdf.arp.StAX2SAX
*/
public class AsyncJenaParser {
private final ContentHandler handler;
private final LexicalHandler lhandler;
private final XMLEventReader eventReader;
private final AsyncXMLStreamReader streamReader;
/**
* Primes a converter with a SAX handler.
*
* Note: if handler is also LexicalHandler it will pass on lexical events.
*
* @param handler
*/
public AsyncJenaParser(ContentHandler handler, AsyncXMLStreamReader streamReader) throws XMLStreamException {
this.handler = handler;
this.lhandler = (handler instanceof LexicalHandler) ?
(LexicalHandler) handler :
NO_LEXICAL_HANDLER ;
handler.setDocumentLocator(new LocatorConv(streamReader));
final XMLInputFactory xf = InputFactoryImpl.newInstance();
this.streamReader = streamReader;
this.eventReader = xf.createXMLEventReader(streamReader);
}
public void parse() throws XMLStreamException, SAXException {
// We permit nesting, so keep at track of where we are
int level = 0;
while (eventReader.hasNext()) {
XMLEvent e = eventReader.nextEvent();
if (e.getEventType() == AsyncXMLStreamReader.EVENT_INCOMPLETE) return
if (e.isStartDocument()) handler.startDocument();
else if (e.isEndDocument()) handler.endDocument();
else if (e.isStartElement()) { emitSE(e.asStartElement()); level++; }
else if (e.isEndElement()) {
emitEE(e.asEndElement());
level--;
if (level == 0) break;
}
else if (e.isProcessingInstruction()) emitPi((ProcessingInstruction) e);
else if (e.isCharacters()) emitChars(e.asCharacters());
else if (e.isAttribute()) emitAttr((Attribute) e);
else if (e.isEntityReference()) emitEnt((EntityDeclaration) e);
else if (e.isNamespace()) emitNS((Namespace) e);
else if (e instanceof Comment) emitComment((Comment) e);
else if (e instanceof DTD) emitDTD((DTD) e);
else {
//System.err.println("Unknown / unhandled event type " + e);
//throw new SAXException("Unknown / unhandled event type " + e);
}
}
}
private void emitSE(StartElement se) throws SAXException {
@SuppressWarnings("unchecked")
Iterator<Attribute> aIter = se.getAttributes() ;
handler.startElement(se.getName().getNamespaceURI(),
se.getName().getLocalPart(), qnameToS(se.getName()), convertAttrs(aIter));
@SuppressWarnings("unchecked")
Iterator<Namespace> it = se.getNamespaces();
while (it.hasNext()) emitNS(it.next());
}
private void emitEE(EndElement ee) throws SAXException {
handler.endElement(ee.getName().getNamespaceURI(),
ee.getName().getLocalPart(), qnameToS(ee.getName()));
@SuppressWarnings("unchecked")
Iterator<Namespace> it = ee.getNamespaces();
while (it.hasNext()) emitNSGone(it.next());
}
private void emitPi(ProcessingInstruction pi) throws SAXException {
handler.processingInstruction(pi.getTarget(), pi.getData());
}
private void emitChars(Characters chars) throws SAXException {
if (chars.isIgnorableWhiteSpace())
handler.ignorableWhitespace(chars.getData().toCharArray(),
0, chars.getData().length());
else
handler.characters(chars.getData().toCharArray(),
0, chars.getData().length());
}
private void emitAttr(Attribute attribute) {
// nowt to do
}
private void emitEnt(EntityDeclaration entityDeclaration) {
// nowt to do
}
private void emitNS(Namespace namespace) throws SAXException {
// Safety check to work around nasty tests
if (namespace.getPrefix() == null ||
namespace.getNamespaceURI() == null) return;
handler.startPrefixMapping(namespace.getPrefix(), namespace.getNamespaceURI());
}
private void emitNSGone(Namespace namespace) throws SAXException {
handler.endPrefixMapping(namespace.getPrefix());
}
private void emitComment(Comment comment) throws SAXException {
lhandler.comment(comment.getText().toCharArray(), 0, comment.getText().length());
}
private void emitDTD(DTD dtd) {
// Is this useful??
}
private Attributes convertAttrs(Iterator<Attribute> attributes) {
AttributesImpl toReturn = new AttributesImpl();
while (attributes.hasNext()) {
Attribute a = attributes.next();
toReturn.addAttribute(a.getName().getNamespaceURI(), a.getName().getLocalPart(),
qnameToS(a.getName()), a.getDTDType(), a.getValue());
}
return toReturn;
}
private String qnameToS(QName name) {
if (name.getPrefix().length() == 0) return name.getLocalPart();
else return name.getPrefix() + ":" + name.getLocalPart();
}
static class LocatorConv implements Locator {
private final XMLStreamReader reader;
public LocatorConv(XMLStreamReader reader) { this.reader = reader; }
@Override
public final String getPublicId() { return reader.getLocation().getPublicId(); }
@Override
public final String getSystemId() { return reader.getLocation().getSystemId(); }
@Override
public final int getLineNumber() { return reader.getLocation().getLineNumber(); }
@Override
public final int getColumnNumber() { return reader.getLocation().getColumnNumber(); }
}
final static LexicalHandler NO_LEXICAL_HANDLER = new LexicalHandler() {
@Override
public void startDTD(String string, String string1, String string2) throws SAXException {}
@Override
public void endDTD() throws SAXException {}
@Override
public void startEntity(String string) throws SAXException {}
@Override
public void endEntity(String string) throws SAXException {}
@Override
public void startCDATA() throws SAXException {}
@Override
public void endCDATA() throws SAXException {}
@Override
public void comment(char[] chars, int i, int i1) throws SAXException {}
};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment