Skip to content

Instantly share code, notes, and snippets.

@romain-grecourt
Created April 15, 2020 03:53
Show Gist options
  • Save romain-grecourt/8218a1ba2a6c91cbcadbddd9d8f9f28e to your computer and use it in GitHub Desktop.
Save romain-grecourt/8218a1ba2a6c91cbcadbddd9d8f9f28e to your computer and use it in GitHub Desktop.
reactive html encoder
/**
 * Reactive HTML entities "encoder".
 *
 * <p>Each incoming {@link DataChunk} is offered to a {@code VirtualBuffer}, scanned byte by byte
 * as UTF-8, and every decoded character for which {@code HtmlEscape.entityId(char)} returns a
 * non-negative id is replaced in place by the pre-encoded bytes of the matching entity. The
 * resulting buffers are then submitted downstream.
 *
 * <p>Only 1, 2 and 3 byte UTF-8 sequences are decoded. Anything else (4 byte sequences, malformed
 * input, a sequence truncated by the end of the chunk) is passed through untouched.
 * NOTE(review): a multi-byte character split across two chunks is therefore never escaped, since
 * the buffer is drained at the end of every {@link #hookOnNext(DataChunk)} call - confirm this is
 * acceptable for the entities declared in {@code HtmlEscape}.
 */
class HtmlEntityEncoder extends BaseProcessor<DataChunk, DataChunk> {

    /**
     * Cache of the encoded entity replacement bytes, indexed by entity id.
     * The charset is pinned to UTF-8 so the emitted bytes do not depend on the platform default.
     */
    private static final ByteBufferCache BBCACHE = new ByteBufferCache(
            (int index) -> ByteBuffer.wrap(
                    HtmlEscape.ENTITIES[index][1].getBytes(java.nio.charset.StandardCharsets.UTF_8)),
            HtmlEscape.ENTITIES.length);

    /** Buffer holding the data of the chunk being processed, plus any inserted content. */
    protected final VirtualBuffer vbuf = new VirtualBuffer();

    /** Set once {@link #onPageStart()} has been invoked, i.e. after the first chunk is seen. */
    private boolean started;

    /**
     * Create a new encoder.
     */
    HtmlEntityEncoder() {
    }

    /**
     * Insert the given buffer into {@link #vbuf} and advance the position past it, so the
     * inserted bytes are not scanned again.
     *
     * @param bb buffer to insert
     */
    protected final void insert(ByteBuffer bb) {
        vbuf.insert(bb);
        vbuf.incrementPosition(bb.remaining());
    }

    /**
     * Signals the start of the page. Invoked once, while processing the first chunk and before
     * any of its bytes are scanned. The default implementation does nothing.
     */
    protected void onPageStart() {
    }

    /**
     * Signals the end of the page. Invoked on completion, before the remaining content of
     * {@link #vbuf} is submitted. The default implementation does nothing.
     */
    protected void onPageEnd() {
    }

    /**
     * Encode the HTML entities found in the given chunk and submit the resulting buffers.
     *
     * @param item chunk to process
     */
    @Override
    public final void hookOnNext(DataChunk item) {
        vbuf.offer(item.data());
        VirtualChunk.Parent parent = new VirtualChunk.Parent(item);
        if (!started) {
            onPageStart();
            started = true;
        }
        while (vbuf.remaining() > 0) {
            byte b1 = vbuf.get();
            // bytes still readable after the lead byte
            int available = vbuf.limit() - vbuf.position();
            // number of bytes consumed by a successfully decoded character, 0 if none was decoded
            int nbytes = 0;
            char c = '?';
            if (b1 >= 0) {
                // 1 byte, 7 bits: 0xxxxxxx
                nbytes = 1;
                c = (char) b1;
            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
                // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
                // Same guard as the 3 bytes case: do not read a continuation byte
                // that is not available in the buffer.
                if (available >= 1) {
                    int b2 = vbuf.get();
                    if ((b2 & 0xc0) == 0x80) {
                        nbytes = 2;
                        c = (char) (((b1 << 6) ^ b2)
                                ^ (((byte) 0xC0 << 6)
                                ^ ((byte) 0x80)));
                    }
                }
            } else if ((b1 >> 4) == -2) {
                // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
                if (available >= 2) {
                    int b2 = vbuf.get();
                    int b3 = vbuf.get();
                    if (!((b1 == (byte) 0xe0 && (b2 & 0xe0) == 0x80)
                            || (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80)) {
                        c = (char) ((b1 << 12)
                                ^ (b2 << 6)
                                ^ (b3
                                ^ (((byte) 0xE0 << 12)
                                ^ ((byte) 0x80 << 6)
                                ^ ((byte) 0x80))));
                        // surrogates are not valid in a 3 bytes sequence
                        if (!Character.isSurrogate(c)) {
                            nbytes = 3;
                        }
                    }
                }
            }
            // Only replace characters that were actually decoded; otherwise an entity could be
            // inserted without removing any source byte.
            if (nbytes > 0) {
                int htmlEntityId = HtmlEscape.entityId(c);
                if (htmlEntityId >= 0) {
                    vbuf.deletePrevious(nbytes);
                    insert(BBCACHE.get(htmlEntityId));
                }
            }
        }
        // It's not possible to submit a VirtualBuffer as-is (yet)
        // using drop() to get the underlying buffers as-is
        for (ByteBuffer bb : vbuf.position(0).drop()) {
            submit(new VirtualChunk(parent, bb));
            // re-using buffers
            bb.position(0);
        }
    }

    /**
     * Signal the end of the page, then submit whatever is left in {@link #vbuf}.
     */
    @Override
    protected void hookOnComplete() {
        onPageEnd();
        for (ByteBuffer bb : vbuf.position(0).drop()) {
            submit(DataChunk.create(bb));
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment