Skip to content

Instantly share code, notes, and snippets.

@felipecrv
Last active November 24, 2025 02:19
Show Gist options
  • Select an option

  • Save felipecrv/16a75847380dfbbe148bdaa2c7417bf3 to your computer and use it in GitHub Desktop.

Select an option

Save felipecrv/16a75847380dfbbe148bdaa2c7417bf3 to your computer and use it in GitHub Desktop.
Serialization of Cap'n Proto messages to JSON using runtime reflection [1], serde_json [2], and base64 [3].
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use capnp::introspect::TypeVariant;
use capnp::schema::FieldSubset;
use capnp::schema_capnp::field;
use capnp::{dynamic_struct, dynamic_value};
use serde_core::ser::Error;
use serde_json::ser::{self, CharEscape, Formatter};
use std::{fmt, io};
/// Options for JSON serialization.
///
/// Build one with [SerOptions::compact], [SerOptions::pretty],
/// [SerOptions::with_indent] or `Default::default()`, then pass it to [serialize].
pub struct SerOptions {
    /// Insert newlines, indentation, and other extra spacing into the output.
    /// The default is to use minimal whitespace (pretty = false).
    pub pretty: bool,
    /// Indentation string to use when pretty printing (pretty = true). The default is two spaces.
    pub indent: &'static [u8],
    /// Maximum nesting depth when serializing, to prevent highly nested values from overflowing
    /// the call stack. Every list element and struct field consumes one level while it is being
    /// written; serialization fails with an error once the budget is exhausted.
    /// The default is 64.
    pub max_nesting_depth: usize,
    /// See [HasMode].
    pub has_mode: HasMode,
    /// Whether to always print enums as ints.
    ///
    /// By default they are rendered as strings.
    pub always_print_enums_as_int: bool,
    /// See [BinaryFormat].
    pub binary_format: BinaryFormat,
}
impl Default for SerOptions {
    /// Returns the default options: compact output, a two-space indent (only
    /// used once `pretty` is enabled), a nesting limit of 64, null-only field
    /// omission, enums rendered by name, and `Data` rendered as an array of
    /// integers.
    fn default() -> Self {
        Self {
            pretty: false,
            // Two spaces, as documented on `SerOptions::indent`.
            indent: b"  ",
            has_mode: HasMode::NonNull,
            max_nesting_depth: 64,
            always_print_enums_as_int: false,
            binary_format: BinaryFormat::ArrayOfInts,
        }
    }
}
impl SerOptions {
    /// Options for compact JSON output (no extra whitespace).
    ///
    /// Identical to the `Default` options.
    pub fn compact() -> Self {
        Default::default()
    }

    /// Options for pretty-printed JSON output (newlines and indentation),
    /// using the default indentation string.
    pub fn pretty() -> Self {
        let defaults = Self::default();
        Self::with_indent(defaults.indent)
    }

    /// Options for pretty-printed JSON output with a custom indentation string.
    pub fn with_indent(indent: &'static [u8]) -> Self {
        let base = Self::default();
        Self {
            indent,
            pretty: true,
            ..base
        }
    }
}
/// Controls which struct fields are omitted from the JSON output.
///
/// Normally, primitive field values are always included even if they are equal
/// to the default value ([HasMode::NonNull] -- only null pointers are omitted).
/// You can use [HasMode::NonDefault] to specify that default-valued primitive
/// fields should be omitted as well.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HasMode {
    /// Only omit null pointers.
    NonNull,
    /// Omit default-valued primitive fields as well as null pointers.
    NonDefault,
}
/// Format to use for binary data (Cap'n Proto Data fields).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BinaryFormat {
    /// Encode Data fields as JSON arrays of integers.
    ///
    /// The C++ implementation renders data as a JSON array of integers.
    ArrayOfInts,
    /// Encode Data fields as base64 strings.
    Base64,
}
/// Serialize a Cap'n Proto dynamic value to JSON.
///
/// Writes the JSON representation of `value` to `writer`. `options.pretty`
/// selects between the pretty-printing and the compact formatter; the
/// remaining options are forwarded to the serializer unchanged.
///
/// # Errors
///
/// Returns whatever error the underlying serializer reports.
pub fn serialize<W>(
    writer: &mut W,
    value: dynamic_value::Reader,
    options: &SerOptions,
) -> Result<(), serde_json::Error>
where
    W: io::Write,
{
    if !options.pretty {
        return Serializer::compact(writer, options).serialize(value);
    }
    Serializer::pretty(writer, options).serialize(value)
}
/// JSON serializer for Cap'n Proto dynamic values.
///
/// `W` is the output sink and `F` the `serde_json` formatter that decides how
/// each JSON token is laid out (compact by default). The remaining fields are
/// copied from [SerOptions] when the serializer is constructed.
struct Serializer<W, F = ser::CompactFormatter> {
    /// The destination the formatter will write to.
    writer: W,
    /// The formatter that controls how JSON is written.
    formatter: F,
    /// Maximum nesting depth remaining. See [SerOptions::max_nesting_depth].
    ///
    /// Decremented by `begin_nested` and restored by `end_nested`.
    max_nesting_depth: usize,
    /// See [HasMode].
    has_mode: HasMode,
    /// Whether to always print enums as ints. See [SerOptions::always_print_enums_as_int].
    always_print_enums_as_int: bool,
    /// See [BinaryFormat].
    binary_format: BinaryFormat,
}
impl<W> Serializer<W>
where
W: io::Write,
{
/// Creates a new JSON serializer for Cap'n Proto.
#[inline]
pub fn compact(writer: W, options: &SerOptions) -> Self {
Self {
writer,
formatter: ser::CompactFormatter,
max_nesting_depth: options.max_nesting_depth,
has_mode: options.has_mode,
always_print_enums_as_int: options.always_print_enums_as_int,
binary_format: options.binary_format,
}
}
}
impl<'a, W> Serializer<W, ser::PrettyFormatter<'a>>
where
W: io::Write,
{
/// Creates a new JSON pretty print serializer for Cap'n Proto that uses the `indent` string
/// for indentation.
#[inline]
pub fn pretty(writer: W, options: &SerOptions) -> Self {
Self {
writer,
formatter: ser::PrettyFormatter::with_indent(options.indent),
max_nesting_depth: options.max_nesting_depth,
has_mode: options.has_mode,
always_print_enums_as_int: options.always_print_enums_as_int,
binary_format: options.binary_format,
}
}
}
impl<W, F> Serializer<W, F>
where
W: io::Write,
F: ser::Formatter,
{
pub fn serialize(&mut self, value: dynamic_value::Reader) -> Result<(), serde_json::Error> {
// Cap'n Proto C++ implementation [1].
//
// [1] https://github.com/capnproto/capnproto/blob/164284a476eee4bfbcf7cce2a9a82fb156504210/c%2B%2B/src/capnp/compat/json.c%2B%2B#L225
use base64::engine::general_purpose::STANDARD;
use base64::write::EncoderWriter;
use io::Write as _;
use serde_json::Error;
match value {
dynamic_value::Reader::Void => self
.formatter
.write_null(&mut self.writer)
.map_err(Error::io),
dynamic_value::Reader::Bool(x) => self
.formatter
.write_bool(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::Int8(x) => self
.formatter
.write_i8(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::Int16(x) => self
.formatter
.write_i16(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::Int32(x) => self
.formatter
.write_i32(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::Int64(x) => self
.formatter
.write_i64(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::UInt8(x) => self
.formatter
.write_u8(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::UInt16(x) => self
.formatter
.write_u16(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::UInt32(x) => self
.formatter
.write_u32(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::UInt64(x) => self
.formatter
.write_u64(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::Float32(x) => self
.formatter
.write_f32(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::Float64(x) => self
.formatter
.write_f64(&mut self.writer, x)
.map_err(Error::io),
dynamic_value::Reader::Enum(e) => {
if !self.always_print_enums_as_int {
if let Some(enumerant) = c2j(e.get_enumerant())? {
let name = c2j(enumerant.get_proto().get_name())?;
// safe=true: enum names are always JSON-safe
return self.serialize_text(name, true).map_err(Error::io);
}
}
self.formatter
.write_u16(&mut self.writer, e.get_value())
.map_err(Error::io)
}
dynamic_value::Reader::Text(t) => self.serialize_text(t, false).map_err(Error::io),
dynamic_value::Reader::Data(d) => {
match self.binary_format {
BinaryFormat::ArrayOfInts => self
.formatter
.write_byte_array(&mut self.writer, d)
.map_err(Error::io),
BinaryFormat::Base64 => {
self.formatter
.begin_string(&mut self.writer)
.map_err(Error::io)?;
{
// JSON string literal escaping is not necessary because
// all base64 characters are safe.
let mut encoder =
EncoderWriter::new(WriteRef::new(&mut self.writer), &STANDARD);
encoder
.write_all(d)
.and_then(|_| encoder.finish())
.map_err(Error::io)?;
}
self.formatter
.end_string(&mut self.writer)
.map_err(Error::io)
}
}
}
dynamic_value::Reader::List(list) => {
self.formatter
.begin_array(&mut self.writer)
.map_err(Error::io)?;
for (idx, value) in list.iter().enumerate() {
self.formatter
.begin_array_value(&mut self.writer, idx == 0)
.map_err(Error::io)?;
self.begin_nested().map_err(Error::io)?;
self.serialize(c2j(value)?)?;
self.end_nested();
self.formatter
.end_array_value(&mut self.writer)
.map_err(Error::io)?;
}
self.formatter
.end_array(&mut self.writer)
.map_err(Error::io)
}
dynamic_value::Reader::Struct(st) => {
let schema = st.get_schema();
let non_union_fields: FieldSubset = c2j(schema.get_non_union_fields())?;
let mut has_field = SmallBitset::new(non_union_fields.len() as usize);
let mut field_count = 0;
for (i, field) in non_union_fields.iter().enumerate() {
if c2j(has(&st, field, self.has_mode))? {
field_count += 1;
has_field.set(i);
}
}
// We try to write the union field, if any, in proper order with the rest.
let mut which = c2j(st.which())?;
let has_union_field = if let Some(field) = which {
// Even if the union field is null, if it is not the default field
// of the union then we have to print it anyway.
let union_field_is_null = !has(&st, field, self.has_mode).unwrap_or(false);
if field.get_proto().get_discriminant_value() != 0 || !union_field_is_null {
field_count += 1;
true
} else {
which = None;
false
}
} else {
false
};
self.formatter
.begin_object(&mut self.writer)
.map_err(Error::io)?;
let mut pos = 0;
let mut first = true;
for (i, field) in non_union_fields.iter().enumerate() {
if let Some(union_field) = which {
if union_field.get_index() < field.get_index() {
pos += 1;
let name = c2j(union_field.get_proto().get_name())?;
if has_union_field {
let value = c2j(st.get(union_field))?;
self.serialize_struct_field(name, value, first)?;
first = false;
}
which = None;
}
}
if has_field.get(i) {
pos += 1;
let name = c2j(field.get_proto().get_name())?;
let value = c2j(st.get(field))?;
self.serialize_struct_field(name, value, first)?;
first = false;
}
}
if let Some(union_field) = which {
// Union field not printed yet; must be last.
pos += 1;
let name = c2j(union_field.get_proto().get_name())?;
if has_union_field {
let value = c2j(st.get(union_field))?;
self.serialize_struct_field(name, value, first)?;
}
}
debug_assert!(pos == field_count);
self.formatter
.end_object(&mut self.writer)
.map_err(Error::io)
}
dynamic_value::Reader::AnyPointer(_) => {
unimplemented!("cannot serialize AnyPointer to JSON")
}
dynamic_value::Reader::Capability(_) => {
unimplemented!("cannot serialize Capability to JSON")
}
}
}
#[inline]
fn serialize_struct_field(
&mut self,
name: capnp::text::Reader<'static>,
value: dynamic_value::Reader<'_>,
first: bool,
) -> Result<(), serde_json::Error> {
use serde_json::Error;
self.formatter
.begin_object_key(&mut self.writer, first)
.map_err(Error::io)?;
// safe=true: struct field names are always JSON-safe
self.serialize_text(name, true).map_err(Error::io)?;
self.formatter
.end_object_key(&mut self.writer)
.map_err(Error::io)?;
self.formatter
.begin_object_value(&mut self.writer)
.map_err(Error::io)?;
self.begin_nested().map_err(Error::io)?;
self.serialize(value)?;
self.end_nested();
self.formatter
.end_object_value(&mut self.writer)
.map_err(Error::io)?;
Ok(())
}
/// Writes `reader` as a JSON string literal.
///
/// When `safe` is true the text is copied verbatim (the caller guarantees it
/// needs no escaping); otherwise it is escaped. Text that is not valid UTF-8
/// is replaced by a placeholder string listing the raw bytes.
#[inline]
fn serialize_text(&mut self, reader: capnp::text::Reader<'_>, safe: bool) -> io::Result<()> {
    let decoded = reader.to_str();
    self.formatter.begin_string(&mut self.writer)?;
    match decoded {
        Ok(text) if safe => self.writer.write_all(text.as_bytes())?,
        Ok(text) => {
            format_escaped_str_contents(&mut self.writer, &mut self.formatter, text)?
        }
        // JSON string literal escaping is not necessary because the bytes are rendered
        // by the `fmt::Debug` impl for `&[u8]`, which renders it as `[1, 2, 3, ...]`.
        Err(_) => write!(&mut self.writer, "<invalid utf-8: {:?}>", reader.as_bytes())?,
    }
    self.formatter.end_string(&mut self.writer)
}
/// Consumes one level of the remaining nesting budget.
///
/// Fails with an I/O error once the budget is exhausted.
#[inline]
fn begin_nested(&mut self) -> io::Result<()> {
    match self.max_nesting_depth.checked_sub(1) {
        Some(remaining) => {
            self.max_nesting_depth = remaining;
            Ok(())
        }
        None => Err(io::Error::other("maximum nesting depth exceeded")),
    }
}

/// Gives back the nesting level taken by the matching `begin_nested` call.
#[inline]
fn end_nested(&mut self) {
    self.max_nesting_depth += 1;
}
}
// Escape codes stored in the `ESCAPE` table. Each constant is the character
// that follows the backslash in the JSON escape sequence; the trailing comment
// gives the input byte(s) it stands for.
const BB: u8 = b'b'; // \x08
const TT: u8 = b't'; // \x09
const NN: u8 = b'n'; // \x0A
const FF: u8 = b'f'; // \x0C
const RR: u8 = b'r'; // \x0D
const QU: u8 = b'"'; // \x22
const BS: u8 = b'\\'; // \x5C
const UU: u8 = b'u'; // \x00...\x1F except the ones above
const __: u8 = 0;
// Lookup table of escape sequences. A value of b'x' at index i means that byte
// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
//
// Each row covers 16 consecutive bytes: the row comment is the high nibble of
// the byte and the column header is the low nibble.
static ESCAPE: [u8; 256] = [
    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
    UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1
    __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
    __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
];
/// Writes the contents of a JSON string literal for `value`, without the
/// surrounding quotes.
///
/// Bytes that `ESCAPE` marks as needing an escape are written through
/// `Formatter::write_char_escape`; the runs of text between them are written
/// unchanged through `Formatter::write_string_fragment`.
///
/// # Errors
///
/// Returns the first error reported by `formatter` / `writer`.
fn format_escaped_str_contents<W, F>(
    writer: &mut W,
    formatter: &mut F,
    value: &str,
) -> io::Result<()>
where
    W: ?Sized + io::Write,
    F: ?Sized + Formatter,
{
    // Start of the pending run of bytes that need no escaping.
    let mut start = 0;
    for (i, &byte) in value.as_bytes().iter().enumerate() {
        let escape = ESCAPE[usize::from(byte)];
        if escape == 0 {
            continue;
        }
        // Only ASCII bytes are escaped, so `start` and `i` always fall on
        // char boundaries and the slices below cannot panic.
        if start < i {
            formatter.write_string_fragment(writer, &value[start..i])?;
        }
        let char_escape = match escape {
            BB => CharEscape::Backspace,
            TT => CharEscape::Tab,
            NN => CharEscape::LineFeed,
            FF => CharEscape::FormFeed,
            RR => CharEscape::CarriageReturn,
            QU => CharEscape::Quote,
            BS => CharEscape::ReverseSolidus,
            UU => CharEscape::AsciiControl(byte),
            _ => unreachable!("ESCAPE table contains an unknown escape code"),
        };
        formatter.write_char_escape(writer, char_escape)?;
        start = i + 1;
    }
    if start < value.len() {
        formatter.write_string_fragment(writer, &value[start..])?;
    }
    Ok(())
}
/// A alternative to `dynamic_struct::Reader::has(field)` that supports [HasMode].
///
/// It's inspired by the C++ implementation [1]. It would take fewer branches with
/// access to private fields of `dynamic_struct::Reader`, but we don't have that here.
///
/// [1] https://github.com/capnproto/capnproto/blob/c6e0a03f7398d985fedcfa40c95d1d9ecb36a92d/c%2B%2B/src/capnp/dynamic.c%2B%2B#L417
fn has(
reader: &dynamic_struct::Reader<'_>,
field: capnp::schema::Field,
has_mode: HasMode,
) -> capnp::Result<bool> {
let has_non_null = reader.has(field)?;
if !has_non_null {
return Ok(false);
}
// has_non_null is true from here
match has_mode {
HasMode::NonNull => return Ok(true), // has_non_null is true
HasMode::NonDefault => (), // has_non_default is handled below
}
// has_mode == HasMode::NonDefault from here
let proto = field.get_proto();
let _slot = match proto.which()? {
field::Slot(s) => s,
field::Group(_) => return Ok(true), // groups don't have defined default values
};
let ty = field.get_type();
let value = reader.get(field)?;
use dynamic_value::Reader::*;
let has_non_default: bool = match (ty.which(), value) {
(TypeVariant::Void, _) => {
// Void is always equal to the default.
false
}
(TypeVariant::Bool, Bool(b)) => b, // true is non-default
(TypeVariant::Int8, Int8(i)) => i != 0,
(TypeVariant::Int16, Int16(i)) => i != 0,
(TypeVariant::Int32, Int32(i)) => i != 0,
(TypeVariant::Int64, Int64(i)) => i != 0,
(TypeVariant::UInt8, UInt8(i)) => i != 0,
(TypeVariant::UInt16, UInt16(i)) => i != 0,
(TypeVariant::UInt32, UInt32(i)) => i != 0,
(TypeVariant::UInt64, UInt64(i)) => i != 0,
(TypeVariant::Float32, Float32(f)) => f.to_bits() != 0,
(TypeVariant::Float64, Float64(f)) => f.to_bits() != 0,
(TypeVariant::Enum(_), Enum(_)) => proto.get_discriminant_value() != 0,
(
TypeVariant::Bool
| TypeVariant::Int8
| TypeVariant::Int16
| TypeVariant::Int32
| TypeVariant::Int64
| TypeVariant::UInt8
| TypeVariant::UInt16
| TypeVariant::UInt32
| TypeVariant::UInt64
| TypeVariant::Float32
| TypeVariant::Float64
| TypeVariant::Enum(_),
_,
) => {
debug_assert!(false, "mismatched struct field type and dynamic value type");
true
}
(
TypeVariant::Text
| TypeVariant::Data
| TypeVariant::Struct(_)
| TypeVariant::List(_)
| TypeVariant::AnyPointer
| TypeVariant::Capability,
_,
) => {
// the default for these types is null, but has_non_null is already true
true
}
};
Ok(has_non_default)
}
/// Convert a Cap'n Proto result to a serde_json result.
fn c2j<T>(r: capnp::Result<T>) -> Result<T, serde_json::Error> {
r.map_err(|e| {
use fmt::Write as _;
let mut message = String::new();
let _ = write!(&mut message, "Cap'n Proto: {}", e);
serde_json::Error::custom(message)
})
}
/// A by-value handle to a borrowed writer.
///
/// Wraps a `&mut io::Write` so that the by-value writer requirement of
/// [EncoderWriter::new](base64::write::EncoderWriter::new) can be satisfied
/// without giving up ownership of the underlying writer. Every `io::Write`
/// method is forwarded to the wrapped writer.
struct WriteRef<'w, W>
where
    W: io::Write + ?Sized,
{
    inner: &'w mut W,
}

impl<'w, W> WriteRef<'w, W>
where
    W: io::Write + ?Sized,
{
    /// Wraps `inner` for the duration of the borrow.
    #[inline]
    pub fn new(inner: &'w mut W) -> Self {
        Self { inner }
    }
}

impl<W> io::Write for WriteRef<'_, W>
where
    W: io::Write + ?Sized,
{
    #[inline]
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        io::Write::write(&mut *self.inner, buf)
    }

    #[inline]
    fn flush(&mut self) -> io::Result<()> {
        io::Write::flush(&mut *self.inner)
    }

    #[inline]
    fn write_vectored(&mut self, bufs: &[io::IoSlice<'_>]) -> io::Result<usize> {
        io::Write::write_vectored(&mut *self.inner, bufs)
    }

    #[inline]
    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
        io::Write::write_all(&mut *self.inner, buf)
    }

    #[inline]
    fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> io::Result<()> {
        io::Write::write_fmt(&mut *self.inner, args)
    }

    #[inline]
    fn by_ref(&mut self) -> &mut Self
    where
        Self: Sized,
    {
        self
    }
}
/// A fixed-length set of bits that keeps its first 64 bits inline.
///
/// Bits `0..64` live in a single `u64`; bits beyond that spill into a heap
/// vector that is only allocated when the requested length exceeds 64.
struct SmallBitset {
    /// Bits `0..64`, one per binary digit.
    small: u64,
    /// Number of addressable bits; valid indexes are `0..len`.
    len: usize,
    /// Bits `64..len`; empty when `len <= 64`.
    extra: Vec<bool>,
}

impl SmallBitset {
    /// Number of bits stored inline in `small`.
    const INLINE_BITS: usize = 64;

    /// Creates a bitset with `len` bits, all cleared.
    #[inline]
    pub fn new(len: usize) -> Self {
        Self {
            small: 0,
            len,
            // An empty `vec!` does not allocate, so short bitsets stay heap-free.
            extra: vec![false; len.saturating_sub(Self::INLINE_BITS)],
        }
    }

    /// Sets the bit at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is outside the spill storage. Debug builds also
    /// reject every `index >= len`.
    #[inline]
    pub fn set(&mut self, index: usize) {
        debug_assert!(
            index < self.len,
            "bit index {} out of range for bitset of length {}",
            index,
            self.len
        );
        match index.checked_sub(Self::INLINE_BITS) {
            None => self.small |= 1 << index,
            Some(spill) => self.extra[spill] = true,
        }
    }

    /// Returns whether the bit at `index` is set.
    ///
    /// # Panics
    ///
    /// Panics if `index` is outside the spill storage. Debug builds also
    /// reject every `index >= len`.
    #[inline]
    pub fn get(&self, index: usize) -> bool {
        debug_assert!(
            index < self.len,
            "bit index {} out of range for bitset of length {}",
            index,
            self.len
        );
        match index.checked_sub(Self::INLINE_BITS) {
            None => (self.small & (1 << index)) != 0,
            Some(spill) => self.extra[spill],
        }
    }
}
// Unit tests for the helpers that do not need a Cap'n Proto schema.
#[cfg(test)]
mod tests {
    use super::*;
    /// Exercises `SmallBitset` both below and above the 64-bit inline boundary.
    #[test]
    fn test_bitset() {
        // A bitset that fits entirely in the inline word.
        let mut bitset = SmallBitset::new(10);
        for i in 0..10 {
            assert!(!bitset.get(i));
        }
        bitset.set(0);
        bitset.set(5);
        bitset.set(9);
        assert!(bitset.get(0));
        assert!(!bitset.get(1));
        assert!(!bitset.get(4));
        assert!(bitset.get(5));
        assert!(!bitset.get(8));
        assert!(bitset.get(9));
        // A bitset that needs the spill storage.
        let mut bitset = SmallBitset::new(100);
        // Test bits in the small range (< 64)
        bitset.set(10);
        bitset.set(63);
        assert!(bitset.get(63));
        assert!(!bitset.get(64));
        // Test bits in the extra range (>= 64)
        bitset.set(64);
        assert!(bitset.get(64));
        bitset.set(99);
        assert!(bitset.get(10));
        assert!(bitset.get(64));
        assert!(bitset.get(99));
        // Test some unset bits
        assert!(!bitset.get(0));
        assert!(!bitset.get(50));
        assert!(!bitset.get(65));
        assert!(!bitset.get(70));
    }
    /// Runs `format_escaped_str_contents` with the compact formatter and
    /// returns what it wrote as a `String`.
    fn escape_str(input: &str) -> String {
        let mut output = Vec::new();
        let mut formatter = ser::CompactFormatter;
        format_escaped_str_contents(&mut output, &mut formatter, input).unwrap();
        String::from_utf8(output).unwrap()
    }
    /// Checks `(input, expected output)` pairs: quotes and control characters
    /// are escaped, while plain text and non-ASCII characters pass through.
    #[test]
    fn testformat_escaped_str_contents() {
        let examples = [
            (" ", " "),
            (" foo ", " foo "),
            (" \" ", " \\\" "),
            (" \x08 ", " \\b "),
            (" \n ", " \\n "),
            (" \r ", " \\r "),
            (" \t ", " \\t "),
            (" \u{12ab} ", " \u{12ab} "),
            (" \u{AB12} ", " \u{AB12} "),
            (" \u{1F395} ", " \u{1F395} "),
        ];
        for (input, expected) in &examples {
            let escaped = escape_str(input);
            assert_eq!(&escaped, expected);
        }
    }
    // tests are in the tests/ folder as they depend on specific Cap'n Proto schemas
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment