Skip to content

Instantly share code, notes, and snippets.

@allen-munsch
Created November 13, 2025 15:15
Show Gist options
  • Select an option

  • Save allen-munsch/295ab0b944ae0c2816896945e3f168a2 to your computer and use it in GitHub Desktop.

Select an option

Save allen-munsch/295ab0b944ae0c2816896945e3f168a2 to your computer and use it in GitHub Desktop.
test flatbuffers for cloud events
// FlatBuffers schema describing a CloudEvent record and its extension
// attributes. The namespace below is what the generated Python bindings are
// imported from (io.cloudevents).
namespace io.cloudevents;
// Key-value pair for extension attributes
// Both the key and the value are strings and both are marked (required).
table ExtensionAttributes {
key: string (required);
value: string (required);
}
// Main CloudEvent record
// Field declaration order matters to consumers that walk the vtable by
// index: id, source, specversion, type, datacontenttype, dataschema,
// subject, time, extensions, data.
table CloudEvent {
// REQUIRED attributes
// Marked (required) in the schema, so they must be set on every event.
id: string (required);
source: string (required);
specversion: string (required);
type: string (required);
// OPTIONAL attributes
// No (required) attribute: each of these may be left unset by the writer.
datacontenttype: string;
dataschema: string;
subject: string;
time: string;
// Extension attributes
// Optional vector of ExtensionAttributes key/value tables.
extensions: [ExtensionAttributes];
// Event data payload
// Optional vector of unsigned bytes; the schema does not constrain its encoding.
data: [ubyte];
}
// CloudEvent is the table stored at the root of a serialized buffer.
root_type CloudEvent;
#! /bin/bash
# Compile the CloudEvents schema (cloudevents.fbs) into Python bindings.
flatc --python cloudevents.fbs
# generates an io.cloudevents module
# (the test script imports it as `.io.cloudevents.CloudEvent`)
#!/usr/bin/env python3
"""
Test to demonstrate how FlatBuffers handles optional fields.
"How are these optional fields serialized?
For example, if 'subject' is missing, is there still some 'subject' type
of entry there? Does the word 'subject' actually appear in the serialization?"
"""
import flatbuffers
import sys
from .io.cloudevents.CloudEvent import CloudEvent, Start, AddId, AddSource, AddSpecversion, AddType, AddSubject, End
def create_cloudevent_with_subject(builder):
    """Serialize a CloudEvent that sets the optional 'subject' attribute.

    Args:
        builder: a FlatBuffers builder providing ``CreateString``,
            ``Finish`` and ``Output``.

    Returns:
        The finished buffer as an immutable ``bytes`` object.
    """
    # (adder, value) pairs, listed in the order the fields are written.
    attributes = (
        (AddId, "event-123"),
        (AddSource, "https://example.com/source"),
        (AddSpecversion, "1.0"),
        (AddType, "com.example.event"),
        (AddSubject, "my-subject"),  # OPTIONAL FIELD INCLUDED
    )

    # Every string is written to the buffer before the table is started,
    # so only offsets are handed to the Add* calls below.
    offsets = [builder.CreateString(text) for _, text in attributes]

    Start(builder)
    for (add_field, _), offset in zip(attributes, offsets):
        add_field(builder, offset)
    builder.Finish(End(builder))

    return bytes(builder.Output())
def create_cloudevent_without_subject(builder):
    """Serialize a CloudEvent that leaves the optional 'subject' attribute unset.

    Args:
        builder: a FlatBuffers builder providing ``CreateString``,
            ``Finish`` and ``Output``.

    Returns:
        The finished buffer as an immutable ``bytes`` object.
    """
    # (adder, value) pairs, listed in the order the fields are written.
    # There is deliberately no AddSubject entry: no subject string is
    # created and the field is never added to the table.
    attributes = (
        (AddId, "event-456"),
        (AddSource, "https://example.com/source"),
        (AddSpecversion, "1.0"),
        (AddType, "com.example.event"),
    )

    # Every string is written to the buffer before the table is started,
    # so only offsets are handed to the Add* calls below.
    offsets = [builder.CreateString(text) for _, text in attributes]

    Start(builder)
    for (add_field, _), offset in zip(attributes, offsets):
        add_field(builder, offset)
    builder.Finish(End(builder))

    return bytes(builder.Output())
def hex_dump(data, label):
    """Print a labelled hex/ASCII dump of *data* and report whether it contains 'subject'.

    The dump shows 16 bytes per row: the row's starting offset, the bytes in
    hex, and their printable-ASCII rendering ('.' for anything else).

    After the dump, the raw bytes are searched case-insensitively for the
    ASCII word ``subject``. The search runs on the bytes themselves, so the
    reported position is a true byte offset into *data*. Bytes outside the
    ASCII range are never dropped, so they can neither shift the position
    nor join two unrelated fragments into a false match.

    Args:
        data: bytes-like object to dump.
        label: heading printed above the dump.
    """
    rule = '=' * 70
    print(f"\n{rule}")
    print(f"{label}")
    print(rule)
    print(f"Total size: {len(data)} bytes")
    print("\nHex dump:")
    for start in range(0, len(data), 16):
        row = data[start:start + 16]
        hex_part = ' '.join(f'{b:02x}' for b in row)
        ascii_part = ''.join(chr(b) if 32 <= b < 127 else '.' for b in row)
        print(f"{start:04x}: {hex_part:<48} {ascii_part}")

    # Check if "subject" appears as a string in the binary.
    # bytes.lower() only folds ASCII letters, which is all the needle needs.
    position = bytes(data).lower().find(b'subject')
    if position >= 0:
        print("\n⚠️ WARNING: The word 'subject' APPEARS in the binary data!")
        print(f" Position: {position}")
    else:
        print("\n✓ The word 'subject' does NOT appear in the binary data")
def analyze_structure(data, label):
    """Parse and print the vtable layout of a serialized CloudEvent buffer.

    Reads the root table offset from the first four bytes, follows the
    table's signed offset back to its vtable, and prints one row per
    CloudEvent schema field showing whether the field is present and where.

    Each offset is checked against the buffer length before it is used. A
    truncated or corrupt buffer produces a short diagnostic and an early
    return instead of a table built from empty slices, which
    ``int.from_bytes`` would silently read as 0.

    Args:
        data: bytes-like object holding the serialized buffer.
        label: heading printed above the analysis.
    """
    print(f"\n{'-'*70}")
    print(f"Structure Analysis: {label}")
    print(f"{'-'*70}")

    size = len(data)
    if size < 8:
        print("Buffer too small to contain a valid FlatBuffer")
        return

    # The first 4 bytes contain the root table offset (absolute from buffer start)
    root_table_offset = int.from_bytes(data[0:4], 'little')
    print(f"Root table offset (absolute from start): {root_table_offset}")

    # The root table begins at this offset
    table_start = root_table_offset
    print(f"Table starts at: {table_start}")
    if table_start + 4 > size:
        print("Root table offset points outside the buffer; cannot analyze")
        return

    # At the start of the table, there's a signed relative offset to the vtable
    vtable_rel_off = int.from_bytes(data[table_start:table_start+4], 'little', signed=True)
    vtable_start = table_start - vtable_rel_off
    print(f"VTable relative offset: {vtable_rel_off}")
    print(f"VTable starts at: {vtable_start}")
    if vtable_start < 0 or vtable_start + 4 > size:
        print("VTable offset points outside the buffer; cannot analyze")
        return

    # Read vtable header: two little-endian 16-bit values
    vtable_size = int.from_bytes(data[vtable_start:vtable_start+2], 'little')
    object_size = int.from_bytes(data[vtable_start+2:vtable_start+4], 'little')
    print(f"VTable size: {vtable_size} bytes")
    print(f"Object inline size: {object_size} bytes")
    # The header alone is 4 bytes, and every entry must lie inside the buffer.
    if vtable_size < 4 or vtable_start + vtable_size > size:
        print("VTable size is invalid for this buffer; cannot analyze")
        return

    # Each field entry after the 4-byte header is 2 bytes wide
    num_fields = (vtable_size - 4) // 2
    print(f"Number of field entries: {num_fields}")

    # All schema fields, in declaration order, with required/optional info
    fields = (
        ("id", True),
        ("source", True),
        ("specversion", True),
        ("type", True),
        ("datacontenttype", False),
        ("dataschema", False),
        ("subject", False),
        ("time", False),
        ("extensions", False),
        ("data", False),
    )

    print(f"\n{'Field':20s} {'Required?':>10s} {'Offset (rel)':>15s} {'Absolute Offset':>18s} {'Present?':>10s}")
    print('-' * 80)
    for i, (field_name, required) in enumerate(fields):
        if i >= num_fields:
            # The vtable is shorter than the schema: no entry at all for this field
            print(f"{field_name:20s} {str(required):>10s} {'-':>15s} {'-':>18s} {'NO':>10s}")
            continue
        entry_pos = vtable_start + 4 + (i * 2)
        field_offset = int.from_bytes(data[entry_pos:entry_pos+2], 'little')
        # A zero entry means the field was not written
        present = field_offset != 0
        abs_offset = table_start + field_offset if present else "-"
        print(f"{field_name:20s} {str(required):>10s} {field_offset:>15} {str(abs_offset):>18} {str(present):>10s}")
def test_optional_field_serialization():
    """
    Demonstrate how FlatBuffers serializes an optional field.

    Builds one CloudEvent with 'subject' set and one without, dumps and
    analyzes both buffers, compares their sizes, then reads both back.

    KEY FINDINGS:
    1. Field names (like "subject") do NOT appear in the binary serialization
    2. Optional fields that are not set have NO representation in the binary data
    3. The VTable contains offset entries - if a field is not set, its offset is 0
    4. The binary size is smaller when optional fields are omitted
    """
    rule = "=" * 70

    print("\n" + rule)
    print("FLATBUFFERS OPTIONAL FIELD SERIALIZATION TEST")
    print(rule)
    print("\nQuestion: How are optional fields serialized?")
    print("Specifically: Does 'subject' appear in the binary when omitted?")

    # Test 1: WITH optional field (each case gets its own fresh builder)
    data_with_subject = create_cloudevent_with_subject(flatbuffers.Builder(1024))
    hex_dump(data_with_subject, "CloudEvent WITH 'subject' field")
    analyze_structure(data_with_subject, "WITH subject")

    # Test 2: WITHOUT optional field
    data_without_subject = create_cloudevent_without_subject(flatbuffers.Builder(1024))
    hex_dump(data_without_subject, "CloudEvent WITHOUT 'subject' field")
    analyze_structure(data_without_subject, "WITHOUT subject")

    # Comparison
    size_with = len(data_with_subject)
    size_without = len(data_without_subject)
    print("\n" + rule)
    print("COMPARISON & CONCLUSIONS")
    print(rule)
    print(f"Size with subject: {size_with} bytes")
    print(f"Size without subject: {size_without} bytes")
    print(f"Difference: {size_with - size_without} bytes")

    # Verify by reading back
    print("\n" + rule)
    print("VERIFICATION: Reading back the data")
    print(rule)

    event_with = CloudEvent.GetRootAs(data_with_subject, 0)
    print("\nEvent WITH subject:")
    print(f" ID: {event_with.Id().decode('utf-8')}")
    subject_with = event_with.Subject()
    print(f" Subject: {subject_with.decode('utf-8') if subject_with else 'None'}")

    event_without = CloudEvent.GetRootAs(data_without_subject, 0)
    print("\nEvent WITHOUT subject:")
    print(f" ID: {event_without.Id().decode('utf-8')}")
    subject_without = event_without.Subject()
    print(f" Subject: {subject_without.decode('utf-8') if subject_without else 'None'}")

    print("\n" + rule)
    print("TEST COMPLETE")
    print(rule)


# NOTE(review): the generated bindings are imported with a relative import
# (`from .io.cloudevents...`), so this entry point presumably has to be
# launched as a module inside a package (`python -m <package>.<module>`)
# rather than as a bare script -- confirm against the intended layout.
if __name__ == "__main__":
    test_optional_field_serialization()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment