allen-munsch · November 13, 2025 15:15
diff --git a/cloudevents.fbs b/cloudevents.fbs
 namespace io.cloudevents;
 // Key-value pair for extension attributes
 // Both members are strings and both are marked (required).
 // CloudEvent.extensions holds a vector of these entries.
 table ExtensionAttributes {
  key: string (required);
  value: string (required);
 }
 // Main CloudEvent record
 // No explicit `id:` attributes are used, so the declaration order below is what
 // determines each field's vtable slot; test.py's analyze_structure lists the
 // fields in this same order. Reordering fields would change the binary layout.
 table CloudEvent {
  // REQUIRED attributes
  id: string (required);
  source: string (required);
  specversion: string (required);
  type: string (required);
  // OPTIONAL attributes
  // (no `required` attribute, so a buffer may omit any of these)
  datacontenttype: string;
  dataschema: string;
  subject: string;
  // Carried as a plain string; presumably an RFC 3339 timestamp - TODO confirm.
  time: string;
  // Extension attributes
  // Vector of key/value tables; optional like the attributes above.
  extensions: [ExtensionAttributes];
  // Event data payload
  // Stored as a vector of unsigned bytes; optional.
  data: [ubyte];
 }
 root_type CloudEvent;
diff --git a/compile.sh b/compile.sh
 #! /bin/bash
 # Generate the Python bindings for cloudevents.fbs.
 # Run from the directory that contains cloudevents.fbs; output is written to
 # the current directory.
 set -euo pipefail

 # Fail early with an actionable message when the FlatBuffers compiler is missing.
 if ! command -v flatc >/dev/null 2>&1; then
     echo "error: flatc (FlatBuffers compiler) not found on PATH" >&2
     exit 127
 fi

 flatc --python cloudevents.fbs

 # generates an io.cloudevents module
diff --git a/test.py b/test.py
 #!/usr/bin/env python3
 """
 Test to demonstrate how FlatBuffers handles optional fields.

 "How are these optional fields serialized? 
 For example, if 'subject' is missing, is there still some 'subject' type 
 of entry there? Does the word 'subject' actually appear in the serialization?"
 """

 import flatbuffers
 import sys

 from .io.cloudevents.CloudEvent import CloudEvent, Start, AddId, AddSource, AddSpecversion, AddType, AddSubject, End


 def create_cloudevent_with_subject(builder):
    """Serialize a CloudEvent whose optional 'subject' attribute is populated.

    Every string is written into ``builder`` before the table is started;
    the four required attributes and then 'subject' are added to the table.

    NOTE: the subject value ("my-subject") itself contains the text
    "subject", so a substring search over the returned bytes will match
    that value.

    Returns:
        The finished buffer as ``bytes``.
    """
    # Strings are created in this order: id, source, specversion, type, subject.
    id_off, source_off, specversion_off, type_off, subject_off = [
        builder.CreateString(text)
        for text in (
            "event-123",
            "https://example.com/source",
            "1.0",
            "com.example.event",
            "my-subject",  # optional field value
        )
    ]

    # Assemble the table; the optional field is added last.
    Start(builder)
    for add_field, offset in (
        (AddId, id_off),
        (AddSource, source_off),
        (AddSpecversion, specversion_off),
        (AddType, type_off),
        (AddSubject, subject_off),
    ):
        add_field(builder, offset)
    root = End(builder)

    builder.Finish(root)
    return bytes(builder.Output())


 def create_cloudevent_without_subject(builder):
    """Serialize a CloudEvent that leaves the optional 'subject' attribute unset.

    Only the four required attributes are written: no subject string is
    created and ``AddSubject`` is never called.

    Returns:
        The finished buffer as ``bytes``.
    """
    # Strings are created in this order: id, source, specversion, type.
    id_off, source_off, specversion_off, type_off = [
        builder.CreateString(text)
        for text in (
            "event-456",
            "https://example.com/source",
            "1.0",
            "com.example.event",
        )
    ]

    # Assemble the table from the required attributes only.
    Start(builder)
    for add_field, offset in (
        (AddId, id_off),
        (AddSource, source_off),
        (AddSpecversion, specversion_off),
        (AddType, type_off),
    ):
        add_field(builder, offset)
    root = End(builder)

    builder.Finish(root)
    return bytes(builder.Output())


 def hex_dump(data, label):
    """Print a labelled hex/ASCII dump of ``data`` and report whether 'subject' occurs in it.

    The search for the word 'subject' is case-insensitive and runs on the raw
    bytes, so the reported position is the byte offset within ``data`` and
    bytes outside the ASCII range can never be skipped over to form a match.

    Args:
        data: Bytes-like serialized buffer to dump.
        label: Heading printed between the two separator lines.
    """
    print(f"\n{'='*70}")
    print(f"{label}")
    print(f"{'='*70}")
    print(f"Total size: {len(data)} bytes")
    print("\nHex dump:")

    # 16 bytes per row: offset, hex column (padded to 48 chars), printable ASCII.
    for i in range(0, len(data), 16):
        row = data[i:i+16]
        hex_part = ' '.join(f'{b:02x}' for b in row)
        ascii_part = ''.join(chr(b) if 32 <= b < 127 else '.' for b in row)
        print(f"{i:04x}:  {hex_part:<48}  {ascii_part}")

    # Check if "subject" appears as a string in the binary.
    # bytes.lower() only folds ASCII letters, so offsets stay aligned with data.
    position = bytes(data).lower().find(b'subject')
    if position != -1:
        print("\n⚠️  WARNING: The word 'subject' APPEARS in the binary data!")
        print(f"   Position: {position}")
    else:
        print("\n✓ The word 'subject' does NOT appear in the binary data")

 def analyze_structure(data, label):
    """Parse and print the root table's vtable layout of a serialized CloudEvent.

    Reads the root offset from the first four bytes, follows the table's
    signed offset back to its vtable, and prints one row per schema field.
    A vtable entry of 0, or a field index past the end of the vtable, is
    reported as the field not being present.

    Offsets that point outside the buffer are reported and end the analysis
    instead of being decoded as zeros.

    Args:
        data: Bytes-like finished buffer.
        label: Heading printed above the analysis.
    """
    print(f"\n{'-'*70}")
    print(f"Structure Analysis: {label}")
    print(f"{'-'*70}")

    size = len(data)
    if size < 8:
        print("Buffer too small to contain a valid FlatBuffer")
        return

    def read_int(pos, width, signed=False):
        # Little-endian integer of `width` bytes starting at `pos`.
        return int.from_bytes(data[pos:pos + width], 'little', signed=signed)

    # The first 4 bytes contain the root table offset (absolute from buffer start)
    root_table_offset = read_int(0, 4)
    print(f"Root table offset (absolute from start): {root_table_offset}")

    # The root table begins at this offset
    table_start = root_table_offset
    print(f"Table starts at: {table_start}")
    if table_start + 4 > size:
        print("Root table offset points outside the buffer")
        return

    # At the start of the table, there's a signed relative offset to the vtable
    vtable_rel_off = read_int(table_start, 4, signed=True)
    vtable_start = table_start - vtable_rel_off
    print(f"VTable relative offset: {vtable_rel_off}")
    print(f"VTable starts at: {vtable_start}")
    # A negative start would make the slices below wrap around from the end.
    if vtable_start < 0 or vtable_start + 4 > size:
        print("VTable offset points outside the buffer")
        return

    # Read vtable header: two 16-bit values, then one 16-bit entry per field.
    vtable_size = read_int(vtable_start, 2)
    object_size = read_int(vtable_start + 2, 2)
    if vtable_size < 4 or vtable_start + vtable_size > size:
        print("VTable size is invalid for this buffer")
        return
    num_fields = (vtable_size - 4) // 2

    print(f"VTable size: {vtable_size} bytes")
    print(f"Object inline size: {object_size} bytes")
    print(f"Number of field entries: {num_fields}")

    # Define all fields with required/optional info, in schema declaration order.
    fields = [
        ("id", True),
        ("source", True),
        ("specversion", True),
        ("type", True),
        ("datacontenttype", False),
        ("dataschema", False),
        ("subject", False),
        ("time", False),
        ("extensions", False),
        ("data", False),
    ]

    print(f"\n{'Field':20s} {'Required?':>10s} {'Offset (rel)':>15s} {'Absolute Offset':>18s} {'Present?':>10s}")
    print('-' * 80)

    for i, (field_name, required) in enumerate(fields):
        if i >= num_fields:
            # The vtable has no entry for this field index.
            print(f"{field_name:20s} {str(required):>10s} {'-':>15s} {'-':>18s} {'NO':>10s}")
            continue
        entry_pos = vtable_start + 4 + (i * 2)
        field_offset = read_int(entry_pos, 2)
        present = field_offset != 0
        abs_offset = table_start + field_offset if present else "-"
        print(f"{field_name:20s} {str(required):>10s} {field_offset:>15} {str(abs_offset):>18} {str(present):>10s}")


 def test_optional_field_serialization():
    """
    Demonstrate and verify how FlatBuffers serializes an optional field.

    Builds one CloudEvent with 'subject' and one without, dumps and analyzes
    both buffers, and asserts the findings that can be checked directly so the
    test fails if they stop holding.

    KEY FINDINGS:
    1. Field names (like "subject") do NOT appear in the binary serialization
    2. Optional fields that are not set have NO representation in the binary data
    3. The VTable contains offset entries - if a field is not set, its offset is 0
       (or its index lies past the end of the vtable)
    4. The binary size is smaller when optional fields are omitted

    NOTE: the WITH buffer stores the value "my-subject", so the substring
    'subject' is expected there (it is the field's value, not its name).
    Finding 1 is therefore asserted on the WITHOUT buffer only.
    """

    print("\n" + "="*70)
    print("FLATBUFFERS OPTIONAL FIELD SERIALIZATION TEST")
    print("="*70)
    print("\nQuestion: How are optional fields serialized?")
    print("Specifically: Does 'subject' appear in the binary when omitted?")

    # Test 1: WITH optional field
    builder1 = flatbuffers.Builder(1024)
    data_with_subject = create_cloudevent_with_subject(builder1)
    hex_dump(data_with_subject, "CloudEvent WITH 'subject' field")
    analyze_structure(data_with_subject, "WITH subject")

    # Test 2: WITHOUT optional field
    builder2 = flatbuffers.Builder(1024)
    data_without_subject = create_cloudevent_without_subject(builder2)
    hex_dump(data_without_subject, "CloudEvent WITHOUT 'subject' field")
    analyze_structure(data_without_subject, "WITHOUT subject")

    # Comparison
    print(f"\n{'='*70}")
    print("COMPARISON & CONCLUSIONS")
    print(f"{'='*70}")
    print(f"Size with subject:    {len(data_with_subject)} bytes")
    print(f"Size without subject: {len(data_without_subject)} bytes")
    print(f"Difference:           {len(data_with_subject) - len(data_without_subject)} bytes")

    # Findings 1 and 4, checked on the raw bytes.
    assert b"subject" not in data_without_subject.lower(), \
        "field name 'subject' leaked into the buffer that omits the field"
    assert len(data_without_subject) < len(data_with_subject), \
        "omitting the optional field did not shrink the buffer"

    # Verify by reading back
    print("\n" + "="*70)
    print("VERIFICATION: Reading back the data")
    print("="*70)

    event_with = CloudEvent.GetRootAs(data_with_subject, 0)
    subject_with = event_with.Subject()
    print("\nEvent WITH subject:")
    print(f"  ID: {event_with.Id().decode('utf-8')}")
    print(f"  Subject: {subject_with.decode('utf-8') if subject_with else 'None'}")

    event_without = CloudEvent.GetRootAs(data_without_subject, 0)
    subject_without = event_without.Subject()
    print("\nEvent WITHOUT subject:")
    print(f"  ID: {event_without.Id().decode('utf-8')}")
    print(f"  Subject: {subject_without.decode('utf-8') if subject_without else 'None'}")

    # Round-trip checks: set fields come back intact, the omitted one is empty.
    assert event_with.Id().decode('utf-8') == "event-123"
    assert subject_with and subject_with.decode('utf-8') == "my-subject"
    assert event_without.Id().decode('utf-8') == "event-456"
    assert not subject_without, "omitted 'subject' should read back as unset"

    print("\n" + "="*70)
    print("TEST COMPLETE")
    print("="*70)


 # Script entry point: runs the demonstration when the module is executed.
 # NOTE(review): the generated code is imported with a relative import
 # (from .io.cloudevents...), which presumably requires running this file as
 # part of a package (e.g. via pytest or `python -m`) rather than directly - confirm.
 if __name__ == "__main__":
    test_optional_field_serialization()
	namespace io.cloudevents;
	// Key-value pair for extension attributes
	// Both members are strings and both are marked (required).
	// CloudEvent.extensions holds a vector of these entries.
	table ExtensionAttributes {
	key: string (required);
	value: string (required);
	}
	// Main CloudEvent record
	// No explicit `id:` attributes are used, so the declaration order below is what
	// determines each field's vtable slot. Reordering fields would change the binary layout.
	table CloudEvent {
	// REQUIRED attributes
	id: string (required);
	source: string (required);
	specversion: string (required);
	type: string (required);
	// OPTIONAL attributes
	// (no `required` attribute, so a buffer may omit any of these)
	datacontenttype: string;
	dataschema: string;
	subject: string;
	// Carried as a plain string; presumably an RFC 3339 timestamp - TODO confirm.
	time: string;
	// Extension attributes
	// Vector of key/value tables; optional like the attributes above.
	extensions: [ExtensionAttributes];
	// Event data payload
	// Stored as a vector of unsigned bytes; optional.
	data: [ubyte];
	}
	root_type CloudEvent;
	#! /bin/bash
	# Generate the Python bindings for cloudevents.fbs.
	# Run from the directory that contains cloudevents.fbs; output is written to
	# the current directory.
	set -euo pipefail

	# Fail early with an actionable message when the FlatBuffers compiler is missing.
	if ! command -v flatc >/dev/null 2>&1; then
	    echo "error: flatc (FlatBuffers compiler) not found on PATH" >&2
	    exit 127
	fi

	flatc --python cloudevents.fbs

	# generates an io.cloudevents module
	#!/usr/bin/env python3
	"""
	Test to demonstrate how FlatBuffers handles optional fields.

	"How are these optional fields serialized?
	For example, if 'subject' is missing, is there still some 'subject' type
	of entry there? Does the word 'subject' actually appear in the serialization?"
	"""

	import flatbuffers
	import sys

	from .io.cloudevents.CloudEvent import CloudEvent, Start, AddId, AddSource, AddSpecversion, AddType, AddSubject, End


	def create_cloudevent_with_subject(builder):
	    """Create a CloudEvent WITH the optional 'subject' field.

	    Every string is written into ``builder`` before the table is started;
	    the four required attributes and then 'subject' are added to the table.

	    NOTE: the subject value ("my-subject") itself contains the text
	    "subject", so a substring search over the returned bytes will match
	    that value.

	    Returns:
	        The finished buffer as ``bytes``.
	    """
	    # Create strings
	    id_str = builder.CreateString("event-123")
	    source_str = builder.CreateString("https://example.com/source")
	    specversion_str = builder.CreateString("1.0")
	    type_str = builder.CreateString("com.example.event")
	    subject_str = builder.CreateString("my-subject")  # OPTIONAL FIELD INCLUDED

	    # Build CloudEvent
	    Start(builder)
	    AddId(builder, id_str)
	    AddSource(builder, source_str)
	    AddSpecversion(builder, specversion_str)
	    AddType(builder, type_str)
	    AddSubject(builder, subject_str)  # Adding optional field
	    event = End(builder)

	    builder.Finish(event)
	    return bytes(builder.Output())


	def create_cloudevent_without_subject(builder):
	    """Create a CloudEvent WITHOUT the optional 'subject' field.

	    Only the four required attributes are written: no subject string is
	    created and ``AddSubject`` is never called.

	    Returns:
	        The finished buffer as ``bytes``.
	    """
	    # Create strings
	    id_str = builder.CreateString("event-456")
	    source_str = builder.CreateString("https://example.com/source")
	    specversion_str = builder.CreateString("1.0")
	    type_str = builder.CreateString("com.example.event")
	    # NO subject_str created

	    # Build CloudEvent
	    Start(builder)
	    AddId(builder, id_str)
	    AddSource(builder, source_str)
	    AddSpecversion(builder, specversion_str)
	    AddType(builder, type_str)
	    # NO AddSubject call - optional field omitted
	    event = End(builder)

	    builder.Finish(event)
	    return bytes(builder.Output())


	def hex_dump(data, label):
	    """Print a labelled hex/ASCII dump of ``data`` and report whether 'subject' occurs in it.

	    The search for the word 'subject' is case-insensitive and runs on the raw
	    bytes, so the reported position is the byte offset within ``data`` and
	    bytes outside the ASCII range can never be skipped over to form a match.

	    Args:
	        data: Bytes-like serialized buffer to dump.
	        label: Heading printed between the two separator lines.
	    """
	    print(f"\n{'='*70}")
	    print(f"{label}")
	    print(f"{'='*70}")
	    print(f"Total size: {len(data)} bytes")
	    print("\nHex dump:")

	    # 16 bytes per row: offset, hex column (padded to 48 chars), printable ASCII.
	    for i in range(0, len(data), 16):
	        row = data[i:i+16]
	        hex_part = ' '.join(f'{b:02x}' for b in row)
	        ascii_part = ''.join(chr(b) if 32 <= b < 127 else '.' for b in row)
	        print(f"{i:04x}: {hex_part:<48} {ascii_part}")

	    # Check if "subject" appears as a string in the binary.
	    # bytes.lower() only folds ASCII letters, so offsets stay aligned with data.
	    position = bytes(data).lower().find(b'subject')
	    if position != -1:
	        print("\n⚠️ WARNING: The word 'subject' APPEARS in the binary data!")
	        print(f" Position: {position}")
	    else:
	        print("\n✓ The word 'subject' does NOT appear in the binary data")

	def analyze_structure(data, label):
	    """Parse and print the root table's vtable layout of a serialized CloudEvent.

	    Reads the root offset from the first four bytes, follows the table's
	    signed offset back to its vtable, and prints one row per schema field.
	    A vtable entry of 0, or a field index past the end of the vtable, is
	    reported as the field not being present.

	    Offsets that point outside the buffer are reported and end the analysis
	    instead of being decoded as zeros.

	    Args:
	        data: Bytes-like finished buffer.
	        label: Heading printed above the analysis.
	    """
	    print(f"\n{'-'*70}")
	    print(f"Structure Analysis: {label}")
	    print(f"{'-'*70}")

	    size = len(data)
	    if size < 8:
	        print("Buffer too small to contain a valid FlatBuffer")
	        return

	    def read_int(pos, width, signed=False):
	        # Little-endian integer of `width` bytes starting at `pos`.
	        return int.from_bytes(data[pos:pos + width], 'little', signed=signed)

	    # The first 4 bytes contain the root table offset (absolute from buffer start)
	    root_table_offset = read_int(0, 4)
	    print(f"Root table offset (absolute from start): {root_table_offset}")

	    # The root table begins at this offset
	    table_start = root_table_offset
	    print(f"Table starts at: {table_start}")
	    if table_start + 4 > size:
	        print("Root table offset points outside the buffer")
	        return

	    # At the start of the table, there's a signed relative offset to the vtable
	    vtable_rel_off = read_int(table_start, 4, signed=True)
	    vtable_start = table_start - vtable_rel_off
	    print(f"VTable relative offset: {vtable_rel_off}")
	    print(f"VTable starts at: {vtable_start}")
	    # A negative start would make the slices below wrap around from the end.
	    if vtable_start < 0 or vtable_start + 4 > size:
	        print("VTable offset points outside the buffer")
	        return

	    # Read vtable header: two 16-bit values, then one 16-bit entry per field.
	    vtable_size = read_int(vtable_start, 2)
	    object_size = read_int(vtable_start + 2, 2)
	    if vtable_size < 4 or vtable_start + vtable_size > size:
	        print("VTable size is invalid for this buffer")
	        return
	    num_fields = (vtable_size - 4) // 2

	    print(f"VTable size: {vtable_size} bytes")
	    print(f"Object inline size: {object_size} bytes")
	    print(f"Number of field entries: {num_fields}")

	    # Define all fields with required/optional info, in schema declaration order.
	    fields = [
	        ("id", True),
	        ("source", True),
	        ("specversion", True),
	        ("type", True),
	        ("datacontenttype", False),
	        ("dataschema", False),
	        ("subject", False),
	        ("time", False),
	        ("extensions", False),
	        ("data", False),
	    ]

	    print(f"\n{'Field':20s} {'Required?':>10s} {'Offset (rel)':>15s} {'Absolute Offset':>18s} {'Present?':>10s}")
	    print('-' * 80)

	    for i, (field_name, required) in enumerate(fields):
	        if i >= num_fields:
	            # The vtable has no entry for this field index.
	            print(f"{field_name:20s} {str(required):>10s} {'-':>15s} {'-':>18s} {'NO':>10s}")
	            continue
	        entry_pos = vtable_start + 4 + (i * 2)
	        field_offset = read_int(entry_pos, 2)
	        present = field_offset != 0
	        abs_offset = table_start + field_offset if present else "-"
	        print(f"{field_name:20s} {str(required):>10s} {field_offset:>15} {str(abs_offset):>18} {str(present):>10s}")


	def test_optional_field_serialization():
	    """
	    Demonstrate and verify how FlatBuffers serializes an optional field.

	    Builds one CloudEvent with 'subject' and one without, dumps and analyzes
	    both buffers, and asserts the findings that can be checked directly so the
	    test fails if they stop holding.

	    KEY FINDINGS:
	    1. Field names (like "subject") do NOT appear in the binary serialization
	    2. Optional fields that are not set have NO representation in the binary data
	    3. The VTable contains offset entries - if a field is not set, its offset is 0
	       (or its index lies past the end of the vtable)
	    4. The binary size is smaller when optional fields are omitted

	    NOTE: the WITH buffer stores the value "my-subject", so the substring
	    'subject' is expected there (it is the field's value, not its name).
	    Finding 1 is therefore asserted on the WITHOUT buffer only.
	    """

	    print("\n" + "="*70)
	    print("FLATBUFFERS OPTIONAL FIELD SERIALIZATION TEST")
	    print("="*70)
	    print("\nQuestion: How are optional fields serialized?")
	    print("Specifically: Does 'subject' appear in the binary when omitted?")

	    # Test 1: WITH optional field
	    builder1 = flatbuffers.Builder(1024)
	    data_with_subject = create_cloudevent_with_subject(builder1)
	    hex_dump(data_with_subject, "CloudEvent WITH 'subject' field")
	    analyze_structure(data_with_subject, "WITH subject")

	    # Test 2: WITHOUT optional field
	    builder2 = flatbuffers.Builder(1024)
	    data_without_subject = create_cloudevent_without_subject(builder2)
	    hex_dump(data_without_subject, "CloudEvent WITHOUT 'subject' field")
	    analyze_structure(data_without_subject, "WITHOUT subject")

	    # Comparison
	    print(f"\n{'='*70}")
	    print("COMPARISON & CONCLUSIONS")
	    print(f"{'='*70}")
	    print(f"Size with subject: {len(data_with_subject)} bytes")
	    print(f"Size without subject: {len(data_without_subject)} bytes")
	    print(f"Difference: {len(data_with_subject) - len(data_without_subject)} bytes")

	    # Findings 1 and 4, checked on the raw bytes.
	    assert b"subject" not in data_without_subject.lower(), \
	        "field name 'subject' leaked into the buffer that omits the field"
	    assert len(data_without_subject) < len(data_with_subject), \
	        "omitting the optional field did not shrink the buffer"

	    # Verify by reading back
	    print("\n" + "="*70)
	    print("VERIFICATION: Reading back the data")
	    print("="*70)

	    event_with = CloudEvent.GetRootAs(data_with_subject, 0)
	    subject_with = event_with.Subject()
	    print("\nEvent WITH subject:")
	    print(f" ID: {event_with.Id().decode('utf-8')}")
	    print(f" Subject: {subject_with.decode('utf-8') if subject_with else 'None'}")

	    event_without = CloudEvent.GetRootAs(data_without_subject, 0)
	    subject_without = event_without.Subject()
	    print("\nEvent WITHOUT subject:")
	    print(f" ID: {event_without.Id().decode('utf-8')}")
	    print(f" Subject: {subject_without.decode('utf-8') if subject_without else 'None'}")

	    # Round-trip checks: set fields come back intact, the omitted one is empty.
	    assert event_with.Id().decode('utf-8') == "event-123"
	    assert subject_with and subject_with.decode('utf-8') == "my-subject"
	    assert event_without.Id().decode('utf-8') == "event-456"
	    assert not subject_without, "omitted 'subject' should read back as unset"

	    print("\n" + "="*70)
	    print("TEST COMPLETE")
	    print("="*70)


	# Script entry point: runs the demonstration when the module is executed.
	# NOTE(review): the generated code is imported with a relative import
	# (from .io.cloudevents...), which presumably requires running this file as
	# part of a package (e.g. via pytest or `python -m`) rather than directly - confirm.
	if __name__ == "__main__":
	    test_optional_field_serialization()