Created
November 13, 2025 15:15
-
-
Save allen-munsch/295ab0b944ae0c2816896945e3f168a2 to your computer and use it in GitHub Desktop.
test flatbuffers for cloud events
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| namespace io.cloudevents; | |
| // One extension attribute stored as a key/value pair; both key and value are required strings | |
| table ExtensionAttributes { | |
| key: string (required); | |
| value: string (required); | |
| } | |
| // Main CloudEvent record and root table of the buffer; analyze_structure() in the test script labels vtable slots in this same field order | |
| table CloudEvent { | |
| // REQUIRED attributes: the four strings below are all marked (required) | |
| id: string (required); | |
| source: string (required); | |
| specversion: string (required); | |
| type: string (required); | |
| // OPTIONAL attributes: plain strings with no (required) marker, so a writer may omit them | |
| datacontenttype: string; | |
| dataschema: string; | |
| subject: string; | |
| time: string; | |
| // Extension attributes: optional vector of ExtensionAttributes key/value tables | |
| extensions: [ExtensionAttributes]; | |
| // Event data payload: optional vector of unsigned bytes | |
| data: [ubyte]; | |
| } | |
| root_type CloudEvent; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /bin/bash | |
| flatc --python cloudevents.fbs | |
| # Compiles the schema to Python; generates the io.cloudevents module (the test script imports CloudEvent from it) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Test to demonstrate how FlatBuffers handles optional fields. | |
| "How are these optional fields serialized? | |
| For example, if 'subject' is missing, is there still some 'subject' type | |
| of entry there? Does the word 'subject' actually appear in the serialization?" | |
| """ | |
| import flatbuffers | |
| import sys | |
| from .io.cloudevents.CloudEvent import CloudEvent, Start, AddId, AddSource, AddSpecversion, AddType, AddSubject, End | |
def create_cloudevent_with_subject(builder):
    """Serialize a CloudEvent that carries the optional ``subject`` attribute.

    Every string is written into *builder* before the table is opened; the
    table is then populated, finished as the buffer root, and the result is
    returned as an immutable ``bytes`` copy of the builder output.
    """
    # Each entry pairs a generated adder with the offset of the string it will
    # reference. The list literal is evaluated top to bottom, so the strings
    # are created in exactly this order.
    populated = [
        (AddId, builder.CreateString("event-123")),
        (AddSource, builder.CreateString("https://example.com/source")),
        (AddSpecversion, builder.CreateString("1.0")),
        (AddType, builder.CreateString("com.example.event")),
        # The optional field is included in this variant.
        (AddSubject, builder.CreateString("my-subject")),
    ]

    Start(builder)
    for add_field, string_offset in populated:
        add_field(builder, string_offset)
    root = End(builder)

    builder.Finish(root)
    return bytes(builder.Output())
def create_cloudevent_without_subject(builder):
    """Serialize a CloudEvent that omits the optional ``subject`` attribute.

    Only the four required strings are written into *builder*; no subject
    string is created and ``AddSubject`` is never called. The finished buffer
    is returned as an immutable ``bytes`` copy of the builder output.
    """
    # Adder/offset pairs for the required attributes only. The list literal is
    # evaluated top to bottom, so the strings are created in exactly this order.
    populated = [
        (AddId, builder.CreateString("event-456")),
        (AddSource, builder.CreateString("https://example.com/source")),
        (AddSpecversion, builder.CreateString("1.0")),
        (AddType, builder.CreateString("com.example.event")),
    ]

    Start(builder)
    for add_field, string_offset in populated:
        add_field(builder, string_offset)
    root = End(builder)

    builder.Finish(root)
    return bytes(builder.Output())
def hex_dump(data, label):
    """Print a labelled hex/ASCII dump of *data* and report whether "subject" occurs.

    Args:
        data: Bytes-like buffer to dump (``bytes``, ``bytearray`` or
            ``memoryview``).
        label: Heading printed above the dump.

    The dump shows 16 bytes per row: the row offset, the hex value of each
    byte, then the printable-ASCII rendering (other bytes are shown as ``.``).
    Afterwards the buffer is searched case-insensitively for the word
    "subject", and the byte offset of the first match within *data* is
    printed.
    """
    raw = bytes(data)
    rule = "=" * 70

    print(f"\n{rule}")
    print(label)
    print(rule)
    print(f"Total size: {len(raw)} bytes")
    print("\nHex dump:")
    for start in range(0, len(raw), 16):
        row = raw[start:start + 16]
        hex_part = " ".join(f"{b:02x}" for b in row)
        ascii_part = "".join(chr(b) if 32 <= b < 127 else "." for b in row)
        print(f"{start:04x}: {hex_part:<48} {ascii_part}")

    # Search the raw bytes rather than a decoded copy: decoding with
    # errors='ignore' drops non-ASCII bytes, which shifts the reported
    # position and can join fragments into a match that is not contiguous in
    # the buffer. bytes.lower() only folds ASCII letters, so offsets are kept.
    position = raw.lower().find(b"subject")
    if position >= 0:
        print("\n⚠️ WARNING: The word 'subject' APPEARS in the binary data!")
        print(f" Position: {position}")
    else:
        print("\n✓ The word 'subject' does NOT appear in the binary data")
def analyze_structure(data, label):
    """Parse and print the root table's vtable layout of a CloudEvent buffer.

    Args:
        data: Serialized buffer (bytes-like) whose first four bytes hold the
            little-endian offset of the root table.
        label: Text appended to the "Structure Analysis" heading.

    The function prints the root table offset, the vtable location and header
    (vtable size, inline object size, number of field slots) and then one row
    per known CloudEvent field showing its vtable entry. A slot value of 0 is
    reported as "not present"; fields beyond the end of the vtable are
    reported as ``NO``.

    Offsets that point outside the buffer are reported and analysis stops,
    instead of silently reading empty slices as zero.
    """
    rule = "-" * 70
    print(f"\n{rule}")
    print(f"Structure Analysis: {label}")
    print(rule)

    size = len(data)
    if size < 8:
        print("Buffer too small to contain a valid FlatBuffer")
        return

    # The first 4 bytes contain the root table offset (absolute from buffer start).
    root_table_offset = int.from_bytes(data[0:4], "little")
    print(f"Root table offset (absolute from start): {root_table_offset}")
    table_start = root_table_offset
    if table_start + 4 > size:
        print("Root table offset points outside the buffer")
        return
    print(f"Table starts at: {table_start}")

    # The table begins with a signed offset that is subtracted from the table
    # position to locate the vtable.
    vtable_rel_off = int.from_bytes(
        data[table_start:table_start + 4], "little", signed=True
    )
    vtable_start = table_start - vtable_rel_off
    print(f"VTable relative offset: {vtable_rel_off}")
    print(f"VTable starts at: {vtable_start}")
    if vtable_start < 0 or vtable_start + 4 > size:
        print("VTable offset points outside the buffer")
        return

    # VTable header: two unsigned 16-bit values (vtable size, inline object size).
    vtable_size = int.from_bytes(data[vtable_start:vtable_start + 2], "little")
    object_size = int.from_bytes(data[vtable_start + 2:vtable_start + 4], "little")
    if vtable_size < 4 or vtable_start + vtable_size > size:
        print(f"VTable size {vtable_size} is invalid for this buffer")
        return
    num_fields = (vtable_size - 4) // 2
    print(f"VTable size: {vtable_size} bytes")
    print(f"Object inline size: {object_size} bytes")
    print(f"Number of field entries: {num_fields}")

    # Known fields as (name, required) in vtable slot order.
    fields = (
        ("id", True),
        ("source", True),
        ("specversion", True),
        ("type", True),
        ("datacontenttype", False),
        ("dataschema", False),
        ("subject", False),
        ("time", False),
        ("extensions", False),
        ("data", False),
    )

    print(f"\n{'Field':20s} {'Required?':>10s} {'Offset (rel)':>15s} {'Absolute Offset':>18s} {'Present?':>10s}")
    print("-" * 80)
    for index, (field_name, required) in enumerate(fields):
        if index >= num_fields:
            # The vtable has no slot for this field at all.
            print(f"{field_name:20s} {str(required):>10s} {'-':>15s} {'-':>18s} {'NO':>10s}")
            continue
        entry_pos = vtable_start + 4 + index * 2
        field_offset = int.from_bytes(data[entry_pos:entry_pos + 2], "little")
        present = field_offset != 0
        abs_offset = table_start + field_offset if present else "-"
        print(f"{field_name:20s} {str(required):>10s} {field_offset:>15} {str(abs_offset):>18} {str(present):>10s}")
def test_optional_field_serialization():
    """
    Demonstrate how FlatBuffers serializes an optional field.

    Builds one CloudEvent with ``subject`` set and one without it, prints a
    hex dump and vtable analysis of each buffer, compares the two sizes, and
    finally reads both buffers back through the generated ``CloudEvent``
    accessor class.

    What the author expects the output to show:
    1. Field names (like "subject") do not appear in the binary serialization
    2. Optional fields that are not set have no representation in the data
    3. The vtable holds offset entries; an unset field has offset 0
    4. The binary is smaller when optional fields are omitted
    """
    banner = "=" * 70

    print("\n" + banner)
    print("FLATBUFFERS OPTIONAL FIELD SERIALIZATION TEST")
    print(banner)
    print("\nQuestion: How are optional fields serialized?")
    print("Specifically: Does 'subject' appear in the binary when omitted?")

    # One row per scenario: (builder function, hex-dump title,
    # structure-analysis title, read-back heading).
    cases = (
        (
            create_cloudevent_with_subject,
            "CloudEvent WITH 'subject' field",
            "WITH subject",
            "\nEvent WITH subject:",
        ),
        (
            create_cloudevent_without_subject,
            "CloudEvent WITHOUT 'subject' field",
            "WITHOUT subject",
            "\nEvent WITHOUT subject:",
        ),
    )

    # Serialize, dump and analyze each scenario with its own fresh builder.
    buffers = []
    for build, dump_title, analysis_title, _heading in cases:
        payload = build(flatbuffers.Builder(1024))
        hex_dump(payload, dump_title)
        analyze_structure(payload, analysis_title)
        buffers.append(payload)
    data_with_subject, data_without_subject = buffers

    # Size comparison between the two serialized buffers.
    size_with = len(data_with_subject)
    size_without = len(data_without_subject)
    print(f"\n{banner}")
    print("COMPARISON & CONCLUSIONS")
    print(banner)
    print(f"Size with subject: {size_with} bytes")
    print(f"Size without subject: {size_without} bytes")
    print(f"Difference: {size_with - size_without} bytes")

    # Read both buffers back through the generated accessor class.
    print("\n" + banner)
    print("VERIFICATION: Reading back the data")
    print(banner)
    for (_build, _dump_title, _analysis_title, heading), payload in zip(cases, buffers):
        event = CloudEvent.GetRootAs(payload, 0)
        print(heading)
        print(f" ID: {event.Id().decode('utf-8')}")
        subject = event.Subject()
        print(f" Subject: {subject.decode('utf-8') if subject else 'None'}")

    print("\n" + banner)
    print("TEST COMPLETE")
    print(banner)


if __name__ == "__main__":
    test_optional_field_serialization()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment