Last active
January 16, 2016 05:43
-
-
Save cbsmith/421ee7f171b299a4eadf to your computer and use it in GitHub Desktop.
Proposed change to Heka message format.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This schema relies on proto2-only features (`required` fields and explicit
// field defaults), so the syntax is declared explicitly.
syntax = "proto2";

package message;

import "gogo.proto";

// gogoproto code-generation options applied to every message in this file.
option (gogoproto.sizer_all) = true;
option (gogoproto.marshaler_all) = true;
option (gogoproto.unmarshaler_all) = true;

// Java code-generation options.
option java_package = "org.mozilla.heka";
option java_outer_classname = "HekaMessage";
// Header carrying the message length and the optional message
// authentication code (MAC) details.
// Have this in a separate file for sure.
// Field number 2 is not used by this message.
message Header {
  // Function used to compute the MAC.
  enum MACFunction {
    // Don't use this ever.
    MD5 = 0;
    // Still don't use this ever.
    SHA1 = 1;
    POLY1305_AES = 2;
  }

  // Length in bytes.
  required uint32 message_length = 1;

  optional MACFunction mac_function = 3 [default = POLY1305_AES];

  // Who is claiming to be the source of the data.
  optional string mac_source = 4;

  // The version of the key for that source.
  optional uint32 mac_key_version = 5;

  // The actual authentication code.
  optional bytes mac = 6;
}
// How a field value is represented (unit or textual format); referenced by
// Metric.FieldMetaData.representation.
// Probably have this in a separate file for easy extension.
//
// Every value has its own number: `allow_alias` is not enabled for this enum,
// so protoc rejects duplicate numbers.
enum RepresentationType {
  COUNT = 0;
  KB = 1;
  MB = 2;
  GB = 3;
  MM = 4;

  // So many different ways to represent time... https://xkcd.com/927/
  EPOCH_MILLI = 5;        // can we use this as the default way to represent time
  EPOCH_MICRO = 6;
  EPOCH_NANO = 7;
  DATETIME = 8;           // RFC 3339, sec 5.6
  DATETIME_NANO = 9;      // RFC 3339, sec 5.6
  RFC822_DATETIME = 10;   // RFC 822
  RFC822_DT_WITH_TZ = 11; // RFC 822
  RFC850_DATETIME = 12;   // RFC 850
  ANSI_C_DATETIME = 13;   // ANSI C
  UNIX_DATE = 14;         // IEEE Std 1003.1-2001, Section 7.3.5
  RUBY_DATETIME = 15;     // Because Ruby just had to be different
  TIME_KITCHEN = 16;      // ditto Chef
  TIMESTAMP = 17;
  TIMESTAMP_MILLIS = 18;
  TIMESTAMP_MICRO = 19;
  TIMESTAMP_NANO = 20;

  EMAIL_ADDRESS = 21;     // RFC 5322, sec 3.4.1
  HOSTNAME = 22;          // RFC 1034, sec 3.1
  // IPV4 used to collide with HOSTNAME on 22; it and the values after it now
  // take the next free numbers.
  IPV4 = 23;              // RFC 2673, sec 3.2
  IPV6 = 24;              // RFC 2373, sec 2.2
  URI = 25;               // RFC 3986
}
// A metric: per-field metadata plus the typed arrays holding the field values.
message Metric {
  // Describes a single field and where to find its value in FieldData.
  message FieldMetaData {
    // Selects which FieldData value array the field's value lives in.
    enum ValueType {
      STRING = 0;
      BYTES = 1;
      // Use when negative values are an option, but unlikely.
      INTEGER = 2;
      DOUBLE = 3;
      BOOL = 4;
      // Use when negative values are likely.
      SIGNED_INTEGER = 5;
      // Use when negative values are impossible.
      UNSIGNED_INTEGER = 6;
    }

    optional string name = 1;
    optional ValueType value_type = 2 [default = INTEGER];
    optional RepresentationType representation = 3;

    // Offset into the field type's value array.
    required uint32 value_offset = 4;
  }

  // One repeated value array per ValueType. Field numbers start at 4.
  message FieldData {
    repeated string value_string = 4;
    repeated bytes value_bytes = 5;
    repeated int64 value_integer = 6 [packed = true];
    repeated double value_double = 7 [packed = true];
    repeated bool value_bool = 8 [packed = true];
    repeated sint64 value_signed_integer = 9 [packed = true];
    repeated uint64 value_unsigned_integer = 10 [packed = true];
  }

  // Only gets sent periodically.
  repeated FieldMetaData metadata = 1;

  optional FieldData fields = 2;
}
// A log entry, carried either as a structured log record or as a free-form
// "other" payload (see the `log_type` oneof).
message LogMessage {
  // Structured log record with logger, severity, message and extra data.
  message StructuredLog {
    // Severity levels. INFORMATIONAL and INFO are aliases for the same value.
    enum Severity {
      option allow_alias = true;

      EMERGENCY = 0;
      ALERT = 1;
      CRITICAL = 2;
      ERROR = 3;
      WARNING = 4;
      NOTICE = 5;
      INFORMATIONAL = 6;
      INFO = 6;
      DEBUG = 7;
      TRACE = 8;
    }

    optional string logger = 1;
    optional Severity severity = 2 [default = DEBUG];
    optional string event_message = 3;

    // A named entry of structured data; the value is carried as raw bytes.
    message DataField {
      required string name = 1;
      optional bytes object = 2;
    }

    // Event map in slf4j.
    repeated DataField structured_data = 4;

    // For slf4j.
    optional string marker = 5;

    // Syslog facilities. Values 13 and 14 each have two aliased names.
    enum SyslogFacility {
      option allow_alias = true;

      LOG_KERN = 0;
      LOG_USER = 1;
      LOG_MAIL = 2;
      LOG_DAEMON = 3;
      LOG_AUTH = 4;
      LOG_SYSLOG = 5;
      LOG_LPR = 6;
      LOG_NEWS = 7;
      LOG_UUCP = 8;
      LOG_CRON = 9;
      LOG_AUTHPRIV = 10;
      LOG_FTP = 11;
      LOG_NTP = 12;
      LOG_AUDIT = 13;
      LOG_SECURITY = 13;
      LOG_ALERT = 14;
      LOG_CONSOLE = 14;
      LOG_CRONPRIV = 15;
      LOG_LOCAL0 = 16;
      LOG_LOCAL1 = 17;
      LOG_LOCAL2 = 18;
      LOG_LOCAL3 = 19;
      LOG_LOCAL4 = 20;
      LOG_LOCAL5 = 21;
      LOG_LOCAL6 = 22;
      LOG_LOCAL7 = 23;
    }

    // For syslog style messages.
    optional SyslogFacility facility = 6;
  }

  // Log entry that does not fit StructuredLog: a type label plus a payload.
  message Other {
    optional string type = 1;

    oneof payload {
      string text = 2;
      // In case the payload can't be represented as a string.
      bytes raw = 3;
    }
  }

  // Exactly one log flavour is carried. Field number 1 is not used.
  oneof log_type {
    StructuredLog structured = 2;
    Other other = 3;
  }
}
// Top-level envelope: identifies the message, timestamps it, and carries
// either a log entry or a metric (see the `message_type` oneof).
message Message {
  // Identifier should uniquely identify each message.
  message Identifier {
    // These fields are left optional for maximum flexibility, but aside from
    // tid they should be populated in almost every case.

    // An identifier for the host system.
    optional string hostname = 1;

    // Name of the source program.
    optional string source = 2;

    // Possibly go with uint64?
    optional int32 pid = 3;

    // Thread id.
    optional uint64 tid = 4 [default = 0];

    required uint64 sequence_number = 5;
  }

  required Identifier identifier = 1;

  // For compatibility more than anything else.
  optional bytes uuid = 2;

  // Nanoseconds since UNIX epoch.
  required int64 timestamp = 3;

  optional string env_version = 4;

  oneof message_type {
    LogMessage log = 5;
    Metric metric = 6;
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment