Skip to content

Instantly share code, notes, and snippets.

@cbsmith
Last active January 16, 2016 05:43
Show Gist options
  • Save cbsmith/421ee7f171b299a4eadf to your computer and use it in GitHub Desktop.
Save cbsmith/421ee7f171b299a4eadf to your computer and use it in GitHub Desktop.
Proposed change to Heka message format.
package message;
import "gogo.proto";
option (gogoproto.sizer_all) = true;
option (gogoproto.marshaler_all) = true;
option (gogoproto.unmarshaler_all) = true;
option java_package = "org.mozilla.heka";
option java_outer_classname = "HekaMessage";
// Header carrying the message length and an optional message
// authentication code (MAC).
// TODO: move this into a separate file.
message Header {
  // MAC functions selectable through `mac_function`.
  enum MACFunction {
    // Do not use this, ever.
    MD5 = 0;
    // Do not use this either.
    SHA1 = 1;
    POLY1305_AES = 2;
  }

  // Length in bytes.
  required uint32 message_length = 1;

  // MAC function in use; POLY1305_AES unless stated otherwise.
  optional MACFunction mac_function = 3 [default = POLY1305_AES];

  // Who is claiming to be the source of the data.
  optional string mac_source = 4;

  // Version of the key for that source.
  optional uint32 mac_key_version = 5;

  // The actual authentication code.
  optional bytes mac = 6;
}
// Representation of a value: units, time formats, and address/identifier
// formats. Referenced by Metric.FieldMetaData.representation.
// TODO: probably move this into a separate file for easy extension.
enum RepresentationType {
  COUNT = 0;
  KB = 1;
  MB = 2;
  GB = 3;
  MM = 4;

  // So many different ways to represent time... https://xkcd.com/927/
  // Open question: can EPOCH_MILLI be the default way to represent time?
  EPOCH_MILLI = 5;
  EPOCH_MICRO = 6;
  EPOCH_NANO = 7;
  DATETIME = 8;            // RFC 3339, sec 5.6
  DATETIME_NANO = 9;       // RFC 3339, sec 5.6
  RFC822_DATETIME = 10;    // RFC 822
  RFC822_DT_WITH_TZ = 11;  // RFC 822
  RFC850_DATETIME = 12;    // RFC 850
  ANSI_C_DATETIME = 13;    // ANSI C
  UNIX_DATE = 14;          // IEEE Std 1003.1-2001, Section 7.3.5
  RUBY_DATETIME = 15;      // Because Ruby just had to be different
  TIME_KITCHEN = 16;       // ditto Chef
  TIMESTAMP = 17;
  TIMESTAMP_MILLIS = 18;
  TIMESTAMP_MICRO = 19;
  TIMESTAMP_NANO = 20;

  EMAIL_ADDRESS = 21;      // RFC 5322, sec 3.4.1
  HOSTNAME = 22;           // RFC 1034, sec 3.1
  // Every enum value must be unique unless `allow_alias` is set, and IPV4 is
  // not a synonym for HOSTNAME, so it takes its own number. It is numbered
  // out of sequence so that the neighbouring values keep their numbers.
  IPV4 = 25;               // RFC 2673, sec 3.2
  IPV6 = 23;               // RFC 2373, sec 2.2
  URI = 24;                // RFC 3986
}
// Metric values plus the metadata that describes them.
message Metric {
  // Describes a single field: its name, value type, representation, and the
  // offset of its value.
  message FieldMetaData {
    // Type of the field's value.
    enum ValueType {
      STRING = 0;
      BYTES = 1;
      // Use when negative values are an option, but unlikely.
      INTEGER = 2;
      DOUBLE = 3;
      BOOL = 4;
      // Use when negative values are likely.
      SIGNED_INTEGER = 5;
      // Use when negative values are impossible.
      UNSIGNED_INTEGER = 6;
    }

    optional string name = 1;
    optional ValueType value_type = 2 [default = INTEGER];
    optional RepresentationType representation = 3;

    // Offset into the field type's value array.
    required uint32 value_offset = 4;
  }

  // Value arrays, one per value type.
  message FieldData {
    repeated string value_string = 4;
    repeated bytes value_bytes = 5;
    repeated int64 value_integer = 6 [packed = true];
    repeated double value_double = 7 [packed = true];
    repeated bool value_bool = 8 [packed = true];
    repeated sint64 value_signed_integer = 9 [packed = true];
    repeated uint64 value_unsigned_integer = 10 [packed = true];
  }

  // Only gets sent periodically.
  repeated FieldMetaData metadata = 1;

  optional FieldData fields = 2;
}
// A log entry: either a structured log record or some other payload.
message LogMessage {
  // Log record with a logger, severity, message text and structured data.
  message StructuredLog {
    // Severity levels. INFORMATIONAL and INFO are aliases for the same value.
    enum Severity {
      option allow_alias = true;

      EMERGENCY = 0;
      ALERT = 1;
      CRITICAL = 2;
      ERROR = 3;
      WARNING = 4;
      NOTICE = 5;
      INFORMATIONAL = 6;
      INFO = 6;
      DEBUG = 7;
      TRACE = 8;
    }

    // Syslog facilities. Values 13 and 14 each carry two alias names.
    enum SyslogFacility {
      option allow_alias = true;

      LOG_KERN = 0;
      LOG_USER = 1;
      LOG_MAIL = 2;
      LOG_DAEMON = 3;
      LOG_AUTH = 4;
      LOG_SYSLOG = 5;
      LOG_LPR = 6;
      LOG_NEWS = 7;
      LOG_UUCP = 8;
      LOG_CRON = 9;
      LOG_AUTHPRIV = 10;
      LOG_FTP = 11;
      LOG_NTP = 12;
      LOG_AUDIT = 13;
      LOG_SECURITY = 13;
      LOG_ALERT = 14;
      LOG_CONSOLE = 14;
      LOG_CRONPRIV = 15;
      LOG_LOCAL0 = 16;
      LOG_LOCAL1 = 17;
      LOG_LOCAL2 = 18;
      LOG_LOCAL3 = 19;
      LOG_LOCAL4 = 20;
      LOG_LOCAL5 = 21;
      LOG_LOCAL6 = 22;
      LOG_LOCAL7 = 23;
    }

    // A named piece of structured data attached to the log record.
    message DataField {
      required string name = 1;
      optional bytes object = 2;
    }

    optional string logger = 1;
    optional Severity severity = 2 [default = DEBUG];
    optional string event_message = 3;

    // Event map in slf4j.
    repeated DataField structured_data = 4;

    // For slf4j.
    optional string marker = 5;

    // For syslog style messages.
    optional SyslogFacility facility = 6;
  }

  // Log payload that is not a StructuredLog.
  message Other {
    optional string type = 1;

    oneof payload {
      string text = 2;
      // In case the payload can't be represented as a string.
      bytes raw = 3;
    }
  }

  // Exactly one kind of log content is carried at a time.
  oneof log_type {
    StructuredLog structured = 2;
    Other other = 3;
  }
}
// Top-level message: identity, timestamp, and either a log entry or a metric.
message Message {
  // Identifier should uniquely identify each message.
  //
  // These fields are left optional for maximum flexibility, but aside from
  // `tid` they should be populated in almost every case.
  message Identifier {
    // An identifier for the host system.
    optional string hostname = 1;

    // Name of the source program.
    optional string source = 2;

    // Open question: possibly go with uint64?
    optional int32 pid = 3;

    // Thread id.
    optional uint64 tid = 4 [default = 0];

    required uint64 sequence_number = 5;
  }

  required Identifier identifier = 1;

  // For compatibility more than anything else.
  optional bytes uuid = 2;

  // Nanoseconds since UNIX epoch.
  required int64 timestamp = 3;

  optional string env_version = 4;

  // The content carried by this message: a log entry or a metric.
  oneof message_type {
    LogMessage log = 5;
    Metric metric = 6;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment