Bunyan log size limiting stream

A Bunyan stream wrapper that keeps each JSON log record under a configurable size by redacting the largest fields first: long strings are trimmed and suffixed with " <truncated>", oversized arrays are culled to at most 100 elements, and nested objects are compacted down to a shallow depth. Three files follow: the compaction logic (jsonCompactor.ts), its tests, and the LogStream writable itself.
// ---- jsonCompactor.ts ----
import os from "os";

/** Core fields that bunyan puts on every record; these are never truncated. */
export const bunyanCoreFields = new Set([
  "name",
  "level",
  "time",
  "pid",
  "hostname",
  "v",
  "src",
]);

const truncatedStringSuffix = " <truncated>";

/** Maximum depth of JSON objects to traverse when compacting.
 * Traversing too deep can cause CPU load and worse outcomes, as we'd try to
 * cull properties of large objects instead of culling the objects themselves. */
const MAX_DEPTH = 1;

export function compactJsonLogString(line: string, maxLength: number): string {
  if (line.length < maxLength) return line;
  const data: { msg?: string; [k: string]: any } = JSON.parse(line);
  truncateData(data, line.length, maxLength, 0);
  return JSON.stringify(data) + os.EOL;
}
function truncateData(
  data: { [k: string]: any },
  dataLength: number,
  maxLength: number,
  depth: number,
): boolean {
  // Try to truncate the largest fields first.
  const fieldsLargestToSmallest = Object.entries(data)
    .filter(([key]) => !(depth === 0 && bunyanCoreFields.has(key)))
    .map(([key, value]) => [JSON.stringify(value).length, key, value]);
  fieldsLargestToSmallest.sort(([l1], [l2]) => l2 - l1); // descending

  for (const [flen, fkey, fval] of fieldsLargestToSmallest) {
    const otherFieldsLength = dataLength - flen;
    let available = Math.max(maxLength - otherFieldsLength, 0);
    switch (typeof fval) {
      case "string":
        available -= truncatedStringSuffix.length;
        if (available > truncatedStringSuffix.length) {
          data[fkey] =
            trimStringToJsonLength(fval, available) + truncatedStringSuffix;
        } else {
          // Not enough room for a meaningful remainder; drop the field.
          delete data[fkey];
        }
        break;
      case "object":
        if (Array.isArray(fval)) {
          fval.splice(100); // truncate to 100 elements, otherwise this gets computationally expensive
          while (fval.length && JSON.stringify(fval).length > maxLength) {
            fval.pop();
          }
        } else if (fval === null) {
          delete data[fkey];
        } else {
          // an actual JSON object
          if (depth <= MAX_DEPTH && available > 1) {
            if (!truncateData(fval, flen, available, depth + 1)) {
              delete data[fkey]; // truncating didn't help; drop the whole field
            }
          } else {
            delete data[fkey];
          }
        }
        break;
      default:
        delete data[fkey]; // no way to further trim numbers or booleans
        break;
    }
    dataLength = JSON.stringify(data).length;
    if (dataLength <= maxLength) break; // if it helped, stop.
  }
  return dataLength <= maxLength;
}
/**
 * Trims a string so that its JSON representation is at most `l` characters long.
 */
export function trimStringToJsonLength(s: string, l: number): string {
  s = s.slice(0, Math.max(l - 2, 0)); // account for the surrounding quotes
  while (2 < l && l < JSON.stringify(s).length) {
    s = s.slice(0, -1); // escape sequences may still push us over; trim char by char
  }
  return s;
}
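
Before the tests, a quick hypothetical illustration (not part of the gist) of how the two exported functions behave; the record shape and the "payload" field name are made up:

import { compactJsonLogString, trimStringToJsonLength } from "./jsonCompactor";

const line =
  JSON.stringify({ name: "app", level: 30, payload: "x".repeat(5_000) }) + "\n";
const compacted = compactJsonLogString(line, 1_000);
// Still valid JSON, now roughly within the 1000-character budget; the largest
// field was trimmed and marked:
console.log(JSON.parse(compacted).payload.endsWith(" <truncated>")); // true

// trimStringToJsonLength budgets by JSON length, where escaped characters count double:
console.log(trimStringToJsonLength('ab"cd', 7)); // 'ab"c' — its JSON form, "ab\"c", is exactly 7 chars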
// ---- LogStream tests ----
import _, { range } from "lodash";
import os from "os";
import { TestedLogger } from "./TestedLogger";

describe("LogStream", () => {
  it("should write synchronously", () => {
    // Arrange
    const testedLogger = new TestedLogger();
    // Act
    testedLogger.logger.info("hello");
    testedLogger.logger.info("world");
    // Assert
    expect(testedLogger.buffer).toContain("hello");
    expect(testedLogger.buffer).toContain("world");
  });
it("should truncate very long messages", () => {
// Arrange
const testedLogger = new TestedLogger();
// Act
testedLogger.logger.info("a".repeat(1_000));
// Assert
const lines = testedLogger.buffer.split(os.EOL);
expect(lines[0].length).toBeLessThanOrEqual(1000); // varies based on date, PID etc.
const json: { msg: string } = JSON.parse(testedLogger.buffer);
expect(json.msg.length).toBeLessThan(1_000);
expect(json.msg.endsWith("<truncated>")).toBe(true);
});
it("should truncate very long field with newlines", () => {
// Arrange
const testedLogger = new TestedLogger();
// Act
testedLogger.logger.info(
{ reasonableField: "hello", largeField: "hello\n".repeat(1_000) },
"reasonable message",
);
// Assert
const lines = testedLogger.buffer.split(os.EOL);
expect(lines[0].length).toBeLessThanOrEqual(1000); // varies based on date, PID etc.
const json: { msg: string; reasonableField: string; largeField: string } =
JSON.parse(lines[0]);
expect(json.msg).toEqual("reasonable message");
expect(json.reasonableField).toEqual("hello");
expect(json.largeField).toMatch(/^hello\nhello\n.+ <truncated>$/m);
});
it("should handle nulls", () => {
// Arrange
const testedLogger = new TestedLogger();
// Act
testedLogger.logger.info(
Object.fromEntries(range(100).map((num) => [`key${num}`, null])),
"test",
);
// Assert
const lines = testedLogger.buffer.split(os.EOL);
expect(lines[0].length).toBeLessThanOrEqual(1000); // varies based on date, PID etc.
const parsed = JSON.parse(lines[0]);
expect(parsed["key40"]).toBeNull();
});
it("should truncate very large structured log with an array", () => {
// Arrange
const testedLogger = new TestedLogger();
// Act
const largeArray = new Array(1024 * 1024).fill("a");
testedLogger.logger.info({ a: "hello", b: largeArray, c: "world" }, "test");
// Assert
const lines = testedLogger.buffer.split(os.EOL);
expect(lines[0].length).toBeLessThanOrEqual(1000); // varies based on date, PID etc.
expect(JSON.parse(lines[0])).toEqual({
name: "test",
hostname: expect.any(String),
pid: expect.any(Number),
level: 30,
a: "hello",
b: largeArray.slice(0, 100),
c: "world",
msg: "test",
time: expect.any(String),
v: 0,
});
});
it("should truncate very large structured log with nested objects", () => {
// Arrange
const testedLogger = new TestedLogger();
// Act
testedLogger.logger.info(
{
a: "hello",
b: {
smol: "smol",
tol: "a".repeat(1024 * 1024),
},
c: "world",
},
"test",
);
// Assert
const lines = testedLogger.buffer.split(os.EOL);
expect(lines[0].length).toBeLessThanOrEqual(1000); // varies based on date, PID etc.
expect(JSON.parse(lines[0])).toEqual({
name: "test",
hostname: expect.any(String),
pid: expect.any(Number),
level: 30,
a: "hello",
b: { smol: "smol", tol: expect.stringMatching(/^a{100,} <truncated>$/) },
c: "world",
msg: "test",
time: expect.any(String),
v: 0,
});
});
it("should truncate very very large structured log with nested objects", () => {
// Arrange
const testedLogger = new TestedLogger();
// Act
testedLogger.logger.info(
{
largeNested: Object.fromEntries(
_.range(100).map((i) => [
`key${i}`,
Object.fromEntries(_.range(100).map((j) => [`key${j}`, 42])),
]),
),
},
"test",
);
// Assert
const lines = testedLogger.buffer.split(os.EOL);
expect(lines[0].length).toBeLessThanOrEqual(1000); // varies based on date, PID etc.
expect(JSON.parse(lines[0])).toEqual({
name: "test",
hostname: expect.any(String),
pid: expect.any(Number),
level: 30,
msg: "test",
largeNested: {
key99: {
key24: 42,
key25: 42,
key26: 42,
key27: 42,
key28: 42,
key29: 42,
key30: 42,
key31: 42,
key32: 42,
key33: 42,
key34: 42,
key35: 42,
key36: 42,
key37: 42,
key38: 42,
key39: 42,
key40: 42,
key41: 42,
key42: 42,
key43: 42,
key44: 42,
key45: 42,
key46: 42,
key47: 42,
key48: 42,
key49: 42,
key50: 42,
key51: 42,
key52: 42,
key53: 42,
key54: 42,
key55: 42,
key56: 42,
key57: 42,
key58: 42,
key59: 42,
key60: 42,
key61: 42,
key62: 42,
key63: 42,
key64: 42,
key65: 42,
key66: 42,
key67: 42,
key68: 42,
key69: 42,
key70: 42,
key71: 42,
key72: 42,
key73: 42,
key74: 42,
key75: 42,
key76: 42,
key77: 42,
key78: 42,
key79: 42,
key80: 42,
key81: 42,
key82: 42,
key83: 42,
key84: 42,
key85: 42,
key86: 42,
key87: 42,
key88: 42,
key89: 42,
key90: 42,
key91: 42,
key92: 42,
key93: 42,
key94: 42,
key95: 42,
key96: 42,
key97: 42,
key98: 42,
key99: 42,
},
},
time: expect.any(String),
v: 0,
});
});
});
// ---- LogStream ----
import stream from "stream";
import { compactJsonLogString } from "./jsonCompactor";

/**
 * A stream that avoids emitting log records that are too large to be practical
 * and may cause syslog to split the message into multiple records.
 * The solution is to redact large fields until the message fits.
 *
 * @example
 *
 * const logger = bunyan.createLogger({
 *   name: "test",
 *   stream: new LogStream({ stream: process.stdout, maxLength: 5000 }),
 * });
 */
export class LogStream extends stream.Writable {
  readonly stream: stream.Writable;
  readonly maxLength: number;

  constructor(args: { stream: stream.Writable; maxLength: number }) {
    super({
      decodeStrings: false, // bunyan writes strings; pass them through as-is
    });
    this.stream = args.stream;
    this.maxLength = args.maxLength;
  }

  _write(
    chunk: string,
    _encoding: BufferEncoding,
    cb: (error?: Error | null) => void,
  ): void {
    this.stream.write(compactJsonLogString(chunk, this.maxLength));
    cb(); // call back immediately so that we don't depend on the event loop
  }
}
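
The tests import a TestedLogger helper from "./TestedLogger" that isn't included in the gist. Below is a minimal, hypothetical reconstruction inferred from how the tests use it: a bunyan logger writing through a LogStream into an in-memory string, with a maxLength of 1000 to match what the assertions imply.

import bunyan from "bunyan";
import stream from "stream";
import { LogStream } from "./LogStream";

export class TestedLogger {
  /** Everything the logger has written so far, captured synchronously. */
  buffer = "";
  readonly logger: ReturnType<typeof bunyan.createLogger>;

  constructor() {
    // In-memory sink that appends each compacted record to `buffer`.
    const sink = new stream.Writable({
      decodeStrings: false,
      write: (chunk, _encoding, cb) => {
        this.buffer += chunk;
        cb();
      },
    });
    this.logger = bunyan.createLogger({
      name: "test",
      stream: new LogStream({ stream: sink, maxLength: 1000 }),
    });
  }
}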