Last active
August 11, 2023 14:48
-
-
Save pluma/9007062 to your computer and use it in GitHub Desktop.
In response to http://www.reddit.com/r/node/comments/1xusho/examples_for_well_documentedcommented_node_code/cff58x1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var DuplexStream = require('readable-stream').Duplex, | |
// A duplex stream is a combination of a read stream and a write stream | |
// For more on streams see http://nodeschool.io/#stream-adventure | |
util = require('util') | |
// The "util" module contains several node core utilities. | |
// In this case we're just using its inheritance helpers. | |
// See http://nodejs.org/api/util.html for API docs. | |
function BufferList(callback) { | |
// In JavaScript it is common to indicate that a function is a constructor by | |
// starting its name with a capital letter. Constructors are functions that are | |
// meant to be invoked with the "new" keyword. | |
// As you can tell, it also takes an argument called "callback". We'll shortly | |
// find out that this may actually be other things than a callback function. | |
if (!(this instanceof BufferList)) return new BufferList(callback) | |
// Calling constructors without "new" will result "this" not being a new object. | |
// If the constructor was called with "new" the instanceof-check will pass. | |
// Otherwise the constructor is invoked again with the passed argument and "new". | |
this._bufs = [] | |
this.length = 0 | |
// BufferList can be invoked with one of three possible arguments: | |
// - a callback function | |
// - a Buffer object | |
// - an array of Buffer objects | |
// The argument is simply named "callback" in either case. | |
if (typeof callback == 'function') { | |
this._callback = callback | |
// The function is stored as the "private" callback property | |
// In JS it is customary to denote properties that should not be used directly | |
// with an underscore prefix. This basically means "use at your own risk" and | |
// usually marks properties that are implementation details. | |
var piper = function(err) { | |
// This is just a function that makes sure the callback is invoked if an error is | |
// passed to it and that the callback is only invoked once (by setting it to "null"). | |
if (this._callback) { | |
// The expression "this._callback" evaluates to either a function or "null" (see below). | |
// As we're looking for a boolean in if expressions, the type of the value will be coerced | |
// to boolean. In practice this means the following values will be "false" (called falsey): | |
// The number 0, the empty string "" (or '') and the values false, null and undefined. | |
// Everything else will be "true" (called truthy). | |
this._callback(err) | |
this._callback = null | |
} | |
}.bind(this) | |
// "bind" guarantees a function will be invoked with its | |
// "this" variable set to the given argument. | |
this.on('pipe', function(src) { | |
src.on('error', piper) | |
}) | |
// BufferList has an "on" method, which suggests it is an event emitter. | |
// The "on" method takes an event name and a callback which will be called | |
// whenever the named event is emitted. | |
// In this case we're passing a callback for the "pipe" event which makes sure | |
// the piped stream will have a listener for its "error" event (set to the function | |
// we defined above). | |
// Streams expect to have a listener for their "error" events. If they don't, | |
// bad things will happen: an unhandled error will be thrown, potentially killing | |
// the entire process. Because streams are generally asynchronous, listening for | |
// error events is the only way to catch these errors. | |
this.on('unpipe', function(src) { | |
src.removeListener('error', piper) | |
}) | |
// This just makes sure the error listener is removed when the "unpipe" event | |
// is emitted. This is the inverse of the previous event handler. | |
} else if (Buffer.isBuffer(callback)) this.append(callback) | |
// If the argument is not a function but a Buffer object, it is added to the BufferList. | |
// This allows creating a BufferList from an existing Buffer in one line of code. | |
else if (Array.isArray(callback)) { | |
// This allows creating a BufferList from an array containing multiple Buffer objects. | |
callback.forEach(function(b) { | |
// The "forEach" method is the functional equivalent of a "for" loop. | |
// The passed function will be called for every item in the array and | |
// passed several arguments (an item, the item's index and the array itself). | |
// In this case we're only using the item itself, here called "b". | |
Buffer.isBuffer(b) && this.append(b) | |
// This is the same check we saw earlier to see whether "b" is a Buffer object. | |
// The in-line && is a shorthand for an if statement and can be read as: | |
// if (Buffer.isBuffer(b)) {this.append(b);} | |
// This is a matter of taste, but you'll often find it in situations where | |
// it can simplify a longer block or statement into a single expression, | |
// such as in a return statement. It's not strictly useful here, but more consise. | |
}.bind(this)) | |
// Again, the function passed to "forEach" is bound to the current "this" variable, | |
// guaranteeing the "this" inside the function refers to the same thing. | |
} | |
DuplexStream.call(this) | |
// DuplexStream is a constructor. As we saw earlier, the "this" variable in a constructor | |
// is set to a new object of the constructor's prototype if it is called with "new". | |
// In this case, we're calling DuplexStream with the current "this" to let perform whatever | |
// initializations it normally performs for new DuplexStream objects. | |
// This is something you will frequently find in code that uses inheritance. | |
} | |
util.inherits(BufferList, DuplexStream) | |
// The util module provides an "inherits" function that makes it easier to write | |
// constructors with prototypes that have a real inheritance chain as you would find in e.g. Java. | |
// All you need to understand for now is that BufferList.prototype is now related to | |
// DuplexStream.prototype, which also means "instanceof" checks between BufferList instances | |
// and the DuplexStream constructor will return "true". | |
BufferList.prototype._offset = function(offset) { | |
// Again the underscore denotes this is an internal method. | |
// The idea behind this function seems to be to find out, given an offset | |
// in the entirety of the BufferList (i.e. pretending the BufferList were one big Buffer) | |
// the index of the Buffer object containing that offset in the BufferList | |
// and the correct offset within that Buffer object. | |
// E.g. if you have a BufferList consisting of two Buffer objects, each of length 5, | |
// and you are asked for the offset 7 in that data, this function lets you find out | |
// that you need to look at offset 2 (7 minus the length of the first Buffer) | |
// in the second Buffer. | |
var tot = 0, // "current total" | |
i = 0, // "index" | |
_t // "new total" | |
for (; i < this._bufs.length; i++) { | |
// The first part of the for-loop is just for assigning initial values. | |
// As the values have already been assigned, it is left empty. | |
// The loop then iterates over all values from 0 to (not including) | |
// the length of the internal buffer. | |
_t = tot + this._bufs[i].length | |
// "new total is the current total plus the length of the buffer at the index" | |
// In other words, we're adding up the length of the buffers in this buffer list. | |
if (offset < _t) return [i, offset - tot] | |
// If "offset" is less than the new total, we return an array containing two values: | |
// - the index of the matching buffer | |
// - the value of offset minus the current total | |
tot = _t | |
// Otherwise the current total is set to the new total and we continue the loop. | |
} | |
// This function assumes the offset is eventually matched and the list is not empty. | |
// Otherwise it would return "undefined". | |
// Note: another way to implement this logic would have been the following: | |
// var i = 0, buffer | |
// for (; i < this._bufs.length; i++) { | |
// buffer = this._bufs[i] | |
// if (offset < buffer.length) return [i, offset] | |
// offset -= buffer.length | |
// } | |
} | |
BufferList.prototype.append = function(buf) { | |
// This method adds a given value to the end of the BufferList. | |
this._bufs.push(Buffer.isBuffer(buf) ? buf : new Buffer(buf)) | |
// Here you see a ternary expression used as an in-line version of an if/else statement. | |
// This basically says "if buf is a Buffer object, use buf, otherwise create a new Buffer object from buf". | |
// It simply makes sure the value is wrapped in a new Buffer object if it is not already one | |
// before being added to the internal array holding this BufferList object's Buffer objects. | |
this.length += buf.length | |
// The BufferList object's length property contains the total length of all Buffer objects | |
// contained therein, so it is updated accordingly. | |
// Here we see another assumption: even if "buf" is not a Buffer object, it must have a length. | |
// This will actually result in an inconsistent state if we pass an argument accepted by | |
// the Buffer constructor that doesn't have a length property. For example, we can create | |
// a new empty Buffer of length 10 by calling new Buffer(10). But the number "10" doesn't have | |
// a length property, which means this code will throw an exception after creating and appending | |
// the empty buffer it creates but before updating the length property of the BufferList. | |
// From the API documentation we can infer this method is not meant to be called with a number. | |
// You could argue that this is either intentionally undefined behaviour or a bug. | |
return this | |
// Returning "this" allows chaining multiple method calls like so: | |
// myBufferList.append('hello').append(' ').append('world') | |
} | |
BufferList.prototype._write = function(buf, encoding, callback) { | |
// This is an internal method for appending a value to the buffer and then calling a function. | |
// As "encoding" is not actually used, we can assume this is only implemented to provide | |
// compatibility with a similar method that requires an "encoding" argument (e.g. something | |
// used by DuplexStream). | |
this.append(buf) | |
if (callback) callback() | |
// Another "truthiness" check: if callback is truthy (e.g. a function), call it. | |
// Of course this breaks if callback is something silly that just happens to be truthy, | |
// but that would probably indicate this method is not invoked correctly. | |
} | |
BufferList.prototype._read = function(size) { | |
// Another internal function, this time something more complicated. | |
if (!this.length) return this.push(null) | |
// If this.length is falsey, it is probably 0. | |
// We won't find a definition of the "push" method in this module | |
// because it comes from ReadableStream (via DuplexStream). | |
// It has something to do with stream buffering. | |
size = Math.min(size, this.length) | |
// This just makes sure size is never larger than the actual | |
// length of this BufferList (i.e. our total number of bytes). | |
this.push(this.slice(0, size)) | |
// This again invokes the ominous "push" method. | |
// We'll see what "slice" does further down below. | |
this.consume(size) | |
// This method is also explained further down below. | |
} | |
BufferList.prototype.end = function(chunk) { | |
DuplexStream.prototype.end.call(this, chunk) | |
// This method is already defined by DuplexStream. | |
// Because we're overriding it here, we need to call | |
// the original method manually, again making sure it is | |
// called as a method of "this" and passing the argument. | |
if (this._callback) { | |
this._callback(null, this.slice()) | |
this._callback = null | |
} | |
// Here we see why we're overriding it. | |
// In addition to DuplexStream's behaviour we want to call | |
// the callback if it was defined. | |
// Passing two arguments is a common idiom for node callbacks. | |
// In this case "null" as first argument indicates no error occurred. | |
// The second argument is the actual result, in this case, the result | |
// of calling the slice method with no argument (see below). | |
// Again we make sure the callback is only ever called once. | |
} | |
BufferList.prototype.get = function(index) { | |
return this.slice(index, index + 1)[0] | |
// Not very interesting, this method simply returns a slice | |
// starting at the given index and exactly one byte long, | |
// but unlike slice it gets the underlying value (i.e. a string) | |
// rather than the buffer returned by slice (see below). | |
} | |
BufferList.prototype.slice = function(start, end) { | |
return this.copy(null, 0, start, end) | |
// Another surprise: slice is just a wrapper around "copy". | |
// In Java you'd probably call this the "facade pattern", | |
// but it's really just a shorthand. | |
} | |
BufferList.prototype.copy = function(dst, dstStart, srcStart, srcEnd) { | |
// Couple of arguments expected her. "dst" means "destination", "src" means "source". | |
// The "start" and "end" arguments are byte offsets, "dst" is a Buffer to copy bytes to. | |
// In other words: this function takes a Buffer "dst" and copies the bytes between | |
// the given "srcStart" and "srcEnd" into it, starting at "dstStart". | |
if (typeof srcStart != 'number' || srcStart < 0) srcStart = 0 | |
if (typeof srcEnd != 'number' || srcEnd > this.length) srcEnd = this.length | |
if (srcStart >= this.length) return dst || new Buffer(0) | |
if (srcEnd <= 0) return dst || new Buffer(0) | |
// This code mostly makes sure the values are somewhat sane. | |
// Source start and end should be between zero and the number of bytes in this buffer. | |
// If source start is an offset that is too big or if source end is an offset that is too small, | |
// no copying happens and either the destination buffer or a new empty buffer of length zero is returned. | |
// We can see that in addition to copying bytes to a destination buffer, this function also always | |
// returns a buffer and that the destination buffer is an optional argument. | |
var copy = !! dst, | |
// The double negation is a common trick to copy the "truthiness" value of a variable. | |
// This reads as "copy = there is a destination buffer", in other words: | |
// if a destination buffer has been passed, we want to copy to that buffer and are in "copy" mode. | |
off = this._offset(srcStart), | |
// This uses the internal method we saw earlier. "off" is an array consisting of a buffer and | |
// the relative offset within that buffer matching "source start". | |
len = srcEnd - srcStart, | |
// Simple arithmetic. We're determining the total number of bytes to copy. | |
bytes = len, | |
// And we're copying the same value to another variable, which probably means | |
// one of the two variables will be modified at some point. | |
bufoff = (copy && dstStart) || 0, | |
// This is a combination of two idioms we saw before and an oddity of how && works. | |
// As we can tell by the parentheses, first we execute "copy && dstStart", in other words, | |
// "if copy then destination start" -- if we saved the result to a variable at this point, | |
// it would equal the value of "copy" if "copy" is falsey or the value of "dstStart" if | |
// "copy" is truthy; this may take beginners by surprise: | |
// | |
// Boolean expressions *don't* return a boolean value. To illustrate: | |
// - "foo && bar" returns "bar" if "foo" is truthy but "foo" if "foo" is falsey | |
// - "foo || bar" returns "foo" if "foo" is truthy but "bar" if "foo" is falsey | |
// This is important to understand. | |
// | |
// The line above simply combines these two facts: | |
// If "copy" is falsey, "copy && dstStart" results in "copy", which means "|| 0" will result in 0. | |
// If "copy" is truthy, "copy && dstStart" results in "dstStart", which means: | |
// - if "dstStart" is also truthy, the entire expression will result in the value of dstStart | |
// - if "dstStart" is falsey, the entire expression will result in 0. | |
// Also note that this assumes that if "copy" is truthy, "dstStart" is a number (not just any truthy value). | |
// | |
// This could code also have been written like this: | |
// var bufoff = 0 | |
// if (copy) bufoff = dstStart | |
// This would make the same assumption. | |
start = off[1], | |
// The second value in "off" is the relative offset, so we're copying that. | |
l, i | |
// Protip: avoid "l" as a variable name because it can be hard to distinguish. | |
if (srcStart === 0 && srcEnd == this.length) { | |
// If we're supposed to copy everything from offset zero to the total number | |
// of bytes in this BufferList, we can simply copy everything. | |
if (!copy) | |
return Buffer.concat(this._bufs) | |
// If we're not in copy-mode, simply return the result of having Buffer | |
// concatenate all the buffers in our internal array (creating a new Buffer object | |
// containing all the bytes in those buffers). | |
for (i = 0; i < this._bufs.length; i++) { | |
this._bufs[i].copy(dst, bufoff) | |
bufoff += this._bufs[i].length | |
} | |
// This part could be rewritten to use forEach for consistency, e.g.: | |
// this._bufs.forEach(function(buf) { | |
// buf.copy(dst, bufoff) | |
// bufoff += buf.length | |
// }) | |
// It takes each buffer in this BufferList and tells it to copy its bytes to the | |
// destination buffer, starting at the destination's offset "bufoff", which is increased | |
// by the number of bytes just written each time. | |
// We can see that the Buffer object has a similar "copy" method to our BufferList, | |
// which means our BufferList could in fact also contain other BufferList objects. | |
return dst | |
// Once we're done, the destination buffer is returned, so either way, if we're just copying | |
// everything, the copy function ends here. | |
} | |
// The following code will only be run if we haven't already returned. | |
if (bytes <= this._bufs[off[0]].length - start) { | |
// This checks whether the entire range of bytes we want to copy is contained in the same buffer. | |
return copy ? this._bufs[off[0]].copy(dst, dstStart, start, start + bytes) : this._bufs[off[0]].slice(start, start + bytes) | |
// Another ternary: | |
// If we're in copy-mode, tell the matching buffer to copy the bytes in the given range to the destination buffer. | |
// Otherwise tell it to return a new buffer containing the bytes in the given range. | |
// This distinction is probably not strictly necessary if the copy method always returns a new buffer if passed a falsey | |
// value as the destination buffer. | |
} | |
if (!copy) | |
dst = new Buffer(len) | |
// If we're not in copy mode, we need a new buffer that is big enough to contain all the bytes. | |
// From this point on we can pretend we're in copy-mode because the logic will be the same. | |
for (i = off[0]; i < this._bufs.length; i++) { | |
// "off[0]" is the index of the buffer containing the start offset | |
// We're iterating over the internal array of buffers. | |
l = this._bufs[i].length - start | |
// This will only be relevant on the first iteration. | |
// Basically we're copying the buffer's length to the variable "l" (for "length") | |
// but we're substracting "start", which, if you remember is simply "off[1]", | |
// i.e. the relative offset within the buffer matching the absolute offset in | |
// the range of bytes contained in the entire BufferList. | |
if (bytes > l) { | |
// "bytes" here means "bytes left to copy". | |
// This just checks if we'll have bytes left to copying after copying the bytes | |
// in this buffer. | |
this._bufs[i].copy(dst, bufoff, start) | |
} else { | |
// If all the bytes left to copy are contained in this iteration's buffer, | |
// we just need to copy that many more bytes from the buffer and then break out | |
// of the loop. | |
this._bufs[i].copy(dst, bufoff, start, start + bytes) | |
break | |
} | |
bufoff += l | |
bytes -= l | |
// "bufoff" is the write offset in the destination buffer. We increase it by the | |
// number of bytes we just wrote. "bytes" is the number of bytes left to write. | |
// We decrease it by the same number of bytes. | |
if (start) start = 0 | |
// This will only be executed in the first iteration because "start" will never | |
// change again throughout this loop. Basically, after the first write, set "start" to zero, | |
// because we always want to copy from the start of the next buffer. | |
} | |
return dst | |
// Finally, return the destination buffer. If we weren't in copy mode, this is a new buffer. | |
} | |
BufferList.prototype.toString = function(encoding, start, end) { | |
// This emulates Buffer's toString method. Basically it converts the bytes contained in this | |
// BufferList (optionally limited to the given start and end offsets) to a string, using the | |
// given encoding. | |
return this.slice(start, end).toString(encoding) | |
// As we can see, it's just another copy-call: it creates a new Buffer from the BufferList's | |
// bytes in the given range and then retruns the result of calling that buffer's toString | |
// method with the given encoding. | |
} | |
BufferList.prototype.consume = function(bytes) { | |
// Finally the mysterious "consume" method. | |
// "bytes" here means "number of bytes to consume". | |
while (this._bufs.length) { | |
// This loop repeats until this BufferList object's internal array of buffers | |
// is empty. This implies we're removing buffers from the array in each iteration. | |
if (bytes > this._bufs[0].length) { | |
// This is a similar condition to what we saw before: | |
// "if there are more bytes left to consume than contained in the buffer at index 0". | |
// Because we are explicitly checking the buffer at array index 0, we can assume | |
// that the code will remove buffers from the array starting at the beginning | |
// rather than the end. | |
bytes -= this._bufs[0].length | |
// The "number of bytes left to consume" is reduced by the number of bytes in the buffer. | |
// Some people consider modifying a value that was passed as an argument bad practice. | |
this.length -= this._bufs[0].length | |
// Now we're seeing that the consume method really will delete stuff: | |
// The BufferList object's length is reduced by the number of bytes in the buffer. | |
this._bufs.shift() | |
// And the first buffer in the array is removed from the array. | |
// So apparently we're not doing anything with the buffer itself. | |
} else { | |
// This will be executed when we have reached the last buffer to consume. | |
this._bufs[0] = this._bufs[0].slice(bytes) | |
// As we can see, the consume method is simply deleting all bytes in the BufferList | |
// up to the given number of bytes. | |
// This line replaces the first buffer in the array with a new buffer containing the | |
// bytes in the given buffer, starting at the given offset. | |
this.length -= bytes | |
// And the number of bytes in the ByteList is adjusted one last time. | |
break | |
// We have to break out of the loop or else it wouldn't know to stop because there are | |
// still buffers in the array. | |
} | |
} | |
return this | |
// Again, "this" is returned to allow chaining. | |
} | |
BufferList.prototype.duplicate = function() { | |
// One last method: this duplicates ("clones") our BufferList. | |
var i = 0, | |
copy = new BufferList() | |
// We're duplicating, so we need a new BufferList as a target first. | |
for (; i < this._bufs.length; i++) | |
copy.append(this._bufs[i]) | |
// As we can see, it's actually a shallow copy: | |
// we're creating a new BufferList that simply contains exactly the | |
// same buffers as our current BufferList. We're not actually duplicating | |
// the buffers themselves. | |
return copy | |
// This time we can't return "this" because we need to return the result. | |
} | |
; | |
// This stray semicolon is necessary because the author relies on automatic semicolon insertion (ASI) | |
// (i.e. he doesn't use any semicolons in his code). Because JavaScript mostly doesn't care about | |
// line breaks, the following open parenthesis would otherwise be interpreted as if it were | |
// following the previous closing brace, turning the previous function expression into an | |
// anonymous function invokation (e.g. function() {/* body */}(/* arguments */)). | |
// This is why many people advocate simply using semicolons. | |
// ASI proponents sometimes use a negation (!) instead to achieve the same effect without using a semicolon. | |
(function() { | |
// | |
var methods = { | |
'readDoubleBE': 8, | |
'readDoubleLE': 8, | |
'readFloatBE': 4, | |
'readFloatLE': 4, | |
'readInt32BE': 4, | |
'readInt32LE': 4, | |
'readUInt32BE': 4, | |
'readUInt32LE': 4, | |
'readInt16BE': 2, | |
'readInt16LE': 2, | |
'readUInt16BE': 2, | |
'readUInt16LE': 2, | |
'readInt8': 1, | |
'readUInt8': 1 | |
} | |
// This looks a bit strange at first glance, but it's just a mapping of | |
// method names to values. We'll see what the values mean below. | |
for (var m in methods) { | |
// This iterates over the property names of the "methods" object. | |
(function(m) { | |
BufferList.prototype[m] = function(offset) { | |
// Looks like we're defining a new method for each name. | |
return this.slice(offset, offset + methods[m])[m](0) | |
// If we substitute "m" for a name from the mapping above this is easier | |
// to understand (e.g. "readInt8"): | |
// return this.slice(offset, offset + methods.readInt8).readInt8(0) | |
// | |
// Or more verbosely: | |
// var bytes = method.readInt8 | |
// var buffer = this.slice(offset, offset + bytes) | |
// return buffer.readInt8(0) | |
// | |
// As you can see the magic numbers we saw before correspond to the | |
// number of bytes to read for each method name. | |
// The methods themselves correspond to methods with the same name | |
// defined by Buffer. Looking at Node's API docs for Buffer tells us | |
// that the hard-coded zero here simply means "return the first value | |
// of this type in the buffer". | |
// Of course the Buffer created by slice will only contain one value of | |
// the given type, but we need to use slice because a multi-byte value | |
// could be spread over multiple buffers. | |
} | |
}(m)) | |
// The anonymous function we just defined in this loop is directly invoked | |
// with the iteration variable. This is called an IIFE (see below). | |
// The reason the author is doing this is that because "m" is our iteration | |
// variable, the final value of the variable will change with each loop. | |
// Because we're using the variable in the methods we're defining, we would | |
// thus define all methods with the same value of "m". By passing the value | |
// in a function, we're copying the value for each iteration, side-stepping | |
// the issue. | |
} | |
// Since we're already using ECMAScript 5 language features (e.g. forEach), | |
// we could rewrite this part like this: | |
// Object.keys(methods).forEach(function(m) { | |
// BufferList.prototype[m] = function(offset) { | |
// return this.slice(offset, offset + methods[m])[m](0) | |
// } | |
// }) | |
// As you can see, this would also eliminate the need for the IIFE because the | |
// code called in each iteration is already wrapped in a function. | |
}()) | |
// That this function expression started with an opening parenthesis already suggested | |
// this, but now we know for sure: another immediately invoked function expression (IIFE)! | |
module.exports = BufferList | |
// There are two ways to export values in a node module: | |
// 1. defining properties of the "exports" variable. | |
// e.g. exports.foo = 'bar' | |
// This allows you to use the module like this: | |
// console.log(require('bl').foo) // "bar" | |
// 2. defining an "exports" property of the "module" variable. | |
// e.g. module.exports = 'bar' | |
// This allows you to use the module like this: | |
// console.log(require('bl')) // "bar" | |
// | |
// Because it is generally considered best practice in the node community to have a single | |
// export per module, most people always use "module.exports". | |
// Generally speaking, you probably only want to use the "exports" way if you want to export | |
// multiple things and can't clearly identify one of them as the primary purpose of the module. | |
// This is also a good indicator you might want to split the module up. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment