This is a reply to the question raised by @getify on Twitter:
here's a variation on the question... will JS engines exhibit much performance difference between these two loops?
for (var i = 0; i < 100000000; i++) {
// do some stuff, but not closure
}
for (let i = 0; i < 100000000; i++) {
// do the same stuff (no closure)
}
Every JS engine may have its own implementation, but luckily V8 can print the bytecode it generates, so we can see what's going on under the hood.
The code I used for the test:
// Test case: loop counter declared with `let`, and no closure captures `i`.
// Logs 0 through 9 with console.log.
function let_for(){
for (let i = 0; i < 10; i++) {
console.log(i)
}
}
// Test case: loop counter declared with `var`, and no closure captures `i`.
// Logs 0 through 9 with console.log. `var` is intentional here: this function
// exists to be compared against the `let` version of the same loop.
function var_for() {
for (var i = 0; i < 10; i++) {
console.log(i)
}
}
// Test case: loop counter declared with `let` and captured by a closure.
// Schedules ten callbacks with setTimeout (no delay argument); each callback
// logs the `i` it closed over. Because `let` gives every iteration its own
// binding, the callbacks log 0 through 9.
function let_timeout() {
for (let i = 0; i < 10; i++) {
// Named function expression so it can be identified by name in tooling output.
setTimeout(function let_timeout_inner() {
console.log(i)
})
}
}
// Test case: loop counter declared with `var` and captured by a closure.
// Schedules ten callbacks with setTimeout (no delay argument); each callback
// logs `i`. `var` is intentional: all callbacks share the single
// function-scoped `i`, so they run after the loop has finished and log 10
// ten times.
function var_timeout() {
for (var i = 0; i < 10; i++) {
// Named function expression so it can be identified by name in tooling output.
setTimeout(function var_timeout_inner() {
console.log(i)
})
}
}
// Call every test function once. Presumably this is needed so that V8
// actually compiles each function and prints its bytecode -- TODO confirm.
let_for()
var_for()
let_timeout()
var_timeout()
// Run with (assuming this file is saved as a.js):
// node --print-bytecode a.js > out.txt
Environment: OSX El Capitan + Node.js v12.2.0
First, we can see the generated bytecode for var_for and let_for:
[generated bytecode for function: var_for]
Parameter count 1
Register count 3
Frame size 24
95 E> 0x1e9bbe2f0972 @ 0 : a5 StackCheck
115 S> 0x1e9bbe2f0973 @ 1 : 0b LdaZero
0x1e9bbe2f0974 @ 2 : 26 fb Star r0
120 S> 0x1e9bbe2f0976 @ 4 : 0c 0a LdaSmi [10]
120 E> 0x1e9bbe2f0978 @ 6 : 69 fb 00 TestLessThan r0, [0]
0x1e9bbe2f097b @ 9 : 99 1c JumpIfFalse [28] (0x1e9bbe2f0997 @ 37)
102 E> 0x1e9bbe2f097d @ 11 : a5 StackCheck
137 S> 0x1e9bbe2f097e @ 12 : 13 00 01 LdaGlobal [0], [1]
0x1e9bbe2f0981 @ 15 : 26 f9 Star r2
145 E> 0x1e9bbe2f0983 @ 17 : 28 f9 01 03 LdaNamedProperty r2, [1], [3]
0x1e9bbe2f0987 @ 21 : 26 fa Star r1
145 E> 0x1e9bbe2f0989 @ 23 : 59 fa f9 fb 05 CallProperty1 r1, r2, r0, [5]
127 S> 0x1e9bbe2f098e @ 28 : 25 fb Ldar r0
0x1e9bbe2f0990 @ 30 : 4c 07 Inc [7]
0x1e9bbe2f0992 @ 32 : 26 fb Star r0
0x1e9bbe2f0994 @ 34 : 8a 1e 00 JumpLoop [30], [0] (0x1e9bbe2f0976 @ 4)
0x1e9bbe2f0997 @ 37 : 0d LdaUndefined
156 S> 0x1e9bbe2f0998 @ 38 : a9 Return
Constant pool (size = 2)
Handler Table (size = 0)
[generated bytecode for function: let_for]
Parameter count 1
Register count 3
Frame size 24
16 E> 0x1e9bbe2d800a @ 0 : a5 StackCheck
35 S> 0x1e9bbe2d800b @ 1 : 0b LdaZero
0x1e9bbe2d800c @ 2 : 26 fb Star r0
40 S> 0x1e9bbe2d800e @ 4 : 0c 0a LdaSmi [10]
40 E> 0x1e9bbe2d8010 @ 6 : 69 fb 00 TestLessThan r0, [0]
0x1e9bbe2d8013 @ 9 : 99 1c JumpIfFalse [28] (0x1e9bbe2d802f @ 37)
22 E> 0x1e9bbe2d8015 @ 11 : a5 StackCheck
57 S> 0x1e9bbe2d8016 @ 12 : 13 00 01 LdaGlobal [0], [1]
0x1e9bbe2d8019 @ 15 : 26 f9 Star r2
65 E> 0x1e9bbe2d801b @ 17 : 28 f9 01 03 LdaNamedProperty r2, [1], [3]
0x1e9bbe2d801f @ 21 : 26 fa Star r1
65 E> 0x1e9bbe2d8021 @ 23 : 59 fa f9 fb 05 CallProperty1 r1, r2, r0, [5]
47 S> 0x1e9bbe2d8026 @ 28 : 25 fb Ldar r0
0x1e9bbe2d8028 @ 30 : 4c 07 Inc [7]
0x1e9bbe2d802a @ 32 : 26 fb Star r0
0x1e9bbe2d802c @ 34 : 8a 1e 00 JumpLoop [30], [0] (0x1e9bbe2d800e @ 4)
0x1e9bbe2d802f @ 37 : 0d LdaUndefined
76 S> 0x1e9bbe2d8030 @ 38 : a9 Return
Constant pool (size = 2)
Handler Table (size = 0)
They are exactly the same, and there is only one variable, which is stored in r0. So V8 doesn't create a new variable in each iteration in this case.
How about setTimeout, the case where we really need a different variable in each iteration?
We can start from the bytecode for var_timeout_inner and let_timeout_inner:
[generated bytecode for function: var_timeout_inner]
Parameter count 1
Register count 3
Frame size 24
397 E> 0x2725c3d772e2 @ 0 : a5 StackCheck
408 S> 0x2725c3d772e3 @ 1 : 13 00 00 LdaGlobal [0], [0]
0x2725c3d772e6 @ 4 : 26 fa Star r1
416 E> 0x2725c3d772e8 @ 6 : 28 fa 01 02 LdaNamedProperty r1, [1], [2]
0x2725c3d772ec @ 10 : 26 fb Star r0
0x2725c3d772ee @ 12 : 1a 04 LdaCurrentContextSlot [4]
0x2725c3d772f0 @ 14 : 26 f9 Star r2
416 E> 0x2725c3d772f2 @ 16 : 59 fb fa f9 04 CallProperty1 r0, r1, r2, [4]
0x2725c3d772f7 @ 21 : 0d LdaUndefined
427 S> 0x2725c3d772f8 @ 22 : a9 Return
Constant pool (size = 2)
Handler Table (size = 0)
[generated bytecode for function: let_timeout_inner]
Parameter count 1
Register count 3
Frame size 24
258 E> 0x2725c3d76972 @ 0 : a5 StackCheck
269 S> 0x2725c3d76973 @ 1 : 13 00 00 LdaGlobal [0], [0]
0x2725c3d76976 @ 4 : 26 fa Star r1
277 E> 0x2725c3d76978 @ 6 : 28 fa 01 02 LdaNamedProperty r1, [1], [2]
0x2725c3d7697c @ 10 : 26 fb Star r0
0x2725c3d7697e @ 12 : 1a 04 LdaCurrentContextSlot [4]
281 E> 0x2725c3d76980 @ 14 : aa 02 ThrowReferenceErrorIfHole [2]
0x2725c3d76982 @ 16 : 26 f9 Star r2
277 E> 0x2725c3d76984 @ 18 : 59 fb fa f9 04 CallProperty1 r0, r1, r2, [4]
0x2725c3d76989 @ 23 : 0d LdaUndefined
288 S> 0x2725c3d7698a @ 24 : a9 Return
Constant pool (size = 3)
Handler Table (size = 0)
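The only difference is the ThrowReferenceErrorIfHole line in let_timeout_inner. It's the implementation of the TDZ (temporal dead zone) check: it throws a ReferenceError if the let binding is read before it has been initialized. So it doesn't matter for this comparison.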
Finally, here is the main dish: var_timeout and let_timeout:
[generated bytecode for function: var_timeout]
Parameter count 1
Register count 3
Frame size 24
0x2725c3d7316a @ 0 : 84 00 01 CreateFunctionContext [0], [1]
0x2725c3d7316d @ 3 : 16 fb PushContext r0
318 E> 0x2725c3d7316f @ 5 : a5 StackCheck
338 S> 0x2725c3d73170 @ 6 : 0b LdaZero
338 E> 0x2725c3d73171 @ 7 : 1d 04 StaCurrentContextSlot [4]
343 S> 0x2725c3d73173 @ 9 : 1a 04 LdaCurrentContextSlot [4]
0x2725c3d73175 @ 11 : 26 fa Star r1
0x2725c3d73177 @ 13 : 0c 0a LdaSmi [10]
343 E> 0x2725c3d73179 @ 15 : 69 fa 00 TestLessThan r1, [0]
0x2725c3d7317c @ 18 : 99 1b JumpIfFalse [27] (0x2725c3d73197 @ 45)
325 E> 0x2725c3d7317e @ 20 : a5 StackCheck
360 S> 0x2725c3d7317f @ 21 : 13 01 01 LdaGlobal [1], [1]
0x2725c3d73182 @ 24 : 26 fa Star r1
0x2725c3d73184 @ 26 : 81 02 03 02 CreateClosure [2], [3], #2
0x2725c3d73188 @ 30 : 26 f9 Star r2
360 E> 0x2725c3d7318a @ 32 : 5d fa f9 04 CallUndefinedReceiver1 r1, r2, [4]
350 S> 0x2725c3d7318e @ 36 : 1a 04 LdaCurrentContextSlot [4]
0x2725c3d73190 @ 38 : 4c 06 Inc [6]
350 E> 0x2725c3d73192 @ 40 : 1d 04 StaCurrentContextSlot [4]
0x2725c3d73194 @ 42 : 8a 21 00 JumpLoop [33], [0] (0x2725c3d73173 @ 9)
0x2725c3d73197 @ 45 : 0d LdaUndefined
434 S> 0x2725c3d73198 @ 46 : a9 Return
Constant pool (size = 3)
Handler Table (size = 0)
It seems V8 creates a single FunctionContext before the loop starts and stores the variable in slot [4] of the current context, so every closure created in the loop reads the same slot.
[generated bytecode for function: let_timeout]
Parameter count 1
Register count 7
Frame size 56
179 E> 0x2725c3d70daa @ 0 : a5 StackCheck
199 S> 0x2725c3d70dab @ 1 : 0b LdaZero
0x2725c3d70dac @ 2 : 26 f8 Star r3
0x2725c3d70dae @ 4 : 26 fb Star r0
0x2725c3d70db0 @ 6 : 0c 01 LdaSmi [1]
0x2725c3d70db2 @ 8 : 26 fa Star r1
293 E> 0x2725c3d70db4 @ 10 : a5 StackCheck
0x2725c3d70db5 @ 11 : 82 00 CreateBlockContext [0]
0x2725c3d70db7 @ 13 : 16 f7 PushContext r4
0x2725c3d70db9 @ 15 : 0f LdaTheHole
0x2725c3d70dba @ 16 : 1d 04 StaCurrentContextSlot [4]
0x2725c3d70dbc @ 18 : 25 fb Ldar r0
0x2725c3d70dbe @ 20 : 1d 04 StaCurrentContextSlot [4]
0x2725c3d70dc0 @ 22 : 0c 01 LdaSmi [1]
0x2725c3d70dc2 @ 24 : 67 fa 00 TestEqual r1, [0]
0x2725c3d70dc5 @ 27 : 99 07 JumpIfFalse [7] (0x2725c3d70dcc @ 34)
0x2725c3d70dc7 @ 29 : 0b LdaZero
0x2725c3d70dc8 @ 30 : 26 fa Star r1
0x2725c3d70dca @ 32 : 8b 08 Jump [8] (0x2725c3d70dd2 @ 40)
211 S> 0x2725c3d70dcc @ 34 : 1a 04 LdaCurrentContextSlot [4]
0x2725c3d70dce @ 36 : 4c 01 Inc [1]
211 E> 0x2725c3d70dd0 @ 38 : 1d 04 StaCurrentContextSlot [4]
0x2725c3d70dd2 @ 40 : 0c 01 LdaSmi [1]
0x2725c3d70dd4 @ 42 : 26 f9 Star r2
204 S> 0x2725c3d70dd6 @ 44 : 1a 04 LdaCurrentContextSlot [4]
0x2725c3d70dd8 @ 46 : 26 f6 Star r5
0x2725c3d70dda @ 48 : 0c 0a LdaSmi [10]
204 E> 0x2725c3d70ddc @ 50 : 69 f6 02 TestLessThan r5, [2]
0x2725c3d70ddf @ 53 : 99 04 JumpIfFalse [4] (0x2725c3d70de3 @ 57)
0x2725c3d70de1 @ 55 : 8b 06 Jump [6] (0x2725c3d70de7 @ 61)
0x2725c3d70de3 @ 57 : 17 f7 PopContext r4
0x2725c3d70de5 @ 59 : 8b 33 Jump [51] (0x2725c3d70e18 @ 110)
0x2725c3d70de7 @ 61 : 0c 01 LdaSmi [1]
0x2725c3d70de9 @ 63 : 67 f9 03 TestEqual r2, [3]
0x2725c3d70dec @ 66 : 99 1c JumpIfFalse [28] (0x2725c3d70e08 @ 94)
186 E> 0x2725c3d70dee @ 68 : a5 StackCheck
221 S> 0x2725c3d70def @ 69 : 13 01 04 LdaGlobal [1], [4]
0x2725c3d70df2 @ 72 : 26 f6 Star r5
0x2725c3d70df4 @ 74 : 81 02 06 02 CreateClosure [2], [6], #2
0x2725c3d70df8 @ 78 : 26 f5 Star r6
221 E> 0x2725c3d70dfa @ 80 : 5d f6 f5 07 CallUndefinedReceiver1 r5, r6, [7]
0x2725c3d70dfe @ 84 : 0b LdaZero
0x2725c3d70dff @ 85 : 26 f9 Star r2
0x2725c3d70e01 @ 87 : 1a 04 LdaCurrentContextSlot [4]
0x2725c3d70e03 @ 89 : 26 fb Star r0
0x2725c3d70e05 @ 91 : 8a 1e 01 JumpLoop [30], [1] (0x2725c3d70de7 @ 61)
0x2725c3d70e08 @ 94 : 0c 01 LdaSmi [1]
293 E> 0x2725c3d70e0a @ 96 : 67 f9 09 TestEqual r2, [9]
0x2725c3d70e0d @ 99 : 99 06 JumpIfFalse [6] (0x2725c3d70e13 @ 105)
0x2725c3d70e0f @ 101 : 17 f7 PopContext r4
0x2725c3d70e11 @ 103 : 8b 07 Jump [7] (0x2725c3d70e18 @ 110)
0x2725c3d70e13 @ 105 : 17 f7 PopContext r4
0x2725c3d70e15 @ 107 : 8a 61 00 JumpLoop [97], [0] (0x2725c3d70db4 @ 10)
0x2725c3d70e18 @ 110 : 0d LdaUndefined
295 S> 0x2725c3d70e19 @ 111 : a9 Return
Constant pool (size = 3)
Handler Table (size = 0)
Compared to var_timeout, it's obviously longer, so I tried to translate it into a more readable format (and removed some code I think isn't necessary):
r0 = 0
r1 = 1
loop:
r4.push(new BlockContext())
CurrentContextSlot = r0 // copy the value carried over from the previous iteration into the fresh context
if (r1 === 1) {
r1 = 0
} else {
CurrentContextSlot++
}
r2 = 1
r5 = CurrentContextSlot
if (!(r5 < 10)) { // end loop
PopContext r4
goto done
}
loop2:
if (r2 === 1) {
setTimeout()
r2 = 0
r0 = CurrentContextSlot
goto loop2
}
if (r2 === 1) {
PopContext r4
goto done
}
PopContext r4
goto loop
done:
return undefined
The code above is not exact, but you can still see that it creates a new BlockContext in each iteration.
Here is my conclusion: V8 treats let and var exactly the same if there is no closure in the for loop. But please note that this is still a guess; I am not a V8 engineer 😂
Regarding the difference found for closure, why do you think there’s this difference? And which one would be better? (Your opinion)