Last active
January 10, 2025 07:26
-
-
Save CAFxX/1bfe56cabbf197e14603148178ff9169 to your computer and use it in GitHub Desktop.
Fast count decimal digits (branchless)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Fast, branchless count of decimal digits in a uint64 | |
(C) 2025 Carlo Alberto Ferraris (CAFxX) | |
This compiles down on x86-64 to something like | |
lzcnt rcx, rdi | |
lea rax, [rip + countDigits.lut1] | |
movzx eax, byte ptr [rcx + rax] | |
lea rdx, [rip + countDigits.lut2] | |
cmp qword ptr [rdx + 8*rcx], rdi | |
adc rax, 0 | |
ret | |
and on rv64gcb to: | |
lui a5,%hi(.LANCHOR0) | |
clz a3,a0 | |
addi a5,a5,%lo(.LANCHOR0) | |
sh3add a4,a3,a5 | |
ld a4,0(a4) | |
add a5,a5,a3 | |
lbu a5,520(a5) | |
sltu a0,a4,a0 | |
add a0,a0,a5 | |
ret | |
Depending on the processor, and assuming the LUTs are in L1, this should | |
execute in 10~15 cycles (at least on modern x86-64). | |
https://godbolt.org/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QXACx8BBAKoBnTAAUAHpwAMvAFYTStJg1AB9U8lJL6yAngGVG6AMKpaAVxYMQAZgAcpBwAyeAyYAHLuAEaYxCAArFykAA6oCoS2DM5uHt5%2Byak2AkEh4SxRMfEWmFYFDEIETMQEme6evpXV6XUNBEVhkdFxCQr1jc3ZbcPdvSVlgwCUFqiuxMjsHACkAExewchuWADU616Ow/iCAHQIx9jrGgCCWzsMe66Hx6cE%2BKhXN3eP212%2B0wRxOqh8ADZggRiMFfl5bg9/pMbMgDq5oRDJMYCAc0K5BAARPDAQgKCAYwRYnEHBhzI4AdgAQv8DmyDgoAO6EZAIA4QOmMlkPdmivFMJQHDQHC6yg4AThArLFKuImAIywYBy4x2F9xVbNEkq40tlFwViuVBvZao1xC1m11VrFRpBJtNcvlXqVIutNvVmoOXidvpVru1GkjMs9Xstob9tsDkhD%2BoN4fdUbNFtjPtTfoOiftB1iKet6cjFej5tjOed1sLWohpbTErdFcrWZrtfj9YDRYZzbDrYj7czMa7cbzCb7Wp8g5dw4zo6r2YnufzbIbCvnovLo4744nk43BZnEZ37L3%2B7H1aP3anvbtWq4Oq8erLi%2Bv7ZXd5r643W5cI6b51pen5fget6/t6oGqmeXDBiBPZgcaEHfp20H3iegHJkhD4oW2aGQaumH/vmgElnhH6oURxGYV2ZHTk%2B2pNlRLY0bRN4kfRjGPoGXADmxQ4cZxHpQfRWEAfBc5CQuImiT%2BEldq4tC8QagHyhebJYFQTAqQQalwcxmwaPO6wMoS/zmZZSIPJSBDUri%2BJEiSZKbBSmLYrigrme%2B7IongaJoAwwzotCPg0m4BA6rETIQiWsSEqCSW%2BbB2ryqQ6WZVwGVZdqfj5dlBUCdlDKldlEIVVVLHZbEtX1WlUjZdI2otQh2VeB1XXaps2W9T12UJNqQ0vtlGhjRNEakGluUKplc1sgVBxLUtZVsmtBwbRtlVsjtBx7XVbKHcW83SGlLUHBdF2dWyN1BvN/UHI9j1Ddq82vR91nzgFQUCKF9mOQcUWOrF8XrIlyVCkpym0NNyEngjBpcD4kiSBCDKoxoDJeAyGjyrE8QQlwsQqXD%2BGIxT7LI6j6OY9juP44TxOk2llOI9TaMY5IWM43jBNcETJOw6zbMntDsYs/DosIxztPc/TfNM0LZPS9LstczzDP84Lkvk6rG7i/Kuv62z6t07zjMC8zwtSyb1pm/LFva9bKt24j4vG27Mso5z5ta0rntexuDua4rVvKyLQdsiHCuWzrNt61H7LQ4HSdIz7cuh3HLuR1HMdOwHCdp36Smp8Xor5/74dl%2BX0cZxrsfOxHttpxJNe15XYfx67tcV/XftdznLdJ532fN4nreYe35ej030/F7Phc973bJT0XK9U/3jtV93udB4v1frxv3E1vPacH7vw951vWdz0fG8X0PE9J7%2BZ8jzfjdL3vXuP%2BPx%2Br0eN%2B18aYNwLofZeK9f5AKDoA%2B%2BkCP5gMvs/YBvtt6Dz/v/KBcDe4TmgT/BBO8n7/3yiAgeY88FuxhhA3uWDqEdwIegihdtT7YPoaQtB5DWEzwYZwuh3D2G3y/lfGBXomEm1od/N2EjhFe1EVwhePC758IUQIz%2B4DJHMLEfraRyD96KKEbo/BqjEFEMwfow%2BX1ZIAy8kDAAXpDUwEQMS0BsAwYwexbG0AFHMecW4ooxSZLQWx4MUqbCZPyLUNwgauAICDQJwSIbHAAGLagOCAKUPi8JfVsvcAA9AAKn%2BNCA4LAmDBAgPSVK8ZrE0m0JDV8fk2T8GIPyYpeBIamTfAcdpHwnqdPCXgLYLIwmVOZGlPAVB%2BR1LAGAY4SUuD0kSLCQQVAIBbEdJsWIJk3BHE2ZsNw4NHAMHWZlOpABaJ6mVnIEGJKSAgCh3LnKenMTJjSxRLOhKs9ZWwtkaB2T8/ZrhDnHM2P1J5Q1rm3LchAcFLytIHA%2BSstZoKAV/NcLsrZBzYhHJOQcbQVylguTuQ8mFcLZKikRQQL5KK9looxYC4FuK6lDLeniQlNzXL3Mebs8JCzXlpUpdSjZvyFCMtBZldZZp1n8qlnUwpXh5n9OdNkqcgrkXCu2eigFWKcXioOAAPw0KTNlBIOXEvcoalSZK9QqvybkqyDwfphSpDYyFnKFCSA8i6mkPkxnxlybk/kWIzmqCGfKOY%2BSvC5JNGcx08YnXBX%2Bp5SKMS4lg0SQqqGhsjbyPPuYpBxCdGFvzaY4%2B2atGqyLWY4xhCMFlo9rm9%2BNbGGNpQZnNRBb/4NuUXm5tvCNHiJLXWh%2BQ6K3SxTq2vRfalEDu0aOydsi24LqkfOntTbUGCPUTIyh9Ex2iyrcfA9I7p0GOIbu5ddsj3wJPVuwxO7X4XsHTezth7V2ztVtBPdps33bsvT%2Bu9f7n2lo3g%2BtdbbQG1q/ZTK9ODYFganRujtwHr2IZMcOlecH31q3/cWoD6GaE4a7VQrD%2B7COvrw1BimxHf1PtQ5Bx9c6KMMY/RLZj2GmPwaMXRltnGV0cZI2zVjvHAPcf7TRxjomZ3iZY5R9mZHj2SdPV22T3t%2BPSfY4p29uHNMvoU%2B2tDKlLFvJqbiIJDjjBOLoK49xQSvF0nnCZg4ERIYQCxAcC5ZmWXhoOPkoMBxA0mnc09XxZ5nMsoFKCbA0TYng3iSE0EKSuBpIySmFVyJ6iomdQ5V17KoX3K8F67LPrRlvMAjyiJkW5kRgS6k9JGhKlhIq38TNS4FLJNqxknlkcIvNcVdKdrSW6sNfCT1hEVXWu0Rq4NzrQzuuRLGy1ysA3kv1fK6N24i3RL9a8Illbw25uVc21GZbQ21vzY231ziU29tdeHut8by4Tszca/do7RFrundm3d87D3Hs7Y66t8LP23sQQ%2B89t5BpXuXe/E9wHL3gfQ6/GDuHEOVRQ5HPuZHw2msLcR5j2H%2B3vuHbx0t/702Uc44uxj68WPUsWQ4AsVSHBYi8E8BwLQpBUCcEcByJYKwQRPB4KQAy7OGcLAANbeFiBcLwmwITygZBCLwCv0YCTqkzyQrPNC8C5xwXgCgQDjRF1oBYcBYBIDQCwRIdBojkEoJb639AYjID2IYYAOJiAEnF3wKz0QDcQAiNr0gERggNAAJ6cCFyH5gxAw8AHkIjaEwNYSPvBLdsEEHHhgtAI%2Bi9IFgJxwBHBiFoAb7gvAsClKMOIPP%2BA1TWDwAAN0wGXjnmBVDJ5iWsDn0IqhB9oHgCIxBw/OCwEHmEeAWCp9IM34gEQUiYEJJgKvwAB9GG1wsKgBhgAKAAGp4EwJyOPiRGDT/4IIEQYh2BNXP/IJQagg%2B6ASAYdfph3H6EHwbyACxUCJBqGXs5OPLwdzUpFYa4BVYAYfBgQgaIJga3dABQHXWfWELAL/CpdoZPGoewBgJwFwFoPQQIYIPoUoAYBIPINIAQMYTwMglICghgaYfoGIIYKoTAzoEYJoPA7IZgjoAQLoRoBgkgpgiwdgqgvQSYfgogmYUghYBQPnVYCQRnTgFnUgNnDnXXA4cECEM5NzV3IwfkGEL3ekCAXAQgEgXZLwBZXgY3MXUgBATAJgLAGIdAyXCwmXZXeXDQOXHwL0SQeUXqDXLXPPXXfXQ3YXDfUgM3RAEAIgFwO3CABoHfTgM5FgZARIIFCAqAmA4feAhQdzM5GwNgNfTAXgBIhQZQQwKoIQBAVATkVQtPVAK3G3YgUIVgNYTQ7QyQPEF/YAfQz3BgcXOYXgTAfAIgFAvQW/S/cQG/WQRQFQdQPPJ/UgFgAQJgNAEw0YgACQlCZEwEYHuBiVQGP0YGGOn2WOYDWJGJICOLZyFzONWNQDqHOGnzOGCE4H1y%2BGCHKJCFoCqJqJuKsNQBcAAEl0AQBaBaBG8WBkjRADD%2BjSBORsjU9FDmdAi1DOBwTIToSmBejDCNDIQOiui3ccT%2BijD1izDBdMpnBGindzDLCwjRcXlSAXCvA3CvA2T2SOS2T9BOBNcVCg9giLBQjrDGSmdNhUSdc3j6STcFhZ9Ug7BJAgA | |
*/ | |
#include <stdint.h> | |
uint64_t countDigits(uint64_t n) { | |
static const uint8_t lut1[65] = { | |
19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, | |
14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, | |
9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 5, 5, 5, 4, | |
4, 4, 4, 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1}; | |
static const uint64_t lut2[65] = {9999999999999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
999999999999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
99999999999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
9999999999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
999999999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
99999999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
9999999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
999999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
99999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
9999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
999999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
99999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
9999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
999999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
99999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
9999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
999ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
99ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
9ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
18446744073709551615ul, | |
18446744073709551615ul}; | |
uint64_t lz = __builtin_clzl(n); | |
return lut1[lz] + (n > lut2[lz] ? 1 : 0); | |
} | |
/* | |
LUTs generated (apart from the case x=64) with | |
(0..63).map{|x| | |
(10**Math.log10(2**(64-x-1)).floor).to_s.length | |
}.join ',' | |
(0..63).map{|x| | |
np10(2**(64-x-1)) != np10(2**(64-x)-1) ? np10(2**(64-x-1))-1 : 2**64-1 | |
}.map{|x| x.to_s + "ul" }.join ',' | |
(0..63).each{|x| | |
puts "#{x.to_s.rjust(2)}: #{(10**Math.log10(2**(64-x-1)).floor).to_s.length.to_s.rjust(2)} #{((64-x)*3/10).floor}" | |
} | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment