Skip to content

Instantly share code, notes, and snippets.

@CAFxX
Last active January 10, 2025 07:26
Show Gist options
  • Save CAFxX/1bfe56cabbf197e14603148178ff9169 to your computer and use it in GitHub Desktop.
Save CAFxX/1bfe56cabbf197e14603148178ff9169 to your computer and use it in GitHub Desktop.
Fast count decimal digits (branchless)
/*
Fast, branchless count of decimal digits in a uint64
(C) 2025 Carlo Alberto Ferraris (CAFxX)
This compiles down on x86-64 to something like
lzcnt rcx, rdi
lea rax, [rip + countDigits.lut1]
movzx eax, byte ptr [rcx + rax]
lea rdx, [rip + countDigits.lut2]
cmp qword ptr [rdx + 8*rcx], rdi
adc rax, 0
ret
and on rv64gcb to:
lui a5,%hi(.LANCHOR0)
clz a3,a0
addi a5,a5,%lo(.LANCHOR0)
sh3add a4,a3,a5
ld a4,0(a4)
add a5,a5,a3
lbu a5,520(a5)
sltu a0,a4,a0
add a0,a0,a5
ret
Depending on the processor, and assuming the LUTs are in L1, this should
execute in 10~15 cycles (at least on modern x86-64).
https://godbolt.org/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QXACx8BBAKoBnTAAUAHpwAMvAFYTStJg1AB9U8lJL6yAngGVG6AMKpaAVxYMQAZgAcpBwAyeAyYAHLuAEaYxCAArFykAA6oCoS2DM5uHt5%2Byak2AkEh4SxRMfEWmFYFDEIETMQEme6evpXV6XUNBEVhkdFxCQr1jc3ZbcPdvSVlgwCUFqiuxMjsHACkAExewchuWADU616Ow/iCAHQIx9jrGgCCWzsMe66Hx6cE%2BKhXN3eP212%2B0wRxOqh8ADZggRiMFfl5bg9/pMbMgDq5oRDJMYCAc0K5BAARPDAQgKCAYwRYnEHBhzI4AdgAQv8DmyDgoAO6EZAIA4QOmMlkPdmivFMJQHDQHC6yg4AThArLFKuImAIywYBy4x2F9xVbNEkq40tlFwViuVBvZao1xC1m11VrFRpBJtNcvlXqVIutNvVmoOXidvpVru1GkjMs9Xstob9tsDkhD%2BoN4fdUbNFtjPtTfoOiftB1iKet6cjFej5tjOed1sLWohpbTErdFcrWZrtfj9YDRYZzbDrYj7czMa7cbzCb7Wp8g5dw4zo6r2YnufzbIbCvnovLo4744nk43BZnEZ37L3%2B7H1aP3anvbtWq4Oq8erLi%2Bv7ZXd5r643W5cI6b51pen5fget6/t6oGqmeXDBiBPZgcaEHfp20H3iegHJkhD4oW2aGQaumH/vmgElnhH6oURxGYV2ZHTk%2B2pNlRLY0bRN4kfRjGPoGXADmxQ4cZxHpQfRWEAfBc5CQuImiT%2BEldq4tC8QagHyhebJYFQTAqQQalwcxmwaPO6wMoS/zmZZSIPJSBDUri%2BJEiSZKbBSmLYrigrme%2B7IongaJoAwwzotCPg0m4BA6rETIQiWsSEqCSW%2BbB2ryqQ6WZVwGVZdqfj5dlBUCdlDKldlEIVVVLHZbEtX1WlUjZdI2otQh2VeB1XXaps2W9T12UJNqQ0vtlGhjRNEakGluUKplc1sgVBxLUtZVsmtBwbRtlVsjtBx7XVbKHcW83SGlLUHBdF2dWyN1BvN/UHI9j1Ddq82vR91nzgFQUCKF9mOQcUWOrF8XrIlyVCkpym0NNyEngjBpcD4kiSBCDKoxoDJeAyGjyrE8QQlwsQqXD%2BGIxT7LI6j6OY9juP44TxOk2llOI9TaMY5IWM43jBNcETJOw6zbMntDsYs/DosIxztPc/TfNM0LZPS9LstczzDP84Lkvk6rG7i/Kuv62z6t07zjMC8zwtSyb1pm/LFva9bKt24j4vG27Mso5z5ta0rntexuDua4rVvKyLQdsiHCuWzrNt61H7LQ4HSdIz7cuh3HLuR1HMdOwHCdp36Smp8Xor5/74dl%2BX0cZxrsfOxHttpxJNe15XYfx67tcV/XftdznLdJ532fN4nreYe35ej030/F7Phc973bJT0XK9U/3jtV93udB4v1frxv3E1vPacH7vw951vWdz0fG8X0PE9J7%2BZ8jzfjdL3vXuP%2BPx%2Br0eN%2B18aYNwLofZeK9f5AKDoA%2B%2BkCP5gMvs/YBvtt6Dz/v/KBcDe4TmgT/BBO8n7/3yiAgeY88FuxhhA3uWDqEdwIegihdtT7YPoaQtB5DWEzwYZwuh3D2G3y/lfGBXomEm1od/N2EjhFe1EVwhePC758IUQIz%2B4DJHMLEfraRyD96KKEbo/BqjEFEMwfow%2BX1ZIAy8kDAAXpDUwEQMS0BsAwYwexbG0AFHMecW4ooxSZLQWx4MUqbCZPyLUNwgauAICDQJwSIbHAAGLagOCAKUPi8JfVsvcAA9AAKn%2BNCA4LAmDBAgPSVK8ZrE0m0JDV8fk2T8GIPyYpeBIamTfAcdpHwnqdPCXgLYLIwmVOZGlPAVB%2BR1LAGAY4SUuD0kSLCQQVAIBbEdJsWIJk3BHE2ZsNw4NHAMHWZlOpABaJ6mVnIEGJKSAgCh3LnKenMTJjSxRLOhKs9ZWwtkaB2T8/ZrhDnHM2P1J5Q1rm3LchAcFLytIHA%2BSstZoKAV/NcLsrZBzYhHJOQcbQVylguTuQ8mFcLZKikRQQL5KK9looxYC4FuK6lDLeniQlNzXL3Mebs8JCzXlpUpdSjZvyFCMtBZldZZp1n8qlnUwpXh5n9OdNkqcgrkXCu2eigFWKcXioOAAPw0KTNlBIOXEvcoalSZK9QqvybkqyDwfphSpDYyFnKFCSA8i6mkPkxnxlybk/kWIzmqCGfKOY%2BSvC5JNGcx08YnXBX%2Bp5SKMS4lg0SQqqGhsjbyPPuYpBxCdGFvzaY4%2B2atGqyLWY4xhCMFlo9rm9%2BNbGGNpQZnNRBb/4NuUXm5tvCNHiJLXWh%2BQ6K3SxTq2vRfalEDu0aOydsi24LqkfOntTbUGCPUTIyh9Ex2iyrcfA9I7p0GOIbu5ddsj3wJPVuwxO7X4XsHTezth7V2ztVtBPdps33bsvT%2Bu9f7n2lo3g%2BtdbbQG1q/ZTK9ODYFganRujtwHr2IZMcOlecH31q3/cWoD6GaE4a7VQrD%2B7COvrw1BimxHf1PtQ5Bx9c6KMMY/RLZj2GmPwaMXRltnGV0cZI2zVjvHAPcf7TRxjomZ3iZY5R9mZHj2SdPV22T3t%2BPSfY4p29uHNMvoU%2B2tDKlLFvJqbiIJDjjBOLoK49xQSvF0nnCZg4ERIYQCxAcC5ZmWXhoOPkoMBxA0mnc09XxZ5nMsoFKCbA0TYng3iSE0EKSuBpIySmFVyJ6iomdQ5V17KoX3K8F67LPrRlvMAjyiJkW5kRgS6k9JGhKlhIq38TNS4FLJNqxknlkcIvNcVdKdrSW6sNfCT1hEVXWu0Rq4NzrQzuuRLGy1ysA3kv1fK6N24i3RL9a8Illbw25uVc21GZbQ21vzY231ziU29tdeHut8by4Tszca/do7RFrundm3d87D3Hs7Y66t8LP23sQQ%2B89t5BpXuXe/E9wHL3gfQ6/GDuHEOVRQ5HPuZHw2msLcR5j2H%2B3vuHbx0t/702Uc44uxj68WPUsWQ4AsVSHBYi8E8BwLQpBUCcEcByJYKwQRPB4KQAy7OGcLAANbeFiBcLwmwITygZBCLwCv0YCTqkzyQrPNC8C5xwXgCgQDjRF1oBYcBYBIDQCwRIdBojkEoJb639AYjID2IYYAOJiAEnF3wKz0QDcQAiNr0gERggNAAJ6cCFyH5gxAw8AHkIjaEwNYSPvBLdsEEHHhgtAI%2Bi9IFgJxwBHBiFoAb7gvAsClKMOIPP%2BA1TWDwAAN0wGXjnmBVDJ5iWsDn0IqhB9oHgCIxBw/OCwEHmEeAWCp9IM34gEQUiYEJJgKvwAB9GG1wsKgBhgAKAAGp4EwJyOPiRGDT/4IIEQYh2BNXP/IJQagg%2B6ASAYdfph3H6EHwbyACxUCJBqGXs5OPLwdzUpFYa4BVYAYfBgQgaIJga3dABQHXWfWELAL/CpdoZPGoewBgJwFwFoPQQIYIPoUoAYBIPINIAQMYTwMglICghgaYfoGIIYKoTAzoEYJoPA7IZgjoAQLoRoBgkgpgiwdgqgvQSYfgogmYUghYBQPnVYCQRnTgFnUgNnDnXXA4cECEM5NzV3IwfkGEL3ekCAXAQgEgXZLwBZXgY3MXUgBATAJgLAGIdAyXCwmXZXeXDQOXHwL0SQeUXqDXLXPPXXfXQ3YXDfUgM3RAEAIgFwO3CABoHfTgM5FgZARIIFCAqAmA4feAhQdzM5GwNgNfTAXgBIhQZQQwKoIQBAVATkVQtPVAK3G3YgUIVgNYTQ7QyQPEF/YAfQz3BgcXOYXgTAfAIgFAvQW/S/cQG/WQRQFQdQPPJ/UgFgAQJgNAEw0YgACQlCZEwEYHuBiVQGP0YGGOn2WOYDWJGJICOLZyFzONWNQDqHOGnzOGCE4H1y%2BGCHKJCFoCqJqJuKsNQBcAAEl0AQBaBaBG8WBkjRADD%2BjSBORsjU9FDmdAi1DOBwTIToSmBejDCNDIQOiui3ccT%2BijD1izDBdMpnBGindzDLCwjRcXlSAXCvA3CvA2T2SOS2T9BOBNcVCg9giLBQjrDGSmdNhUSdc3j6STcFhZ9Ug7BJAgA
*/
#include <stdint.h>
uint64_t countDigits(uint64_t n) {
static const uint8_t lut1[65] = {
19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15,
14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10,
9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 5, 5, 5, 4,
4, 4, 4, 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1};
static const uint64_t lut2[65] = {9999999999999999999ul,
18446744073709551615ul,
18446744073709551615ul,
18446744073709551615ul,
999999999999999999ul,
18446744073709551615ul,
18446744073709551615ul,
99999999999999999ul,
18446744073709551615ul,
18446744073709551615ul,
9999999999999999ul,
18446744073709551615ul,
18446744073709551615ul,
18446744073709551615ul,
999999999999999ul,
18446744073709551615ul,
18446744073709551615ul,
99999999999999ul,
18446744073709551615ul,
18446744073709551615ul,
9999999999999ul,
18446744073709551615ul,
18446744073709551615ul,
18446744073709551615ul,
999999999999ul,
18446744073709551615ul,
18446744073709551615ul,
99999999999ul,
18446744073709551615ul,
18446744073709551615ul,
9999999999ul,
18446744073709551615ul,
18446744073709551615ul,
18446744073709551615ul,
999999999ul,
18446744073709551615ul,
18446744073709551615ul,
99999999ul,
18446744073709551615ul,
18446744073709551615ul,
9999999ul,
18446744073709551615ul,
18446744073709551615ul,
18446744073709551615ul,
999999ul,
18446744073709551615ul,
18446744073709551615ul,
99999ul,
18446744073709551615ul,
18446744073709551615ul,
9999ul,
18446744073709551615ul,
18446744073709551615ul,
18446744073709551615ul,
999ul,
18446744073709551615ul,
18446744073709551615ul,
99ul,
18446744073709551615ul,
18446744073709551615ul,
9ul,
18446744073709551615ul,
18446744073709551615ul,
18446744073709551615ul,
18446744073709551615ul};
uint64_t lz = __builtin_clzl(n);
return lut1[lz] + (n > lut2[lz] ? 1 : 0);
}
/*
LUTs generated (apart from the case x=64) with
(0..63).map{|x|
(10**Math.log10(2**(64-x-1)).floor).to_s.length
}.join ','
(0..63).map{|x|
np10(2**(64-x-1)) != np10(2**(64-x)-1) ? np10(2**(64-x-1))-1 : 2**64-1
}.map{|x| x.to_s + "ul" }.join ','
(0..63).each{|x|
puts "#{x.to_s.rjust(2)}: #{(10**Math.log10(2**(64-x-1)).floor).to_s.length.to_s.rjust(2)} #{((64-x)*3/10).floor}"
}
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment