Skip to content

Instantly share code, notes, and snippets.

@resilar
Last active September 22, 2025 08:56
Show Gist options
  • Save resilar/205654ec85832385861f43bf94994b18 to your computer and use it in GitHub Desktop.
Save resilar/205654ec85832385861f43bf94994b18 to your computer and use it in GitHub Desktop.
(RFC7748) X25519 and (RFC8032) EdDSA
#include "sha512.h"
#include <stdint.h>
#include <sys/random.h>
/*
* RFC7748 X25519 Elliptic Curve Diffie-Hellman (ECDH) with Curve25519.
* Montgomery curve y² = x³ + 486662x² + x over GF(p) = GF(2^255 - 19).
*
* 256-bit arithmetic modulo 2^255 - 19 implemented with 8 x 32-bit limbs :3
*/
static uint32_t *import(const uint8_t in[32], uint32_t out[8])
{
out[0] = in[ 0] + (in[ 1] << 8) + (in[ 2] << 16) + (in[ 3] << 24);
out[1] = in[ 4] + (in[ 5] << 8) + (in[ 6] << 16) + (in[ 7] << 24);
out[2] = in[ 8] + (in[ 9] << 8) + (in[10] << 16) + (in[11] << 24);
out[3] = in[12] + (in[13] << 8) + (in[14] << 16) + (in[15] << 24);
out[4] = in[16] + (in[17] << 8) + (in[18] << 16) + (in[19] << 24);
out[5] = in[20] + (in[21] << 8) + (in[22] << 16) + (in[23] << 24);
out[6] = in[24] + (in[25] << 8) + (in[26] << 16) + (in[27] << 24);
out[7] = in[28] + (in[29] << 8) + (in[30] << 16) + (in[31] << 24);
return out;
}
static uint8_t *export(const uint32_t i[8], uint8_t o[32])
{
o[ 0] = i[0]; o[ 1] = i[0] >> 8; o[ 2] = i[0] >> 16; o[ 3] = i[0] >> 24;
o[ 4] = i[1]; o[ 5] = i[1] >> 8; o[ 6] = i[1] >> 16; o[ 7] = i[1] >> 24;
o[ 8] = i[2]; o[ 9] = i[2] >> 8; o[10] = i[2] >> 16; o[11] = i[2] >> 24;
o[12] = i[3]; o[13] = i[3] >> 8; o[14] = i[3] >> 16; o[15] = i[3] >> 24;
o[16] = i[4]; o[17] = i[4] >> 8; o[18] = i[4] >> 16; o[19] = i[4] >> 24;
o[20] = i[5]; o[21] = i[5] >> 8; o[22] = i[5] >> 16; o[23] = i[5] >> 24;
o[24] = i[6]; o[25] = i[6] >> 8; o[26] = i[6] >> 16; o[27] = i[6] >> 24;
o[28] = i[7]; o[29] = i[7] >> 8; o[30] = i[7] >> 16; o[31] = i[7] >> 24;
return o;
}
static uint32_t *clamp_scalar(uint32_t a[8])
{
a[0] &= 0xfffffff8;
a[7] &= 0x7fffffff;
a[7] |= 0x40000000;
return a;
}
static uint32_t *clamp_coordinate(uint32_t u[8])
{
u[7] &= 0x7fffffff;
return u;
}
/* x = a (mod p) */
static uint32_t *modp(const uint32_t a[8], uint32_t x[8])
{
int i;
uint64_t c = (uint64_t)a[0] + 19;
for (i = 1; i < 8; i++) c = (c >> 32) + a[i];
c = (c >> 31) * 19;
for (i = 0; i < 7; i++) {
c += a[i];
x[i] = c;
c >>= 32;
}
x[i] = ((uint32_t)c + a[i]) & 0x7fffffff;
return x;
}
/* x = a + b (mod p) */
static uint32_t *add(const uint32_t a[8], const uint32_t b[8], uint32_t x[8])
{
int i;
uint64_t c = 0;
for (i = 0; i < 8; i++) {
c += a[i];
c += b[i];
x[i] = c;
c >>= 32;
}
return x;
/*
* Note that we can skip modp() here because we do not chain more than one
* addition or subtraction between multiplications. mul() handles unreduced
* integers just fine and returns (mod p)-reduced product as the result.
*/
}
/* x = a - b (mod p) */
static uint32_t *sub(const uint32_t a[8], const uint32_t b[8], uint32_t x[8])
{
int i;
int64_t c = -19;
for (i = 0; i < 8; i++) {
c += a[i];
c -= b[i];
x[i] = c;
c >>= 32;
}
x[7] += 0x80000000;
return x; /* Again, skip modp() */
}
/*
* The mul() function is the main performance bottleneck of the code.
* https://locklessinc.com/articles/256bit_arithmetic/
*/
/* x = a * b (mod p) */
static uint32_t *mul(const uint32_t a[8], const uint32_t b[8], uint32_t x[8])
{
int i, j;
uint32_t ab[8];
uint64_t c, d, l, r;
for (i = c = 0; i < 8; i++) {
l = c & 0xffffffff;
r = 0;
c >>= 32;
for (j = 0; j <= i; j++) {
l += (uint64_t)a[j] * b[i - j];
c += l >> 32;
l &= 0xffffffff;
}
for (d = 0; j < 8; j++) {
r += (uint64_t)a[j] * b[8 - (j - i)];
d += r >> 32;
r &= 0xffffffff;
}
l += 38 * r;
c += 38 * d + (l >> 32);
ab[i] = l;
}
c = 19 * (2*c + (ab[7] >> 31));
ab[7] &= 0x7fffffff;
for (i = 0; i < 8; i++) {
c += ab[i];
ab[i] = c;
c >>= 32;
}
return modp(ab, x);
}
/* x = a² (mod p) */
static uint32_t *square(const uint32_t a[8], uint32_t x[8])
{
return mul(a, a, x);
}
/* x = a^(p-2) (mod p) */
static uint32_t *inverse_modp(const uint32_t a[8], uint32_t x[8])
{
int i;
uint32_t b[8];
mul(a, square(a, b), x);
mul(x, square(square(b, b), b), x);
square(b, b);
for (i = 5; i < 255; i++) mul(x, square(b, b), x);
return x;
}
/* Swap a and b if c != 0 */
static void cswap(uint32_t a[8], uint32_t b[8], int c)
{
int i;
uint32_t mask = -(uint32_t)!!c;
for (i = 0; i < 8; i++) {
const uint32_t xor = mask & (a[i] ^ b[i]);
a[i] ^= xor;
b[i] ^= xor;
}
}
/*
* Constant-time Montgomery ladder for elliptic curve point multiplication with
* a scalar k and point coordinate u. We expect k and u to be properly clamped.
*/
static uint32_t *monty(const uint32_t k[8], const uint32_t u[8], uint32_t x[8])
{
int i, swap = 0;
uint32_t x0[8] = { 1, 0, 0, 0, 0, 0, 0, 0 };
uint32_t z0[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
uint32_t x1[8];
uint32_t z1[8] = { 1, 0, 0, 0, 0, 0, 0, 0 };
const uint32_t a24[8] = { (486662 - 2) / 4, 0, 0, 0, 0, 0, 0, 0 };
for (i = 0; i < 8; i++) x1[i] = u[i];
for (i = 1; i <= 255; i++) {
uint32_t A[8], AA[8], B[8], BB[8], C[8], CB[8], D[8], DA[8],E[8];
const int bit = (k[(255-i) / 32] >> ((255-i) % 32)) & 1;
swap ^= bit;
cswap(x0, x1, swap);
cswap(z0, z1, swap);
swap = bit;
/*
* A = x_0 + z_0, AA = A^2
* B = x_0 - z_0, BB = B^2
* C = x_1 + z_1, CB = C * B
* D = x_1 - z_1, DA = D * A
* E = AA - BB
*/
square(add(x0, z0, A), AA);
square(sub(x0, z0, B), BB);
mul(add(x1, z1, C), B, CB);
mul(sub(x1, z1, D), A, DA);
sub(AA, BB, E);
/*
* x_0 = AA * BB
* x_1 = (DA + CB)^2
*/
mul(AA, BB, x0);
square(add(DA, CB, x1), x1);
/*
* z_0 = E * (AA + a24 * E) where a24 = (486662 - 2) / 4 = 121665
* z_1 = (DA - CB)^2 * u
*/
add(AA, mul(a24, E, z0), z0);
mul(E, z0, z0);
square(sub(DA, CB, z1), z1);
mul(u, z1, z1);
}
cswap(x0, x1, swap);
cswap(z0, z1, swap);
return mul(x0, inverse_modp(z0, x), x);
}
/*
* Generate a public/private key pair for x25519 key exchange.
*
* The secret key is generated with getrandom(2) and clamped/masked according
* to RFC7748. The public key is derived from the secret key by elliptic curve
* point multiplication with the generator point G = { 9 } i.e. the Montgomery
* curve point u=9.
*/
int x25519_keygen(uint8_t secret[32], uint8_t public[32])
{
uint32_t k[8];
const uint32_t G[8] = { 9, 0, 0, 0, 0, 0, 0, 0 };
if (getrandom(k, sizeof(k), 0) != sizeof(k))
return 0;
clamp_scalar(k);
export(k, secret);
monty(k, G, k);
export(k, public);
return 1;
}
/*
* Compute a public key from secret.
*/
void x25519_secret_to_public(const uint8_t secret[32], uint8_t public[32])
{
uint32_t k[8];
const uint32_t G[8] = { 9, 0, 0, 0, 0, 0, 0, 0 };
import(secret, k);
clamp_scalar(k);
monty(k, G, k);
export(k, public);
}
/*
* X25519 Elliptic-curve Diffie-Hellman (ECDH).
*
* x25519(k, u) <=> U(kP) = x25519(k, U(P)) where P = (u, v) and U(P) = u.
*
* K_a = clamp_scalar(urandom(32)) Alice's private key
* P_a = x25519(K_a, G) = K_a x G = K_a x 9 Alice's public key
* K_b = clamp_scalar(urandom(32)) Bob's private key
* P_b = x25519(K_b, G) = K_b x G = K_b x 9 Bob's public key
*
* Elliptic-curve Diffie-Hellman produces a shared key for Alice & Bob:
*
* K = x25519(K_a, P_b) = x25519(K_b, P_a) Alice's & Bob's shared key
*
* Proof of correctness:
* (A) x25519(K_a, P_b) = x25519(K_a, x25519(K_b, 9)) = K_a x K_b x 9 = K
* (B) x25519(K_b, P_a) = x25519(K_b, x25519(K_a, 9)) = K_b x K_a x 9 = K
* => Alice and Bob derive identical shared key K.
*
* The return value is non-zero on success. An elliptic curve point with small
* order causes the function return to zero and out buffer receives only zeros.
*/
int x25519(const uint8_t secret[32], const uint8_t public[32], uint8_t out[32])
{
uint32_t k[8], u[8];
import(secret, k);
import(public, u);
clamp_scalar(k);
clamp_coordinate(u);
monty(k, u, k);
export(k, out);
return (k[0] | k[1] | k[2] | k[3] | k[4] | k[5] | k[6] | k[7]) != 0;
}
/*
* RFC8032 Edwards-Curve Digital Signature Algorithm (EdDSA).
* Twisted Edwards curve -x² + y² = (121665/121666)x²y² over GF(p).
*/
/* Base point */
static const uint32_t ed25519_B[32] = {
/* g_x = recover_x(g_y, 0) */
0x8f25d51a, 0xc9562d60, 0x9525a7b2, 0x692cc760,
0xfdd6dc5c, 0xc0a4e231, 0xcd6e53fe, 0x216936d3,
/* g_y = 4 * inverse_modp(5) % p */
0x66666658, 0x66666666, 0x66666666, 0x66666666,
0x66666666, 0x66666666, 0x66666666, 0x66666666,
/* g_z = 1 */
0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
/* g_t = g_x * g_y % p */
0xa5b7dda3, 0x6dde8ab3, 0x775152f5, 0x20f09f80,
0x64abe37d, 0x66ea4e8e, 0xd78b7665, 0x67875f0f
};
/* Group order q = 2^252 + 27742317777372353535851937790883648493 */
static const uint32_t ed25519_q[8] = {
0x5cf5d3ed, 0x5812631a, 0xa2f79cd6, 0x14def9de,
0x00000000, 0x00000000, 0x00000000, 0x10000000,
};
/* Reduce 512-bit x modulo q */
static uint32_t *modq(const uint32_t x[16], uint32_t reduced[8])
{
/*
* Barrett reduction.
*
* First, choose k = 256 such that 2^k > q. Second, set r = floor(4^k / q).
* Now x - q * floor(x*r / 4^k) < 2*q and we obtain x (mod q) after a
* conditional subtraction.
*/
static const uint32_t r[9] = {
0x0a2c131b, 0xed9ce5a3, 0x086329a7, 0x2106215d,
0xffffffeb, 0xffffffff, 0xffffffff, 0xffffffff,
0xf
};
int i, j;
uint64_t c;
uint32_t mask, xr[25] = {0};
/* xr = x * r */
for (i = 0; i < 9; i++) {
c = 0;
for (j = 0; j < 16; j++) {
c += (uint64_t)r[i] * x[j] + xr[i+j];
xr[i+j] = c;
c >>= 32;
}
xr[i+j] = c;
}
/* xr = q * floor(xr / 4^k) = q * floor(xr / 2^512) */
for (i = 0; i < 8; i++) xr[i] = 0;
for (i = 0; i < 8; i++) {
c = 0;
for (j = 0; i+j < 8; j++) {
c = (c >> 32) + xr[i+j] + (uint64_t)xr[i+16] * ed25519_q[j];
xr[i+j] = c;
}
}
/* xr = x - xr */
c = 1;
for (i = 0; i < 8; i++) {
xr[i] = (c = c + x[i] + ~xr[i]);
c >>= 32;
}
/*
* reduced = xr if 0 <= xr < q
* xr - q if q <= xr < 2*q
*/
c = 1;
for (i = 0; i < 8; i++) c = (c + xr[i] + ~ed25519_q[i]) >> 32;
mask = -c;
for (i = 0; i < 8; i++) {
c += (uint64_t)xr[i] + (~ed25519_q[i] & mask);
reduced[i] = c;
c >>= 32;
}
for (i = 0; i < 25; i++) xr[i] = 0;
return reduced;
}
/* Variable-time */
static int cmp_vartime(const uint32_t a[8], const uint32_t b[8])
{
int i;
for (i = 0; i < 8; i++) {
const int j = 8 - (i+1);
if (a[j] < b[j]) return -1;
if (a[j] > b[j]) return 1;
}
return 0;
}
/* Variable-time */
static uint32_t *recover_x(const uint32_t y[8], int sign, uint32_t x[8])
{
int i;
uint32_t yy[8], u[8], v[8], uv3[8], uv7[8], b[8], bb[8];
const uint32_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
const uint32_t one[8] = { 1, 0, 0, 0, 0, 0, 0, 0 };
const uint32_t p[8] = {
0xffffffed, 0xffffffff, 0xffffffff, 0xffffffff,
0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff
};
const uint32_t d[8] = {
0x135978a3, 0x75eb4dca, 0x4141d8ab, 0x00700a4d,
0x7779e898, 0x8cc74079, 0x2b6ffe73, 0x52036cee
};
const uint32_t sqrti[8] = { /* sqrt of -1, i.e., pow(2, (p-1) / 4) % p */
0x4a0ea0b0, 0xc4ee1b27, 0xad2fe478, 0x2f431806,
0x3dfbd7a7, 0x2b4d0099, 0x4fc1df0b, 0x2b832480
};
/* if (y >= p) return 0; */
if (cmp_vartime(y, p) >= 0)
return (void *)0;
/*
* For an encoded point R = (x,y), recover x = ±sqrt((y² - 1) / (dy² + 1)).
* Prime p = 2^255 - 19 (modulo 8) ≡ 5 so α² = β⁴ (i.e., ±α = β²) for any
* square α ∈ F(p) such that β = α^((p+3)/8).
*
* (1) x = ±sqrt(u/v) = ±sqrt((y² - 1) / (dy² + 1))
* (2) β = (u/v)^((p+3)/8) = ··· = uv³(uv⁷)^((p-5)/8)
*/
sub(square(y, yy), one, u);
add(mul(d, yy, v), one, v);
/* If u is zero return a zero coordinate (or error if a non-zero sign) */
if (cmp_vartime(modp(u, x), zero) == 0)
return sign ? (void *)0 : x;
/* β = (u/v)^(p+3)/8 = ··· = uv³(uv⁷)^((p-5)/8) */
mul(u, mul(v, square(v, b), b), uv3); /* b = v³, uv3 = uv³ */
mul(u, mul(v, square(b, b), b), uv7); /* b = v⁷, uv7 = uv⁷ */
mul(uv3, uv7, b);
square(uv7, bb);
for (i = 0; i < 250; i++) mul(b, square(bb, bb), b);
/*
* If β² = -α (that is, vβ² = -u) then multiplication with sqrt(-1) yields
* the recovered x.
*/
sub(mul(v, square(b, bb), yy), u, yy);
if (cmp_vartime(modp(yy, yy), zero) != 0) {
mul(b, sqrti, b);
sub(mul(v, square(b, bb), yy), u, yy);
if (cmp_vartime(modp(yy, yy), zero) != 0)
return (void *)0; /* Input was not a square */
}
/* return (p - b) if (b & 1) != sign else b */
if (((int)b[0] & 1) != sign)
sub(p, b, b);
return modp(b, x);
}
/* Derive a scalar and a message prefix from a 32-byte secret */
static void secret_expand(const uint8_t secret[32],
uint32_t scalar[8], uint8_t prefix[32])
{
int i;
uint8_t hash[64];
struct sha512 ctx;
sha512_init(&ctx);
sha512_update(&ctx, secret, 32);
sha512_final(&ctx, hash);
import(hash, scalar);
clamp_scalar(scalar);
for (i = 0; i < 32; i++) prefix[i] = hash[32+i];
}
/* Finalize SHA-512 computation and return the hash as an integer modulo q */
static void sha512_final_modq(struct sha512 *ctx, uint32_t x[8])
{
union {
uint8_t u8[64];
uint32_t u32[16];
} hash;
sha512_final(ctx, hash.u8);
import(hash.u8, hash.u32);
import(&hash.u8[32], &hash.u32[8]);
modq(hash.u32, x);
}
/* r = ((x * y) + z) % q */
static uint32_t *mul_add_modq(const uint32_t x[8], const uint32_t y[8],
const uint32_t z[8], uint32_t r[8])
{
int i, j;
uint64_t c;
uint32_t xy[16] = {0};
for (i = 0; i < 8; i++) xy[i] = z[i];
for (i = 0; i < 8; i++) {
c = 0;
for (j = 0; j < 8; j++) {
xy[i+j] = (c += (uint64_t)x[i] * y[j] + xy[i+j]);
c >>= 32;
}
xy[i+j] = c;
}
return modq(xy, r);
}
/* Encode point (X:Y:Z:T) as a 255-bit y-coordinate with a sign bit */
static void point_encode(const uint32_t P[32], uint8_t buf[32])
{
uint32_t x[8], y[8], zinv[8];
inverse_modp(P + 16, zinv);
mul(zinv, P, x);
mul(zinv, P + 8, y);
y[7] |= (x[0] & 1) << 31;
export(y, buf);
}
/* Decode point (X:Y:Z:T) from 256 bits encoding the x coordinate and sign */
static uint32_t *point_decode(const uint8_t buf[32], uint32_t P[32])
{
int sign;
uint32_t *Px = P;
uint32_t *Py = P + 8;
uint32_t *Pz = P + 16;
uint32_t *Pt = P + 24;
import(buf, Py);
sign = Py[7] >> 31;
Py[7] &= 0x7fffffff;
if (!recover_x(Py, sign, Px))
return (void *)0;
Pz[0] = 1;
Pz[1] = Pz[2] = Pz[3] = Pz[4] = Pz[5] = Pz[6] = Pz[7] = 0;
mul(Px, Py, Pt);
return P;
}
/* P == Q */
static int point_equal(const uint32_t P[32], const uint32_t Q[32])
{
int i;
uint32_t A[8], B[8], sum = 0;
const uint32_t *Px = P;
const uint32_t *Py = P + 8;
const uint32_t *Pz = P + 16;
const uint32_t *Qx = Q;
const uint32_t *Qy = Q + 8;
const uint32_t *Qz = Q + 16;
/* P.x / P.z == Q.x / Q.z <=> P.x * Q.z == Q.x * P.z */
mul(Px, Qz, A);
mul(Qx, Pz, B);
for (i = 0; i < 8; i++) sum |= A[i] ^ B[i];
/* P.y / P.z == Q.y / Q.z <=> P.y * Q.z == Q.y * P.z */
mul(Py, Qz, A);
mul(Qy, Pz, B);
for (i = 0; i < 8; i++) sum |= A[i] ^ B[i];
return sum == 0;
}
/* R = P + Q */
static uint32_t *point_add(const uint32_t P[32], const uint32_t Q[32],
uint32_t R[32])
{
uint32_t A[8], B[8], C[8], D[8], E[8], F[8], G[8], H[8];
uint32_t *Rx = R;
uint32_t *Ry = R + 8;
uint32_t *Rz = R + 16;
uint32_t *Rt = R + 24;
const uint32_t *Px = P;
const uint32_t *Py = P + 8;
const uint32_t *Pz = P + 16;
const uint32_t *Pt = P + 24;
const uint32_t *Qx = Q;
const uint32_t *Qy = Q + 8;
const uint32_t *Qz = Q + 16;
const uint32_t *Qt = Q + 24;
const uint32_t d2[8] = {
0x26b2f159, 0xebd69b94, 0x8283b156, 0x00e0149a,
0xeef3d130, 0x198e80f2, 0x56dffce7, 0x2406d9dc
};
/* A = (P[1]-P[0]) * (Q[1]-Q[0]) % p */
sub(Py, Px, A);
sub(Qy, Qx, C);
mul(A, C, A);
/* B = (P[1]+P[0]) * (Q[1]+Q[0]) % p */
add(Py, Px, B);
add(Qy, Qx, C);
mul(B, C, B);
/* C = 2 * P[3] * Q[3] * d % p */
mul(Pt, Qt, C);
mul(C, d2, C);
/* D = 2 * P[2] * Q[2] % p */
mul(Pz, Qz, D);
add(D, D, D);
modp(D, D);
/* E = B - A */
sub(B, A, E);
/* F = D - C */
sub(D, C, F);
/* G = D + C */
add(D, C, G);
/* H = B + A */
add(B, A, H);
/* R = (E*F, G*H, F*G, E*H) */
mul(E, F, Rx);
mul(G, H, Ry);
mul(F, G, Rz);
mul(E, H, Rt);
return R;
}
/* Q = s * P */
static uint32_t *point_mul(const uint32_t s[8], const uint32_t P[32],
uint32_t Q[32])
{
int i;
uint32_t sP[32];
for (i = 0; i < 32; i++) {
sP[i] = P[i];
Q[i] = 0;
}
Q[8] = Q[16] = 1;
for (i = 0; i < 256; i++) {
uint32_t R[32];
const int swap = (s[i / 32] >> (i % 32)) & 1;
point_add(Q, sP, R);
cswap(Q, R, swap);
cswap(Q + 8, R + 8, swap);
cswap(Q + 16, R + 16, swap);
cswap(Q + 24, R + 24, swap);
point_add(sP, sP, sP);
}
return Q;
}
/* Compute a public key from secret */
void ed25519_secret_to_public(const uint8_t secret[32], uint8_t public[32])
{
uint8_t prefix[32];
uint32_t a[8], aB[32];
secret_expand(secret, a, prefix);
point_mul(a, ed25519_B, aB);
point_encode(aB, public);
}
/* Constant-time EdDSA signature generation algorithm */
void ed25519_sign(const uint8_t secret[32], const void *msg, size_t len,
uint8_t signature[64])
{
struct sha512 ctx;
uint32_t a[8], h[8], r[8], P[32];
secret_expand(secret, a, signature);
sha512_init(&ctx);
sha512_update(&ctx, signature, 32);
sha512_update(&ctx, msg, len);
sha512_final_modq(&ctx, r);
point_encode(point_mul(r, ed25519_B, P), signature);
point_encode(point_mul(a, ed25519_B, P), signature + 32);
sha512_init(&ctx);
sha512_update(&ctx, signature, 64);
sha512_update(&ctx, msg, len);
sha512_final_modq(&ctx, h);
export(mul_add_modq(h, a, r, r), signature + 32);
}
/* Variable-time EdDSA signature verification algorithm */
int ed25519_verify(const uint8_t public[32], const void *msg, size_t len,
const uint8_t signature[64])
{
struct sha512 ctx;
uint32_t A[32], R[32], s[8], h[8], sB[32], hA[32];
if (!point_decode(public, A))
return 0;
if (!point_decode(signature, R))
return 0;
import(signature + 32, s);
if (cmp_vartime(s, ed25519_q) >= 0)
return 0;
sha512_init(&ctx);
sha512_update(&ctx, signature, 32);
sha512_update(&ctx, public, 32);
sha512_update(&ctx, msg, len);
sha512_final_modq(&ctx, h);
point_mul(s, ed25519_B, sB);
point_mul(h, A, hA);
return point_equal(sB, point_add(R, hA, hA));
}
#include "sha512.h"
#define ROR64(x, c) (((x) >> (c)) | ((x) << (64 - (c))))
#define LOAD64_BE(p) \
( ((uint64_t)((p)[7]) << 0) \
| ((uint64_t)((p)[6]) << 8) \
| ((uint64_t)((p)[5]) << 16) \
| ((uint64_t)((p)[4]) << 24) \
| ((uint64_t)((p)[3]) << 32) \
| ((uint64_t)((p)[2]) << 40) \
| ((uint64_t)((p)[1]) << 48) \
| ((uint64_t)((p)[0]) << 56) \
)
#define STORE64_BE(p, v) \
(p)[7] = ((v) >> 0) & 0xFF; \
(p)[6] = ((v) >> 8) & 0xFF; \
(p)[5] = ((v) >> 16) & 0xFF; \
(p)[4] = ((v) >> 24) & 0xFF; \
(p)[3] = ((v) >> 32) & 0xFF; \
(p)[2] = ((v) >> 40) & 0xFF; \
(p)[1] = ((v) >> 48) & 0xFF; \
(p)[0] = ((v) >> 56) & 0xFF;
void sha512_init(struct sha512 *ctx)
{
ctx->state[0] = 0x6a09e667f3bcc908; /* sqrt(2) */
ctx->state[1] = 0xbb67ae8584caa73b; /* sqrt(3) */
ctx->state[2] = 0x3c6ef372fe94f82b; /* sqrt(5) */
ctx->state[3] = 0xa54ff53a5f1d36f1; /* sqrt(7) */
ctx->state[4] = 0x510e527fade682d1; /* sqrt(11) */
ctx->state[5] = 0x9b05688c2b3e6c1f; /* sqrt(13) */
ctx->state[6] = 0x1f83d9abfb41bd6b; /* sqrt(17) */
ctx->state[7] = 0x5be0cd19137e2179; /* sqrt(19) */
ctx->bits[0] = ctx->bits[1] = 0;
ctx->n = 0;
}
static void sha512_block(uint64_t state[8], const uint8_t p[128])
{
uint64_t a, b, c, d, e, f, g, h, s0, s1, S0, S1, t1, t2, w[16];
static const uint64_t K512[80] = {
0x428a2f98d728ae22, 0x7137449123ef65cd,
0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019,
0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe,
0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210,
0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725,
0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926,
0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8,
0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001,
0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910,
0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60,
0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9,
0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207,
0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6,
0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493,
0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
0x5fcb6fab3ad6faec, 0x6c44198c4a475817
};
a = state[0]; b = state[1]; c = state[2]; d = state[3];
e = state[4]; f = state[5]; g = state[6]; h = state[7];
#define ROUND_CORE(i) \
S0 = ROR64(a, 28) ^ ROR64(a, 34) ^ ROR64(a, 39); \
S1 = ROR64(e, 14) ^ ROR64(e, 18) ^ ROR64(e, 41); \
t1 = h + S1 + ((e & f) ^ (~e & g)) + K512[i] + w[i & 0xf]; \
t2 = S0 + ((a & b) ^ (a & c) ^ (b & c)); \
h = g; g = f; f = e; e = d + t1; \
d = c; c = b; b = a; a = t1 + t2;
#define ROUND_0_15(i) w[i] = LOAD64_BE(p); p += 8; ROUND_CORE(i)
ROUND_0_15( 0) ROUND_0_15( 1) ROUND_0_15( 2) ROUND_0_15( 3)
ROUND_0_15( 4) ROUND_0_15( 5) ROUND_0_15( 6) ROUND_0_15( 7)
ROUND_0_15( 8) ROUND_0_15( 9) ROUND_0_15(10) ROUND_0_15(11)
ROUND_0_15(12) ROUND_0_15(13) ROUND_0_15(14) ROUND_0_15(15)
#undef ROUND_0_15
#define ROUND_16_79(i) \
s0 = w[(i + 1) & 0xf]; s1 = w[(i + 14) & 0xf]; \
s0 = ROR64(s0, 1) ^ ROR64(s0, 8) ^ (s0 >> 7); \
s1 = ROR64(s1, 19) ^ ROR64(s1, 61) ^ (s1 >> 6); \
w[i & 0xf] += s0 + s1 + w[(i + 9) & 0xf]; ROUND_CORE(i)
ROUND_16_79(16) ROUND_16_79(17) ROUND_16_79(18) ROUND_16_79(19)
ROUND_16_79(20) ROUND_16_79(21) ROUND_16_79(22) ROUND_16_79(23)
ROUND_16_79(24) ROUND_16_79(25) ROUND_16_79(26) ROUND_16_79(27)
ROUND_16_79(28) ROUND_16_79(29) ROUND_16_79(30) ROUND_16_79(31)
ROUND_16_79(32) ROUND_16_79(33) ROUND_16_79(34) ROUND_16_79(35)
ROUND_16_79(36) ROUND_16_79(37) ROUND_16_79(38) ROUND_16_79(39)
ROUND_16_79(40) ROUND_16_79(41) ROUND_16_79(42) ROUND_16_79(43)
ROUND_16_79(44) ROUND_16_79(45) ROUND_16_79(46) ROUND_16_79(47)
ROUND_16_79(48) ROUND_16_79(49) ROUND_16_79(50) ROUND_16_79(51)
ROUND_16_79(52) ROUND_16_79(53) ROUND_16_79(54) ROUND_16_79(55)
ROUND_16_79(56) ROUND_16_79(57) ROUND_16_79(58) ROUND_16_79(59)
ROUND_16_79(60) ROUND_16_79(61) ROUND_16_79(62) ROUND_16_79(63)
ROUND_16_79(64) ROUND_16_79(65) ROUND_16_79(66) ROUND_16_79(67)
ROUND_16_79(68) ROUND_16_79(69) ROUND_16_79(70) ROUND_16_79(71)
ROUND_16_79(72) ROUND_16_79(73) ROUND_16_79(74) ROUND_16_79(75)
ROUND_16_79(76) ROUND_16_79(77) ROUND_16_79(78) ROUND_16_79(79)
#undef ROUND_16_79
#undef ROUND_CORE
state[0] += a; state[1] += b; state[2] += c; state[3] += d;
state[4] += e; state[5] += f; state[6] += g; state[7] += h;
}
void sha512_update(struct sha512 *ctx, const void *data, size_t n)
{
const uint8_t *input = (const uint8_t *)data;
if (ctx->n || n < 128) {
while (n && ctx->n < 128) {
ctx->buffer[ctx->n++] = *input++;
n--;
}
if (ctx->n < 128)
return;
sha512_block(ctx->state, ctx->buffer);
ctx->bits[1] += !(ctx->bits[0] += 1024);
ctx->n = 0;
}
while (n >= 128) {
sha512_block(ctx->state, input);
ctx->bits[1] += !(ctx->bits[0] += 1024);
input += 128;
n -= 128;
}
while (n) {
ctx->buffer[ctx->n++] = *input++;
n--;
}
}
void sha512_final(struct sha512 *ctx, uint8_t hash[64])
{
int i;
uint8_t buf[256];
ctx->bits[0] += ctx->n * 8;
for (i = 1, buf[0] = 0x80; (ctx->n + i + 16) % 128; buf[i++] = 0);
STORE64_BE(buf + i, ctx->bits[1]);
STORE64_BE(buf + i + 8, ctx->bits[0]);
sha512_update(ctx, buf, i + 16);
STORE64_BE(hash + 0, ctx->state[0]);
STORE64_BE(hash + 8, ctx->state[1]);
STORE64_BE(hash + 16, ctx->state[2]);
STORE64_BE(hash + 24, ctx->state[3]);
STORE64_BE(hash + 32, ctx->state[4]);
STORE64_BE(hash + 40, ctx->state[5]);
STORE64_BE(hash + 48, ctx->state[6]);
STORE64_BE(hash + 56, ctx->state[7]);
}
void sha512(const void *data, size_t n, uint8_t hash[64])
{
struct sha512 ctx;
sha512_init(&ctx);
sha512_update(&ctx, data, n);
sha512_final(&ctx, hash);
}
#ifndef SHA512_H
#define SHA512_H
#include <stddef.h>
#include <stdint.h>
struct sha512 {
uint64_t state[8];
uint8_t buffer[128];
uint64_t bits[2];
int n;
};
void sha512_init(struct sha512 *ctx);
void sha512_update(struct sha512 *ctx, const void *data, size_t n);
void sha512_final(struct sha512 *ctx, uint8_t hash[64]);
void sha512(const void *data, size_t n, uint8_t hash[64]);
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment