Skip to content

Instantly share code, notes, and snippets.

@bellbind
Last active July 22, 2017 18:27
Show Gist options
  • Save bellbind/2fd66a09340942b9845ef432a5c8c419 to your computer and use it in GitHub Desktop.
Save bellbind/2fd66a09340942b9845ef432a5c8c419 to your computer and use it in GitHub Desktop.
[c]sha256 implementation
# [build with cmake]
# $ mkdir build
# $ cd build
# $ cmake ..
# $ make
#
# [install and uninstall]
# $ make install
# $ xargs rm < install_manifest.txt
#
# [FYI: install to working dir: $(pwd)/tmp/usr/local/... ]
# $ make DESTDIR=$(pwd)/tmp install
# [FYI: set prefix: /usr ]
# $ cmake -DCMAKE_INSTALL_PREFIX=/usr ..
cmake_minimum_required(VERSION 3.0)
project(sha256)
set(SEMVER 1.0.0)
enable_language(C)
set(CMAKE_C_FLAGS "-std=c11 -pedantic -Wall -Wextra -O3 -funroll-loops")
add_executable(sha256l sha256main.c sha256.c)
install(TARGETS sha256l DESTINATION bin)
add_executable(sha256e sha256main.c sha256extracted.c)
install(TARGETS sha256e DESTINATION bin)
add_executable(sha256g sha256main.c sha256gen.c)
install(TARGETS sha256g DESTINATION bin)
add_library(sha256l-so SHARED sha256.c)
set_target_properties(sha256l-so PROPERTIES OUTPUT_NAME sha256l SOVERSION ${SEMVER})
install(TARGETS sha256l-so DESTINATION lib)
add_library(sha256e-so SHARED sha256extracted.c)
set_target_properties(sha256e-so PROPERTIES OUTPUT_NAME sha256e SOVERSION ${SEMVER})
install(TARGETS sha256e-so DESTINATION lib)
add_library(sha256g-so SHARED sha256gen.c)
set_target_properties(sha256g-so PROPERTIES OUTPUT_NAME sha256g SOVERSION ${SEMVER})
install(TARGETS sha256g-so DESTINATION lib)
add_library(sha256l-a STATIC sha256.c)
set_target_properties(sha256l-a PROPERTIES OUTPUT_NAME sha256l)
install(TARGETS sha256l-a DESTINATION lib)
add_library(sha256e-a STATIC sha256extracted.c)
set_target_properties(sha256e-a PROPERTIES OUTPUT_NAME sha256e)
install(TARGETS sha256e-a DESTINATION lib)
add_library(sha256g-a STATIC sha256gen.c)
set_target_properties(sha256g-a PROPERTIES OUTPUT_NAME sha256g)
install(TARGETS sha256g-a DESTINATION lib)
install(FILES sha256.h DESTINATION include)
#include <stdlib.h>
#include <string.h>
#include "sha256.h"
// see: https://en.wikipedia.org/wiki/SHA-2#Pseudocode
static const uint32_t K[] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
static const uint32_t H[] = {
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};
static inline uint32_t big32(uint32_t v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return v;
#else
union {
struct {uint8_t b0, b1, b2, b3;};
uint32_t v;
} le = {.v = v};
union {
struct {uint8_t b3, b2, b1, b0;};
uint32_t v;
} be = {.b0 = le.b0, .b1 = le.b1, .b2 = le.b2, .b3 = le.b3};
return be.v;
#endif
}
static inline uint64_t big64(uint64_t v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return v;
#else
union {
struct {uint8_t b0, b1, b2, b3, b4, b5, b6, b7;};
uint64_t v;
} le = {.v = v};
union {
struct {uint8_t b7, b6, b5, b4, b3, b2, b1, b0;};
uint64_t v;
} be = {
.b0 = le.b0, .b1 = le.b1, .b2 = le.b2, .b3 = le.b3,
.b4 = le.b4, .b5 = le.b5, .b6 = le.b6, .b7 = le.b7,
};
return be.v;
#endif
}
static inline uint32_t rotr(uint32_t v, size_t n) {
return (v >> n) | (v << (32 - n));
}
static void update(struct sha256* self);
static void tail(struct sha256* self);
extern void sha256_init(struct sha256* self) {
memcpy(self->h, H, sizeof H);
self->bytes = 0;
self->chunk_size = 0;
}
extern void sha256_update(struct sha256* self, const void* buf, size_t bytes) {
if (self->chunk_size < 0) return;
const char* cur = buf;
unsigned char* chunk = self->chunk;
size_t chunk_size = self->chunk_size;
self->bytes += bytes;
const char* end = &cur[bytes];
for (const char* next = &cur[64 - chunk_size]; next <= end; next += 64) {
memcpy(&chunk[chunk_size], cur, next - cur);
update(self);
cur = next;
chunk_size = 0;
}
size_t rest = end - cur;
memcpy(&chunk[chunk_size], cur, rest);
self->chunk_size = chunk_size + rest;
}
extern void sha256_digest(struct sha256* self, void* digest) {
if (self->chunk_size >= 0) {
tail(self);
self->chunk_size = -1;
}
uint32_t* buf = (uint32_t*) digest;
for (int i = 0; i < 8; i++) {
buf[i] = big32(self->h[i]);
}
}
static void update(struct sha256* self) {
uint32_t* sh = self->h;
uint32_t* cd = (uint32_t*) self->chunk;
uint32_t w[64];
#pragma unroll
for (size_t i = 0; i < 16; i++) {
w[i] = big32(cd[i]);
}
#pragma unroll
for (size_t i = 16; i < 64; i++) {
uint32_t w16 = w[i - 16], w15 = w[i - 15], w7 = w[i - 7], w2 = w[i - 2];
uint32_t s0 = rotr(w15, 7) ^ rotr(w15, 18) ^ (w15 >> 3);
uint32_t s1 = rotr(w2, 17) ^ rotr(w2, 19) ^ (w2 >> 10);
w[i] = w16 + s0 + w7 + s1;
}
uint32_t a = sh[0], b = sh[1], c = sh[2], d = sh[3],
e = sh[4], f = sh[5], g = sh[6], h = sh[7];
#pragma unroll
for (size_t i = 0; i < 64; i++) {
uint32_t s0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22);
uint32_t s1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25);
uint32_t ch = (e & f) ^ (~e & g);
uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
uint32_t t1 = h + s1 + ch + K[i] + w[i];
uint32_t t2 = s0 + maj;
h = g; g = f; f = e; e = d + t1;
d = c; c = b; b = a; a = t1 + t2;
}
sh[0] += a; sh[1] += b; sh[2] += c; sh[3] += d;
sh[4] += e; sh[5] += f; sh[6] += g; sh[7] += h;
}
static void tail(struct sha256* self) {
uint64_t bits = big64(self->bytes * 8);
self->chunk[self->chunk_size] = 0x80;
memset(&self->chunk[self->chunk_size + 1], 0, 63 - self->chunk_size);
if (self->chunk_size + 1 + sizeof bits > 64) {
update(self);
memset(self->chunk, 0, 64 - sizeof bits);
}
memcpy(&self->chunk[64 - sizeof bits], &bits, sizeof bits);
update(self);
}
#ifndef __SHA256_H__
#define __SHA256_H__
#include <stdint.h>
#define SHA256_HASH_BYTES 32
struct sha256 {
uint32_t h[8];
uint64_t bytes;
unsigned char chunk[64];
long chunk_size;
};
extern void sha256_init(struct sha256* self);
extern void sha256_update(struct sha256* self, const void* buf, size_t bytes);
extern void sha256_digest(struct sha256* self, void* digest);
#endif
#include <stdlib.h>
#include <string.h>
#include "sha256.h"
// see: https://en.wikipedia.org/wiki/SHA-2#Pseudocode
static const uint32_t K[] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
static const uint32_t H[] = {
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};
static inline uint32_t big32(uint32_t v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return v;
#else
union {
struct {uint8_t b0, b1, b2, b3;};
uint32_t v;
} le = {.v = v};
union {
struct {uint8_t b3, b2, b1, b0;};
uint32_t v;
} be = {.b0 = le.b0, .b1 = le.b1, .b2 = le.b2, .b3 = le.b3};
return be.v;
#endif
}
static inline uint64_t big64(uint64_t v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return v;
#else
union {
struct {uint8_t b0, b1, b2, b3, b4, b5, b6, b7;};
uint64_t v;
} le = {.v = v};
union {
struct {uint8_t b7, b6, b5, b4, b3, b2, b1, b0;};
uint64_t v;
} be = {
.b0 = le.b0, .b1 = le.b1, .b2 = le.b2, .b3 = le.b3,
.b4 = le.b4, .b5 = le.b5, .b6 = le.b6, .b7 = le.b7,
};
return be.v;
#endif
}
static inline uint32_t rotr(uint32_t v, size_t n) {
return (v >> n) | (v << (32 - n));
}
static void update(struct sha256* self);
static void tail(struct sha256* self);
extern void sha256_init(struct sha256* self) {
memcpy(self->h, H, sizeof H);
self->bytes = 0;
self->chunk_size = 0;
}
extern void sha256_update(struct sha256* self, const void* buf, size_t bytes) {
if (self->chunk_size < 0) return;
const char* cur = buf;
unsigned char* chunk = self->chunk;
size_t chunk_size = self->chunk_size;
self->bytes += bytes;
const char* end = &cur[bytes];
for (const char* next = &cur[64 - chunk_size]; next <= end; next += 64) {
memcpy(&chunk[chunk_size], cur, next - cur);
update(self);
cur = next;
chunk_size = 0;
}
size_t rest = end - cur;
memcpy(&chunk[chunk_size], cur, rest);
self->chunk_size = chunk_size + rest;
}
extern void sha256_digest(struct sha256* self, void* digest) {
if (self->chunk_size >= 0) {
tail(self);
self->chunk_size = -1;
}
uint32_t* buf = (uint32_t*) digest;
for (int i = 0; i < 8; i++) {
buf[i] = big32(self->h[i]);
}
}
static inline uint32_t s0(uint32_t a) {
return rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22);
}
static inline uint32_t s1(uint32_t e) {
return rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25);
}
static inline uint32_t ch(uint32_t e, uint32_t f, uint32_t g) {
return (e & f) ^ (~e & g);
}
static inline uint32_t maj(uint32_t a, uint32_t b, uint32_t c) {
return (a & b) ^ (a & c) ^ (b & c);
}
static inline uint32_t
t1(uint32_t e, uint32_t f, uint32_t g, uint32_t w, uint32_t k) {
return s1(e) + ch(e, f, g) + k + w;
}
static inline uint32_t t2(uint32_t a, uint32_t b, uint32_t c) {
return s0(a) + maj(a, b, c);
}
static inline void
step(uint32_t a, uint32_t b, uint32_t c, uint32_t* d,
uint32_t e, uint32_t f, uint32_t g, uint32_t* h, uint32_t w, uint32_t k) {
*h += t1(e, f, g, w, k);
*d += *h;
*h += t2(a, b, c);
}
static void update(struct sha256* self) {
uint32_t* sh = self->h;
uint32_t* cd = (uint32_t*) self->chunk;
uint32_t w[64];
#pragma unroll
for (size_t i = 0; i < 16; i++) {
w[i] = big32(cd[i]);
}
#pragma unroll
for (size_t i = 16; i < 64; i++) {
uint32_t w16 = w[i - 16], w15 = w[i - 15], w7 = w[i - 7], w2 = w[i - 2];
uint32_t s0 = rotr(w15, 7) ^ rotr(w15, 18) ^ (w15 >> 3);
uint32_t s1 = rotr(w2, 17) ^ rotr(w2, 19) ^ (w2 >> 10);
w[i] = w16 + s0 + w7 + s1;
}
uint32_t a = sh[0], b = sh[1], c = sh[2], d = sh[3],
e = sh[4], f = sh[5], g = sh[6], h = sh[7];
/*
for (int i = 0; i < 64; i++) {
uint32_t s0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22);
uint32_t s1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25);
uint32_t ch = (e & f) ^ (~e & g);
uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
uint32_t t1 = h + s1 + ch + K[i] + w[i];
uint32_t t2 = s0 + maj;
h = g; g = f; f = e; e = d + t1;
d = c; c = b; b = a; a = t1 + t2;
}
*/
// extract whole loop for speed: Instead of variable rotation,
// Rotating parameters for avoiding assignment,
// In each step, updated only `d` and `h`
step(a, b, c, &d, e, f, g, &h, w[0], K[0]);
step(h, a, b, &c, d, e, f, &g, w[1], K[1]);
step(g, h, a, &b, c, d, e, &f, w[2], K[2]);
step(f, g, h, &a, b, c, d, &e, w[3], K[3]);
step(e, f, g, &h, a, b, c, &d, w[4], K[4]);
step(d, e, f, &g, h, a, b, &c, w[5], K[5]);
step(c, d, e, &f, g, h, a, &b, w[6], K[6]);
step(b, c, d, &e, f, g, h, &a, w[7], K[7]);
step(a, b, c, &d, e, f, g, &h, w[8], K[8]);
step(h, a, b, &c, d, e, f, &g, w[9], K[9]);
step(g, h, a, &b, c, d, e, &f, w[10], K[10]);
step(f, g, h, &a, b, c, d, &e, w[11], K[11]);
step(e, f, g, &h, a, b, c, &d, w[12], K[12]);
step(d, e, f, &g, h, a, b, &c, w[13], K[13]);
step(c, d, e, &f, g, h, a, &b, w[14], K[14]);
step(b, c, d, &e, f, g, h, &a, w[15], K[15]);
step(a, b, c, &d, e, f, g, &h, w[16], K[16]);
step(h, a, b, &c, d, e, f, &g, w[17], K[17]);
step(g, h, a, &b, c, d, e, &f, w[18], K[18]);
step(f, g, h, &a, b, c, d, &e, w[19], K[19]);
step(e, f, g, &h, a, b, c, &d, w[20], K[20]);
step(d, e, f, &g, h, a, b, &c, w[21], K[21]);
step(c, d, e, &f, g, h, a, &b, w[22], K[22]);
step(b, c, d, &e, f, g, h, &a, w[23], K[23]);
step(a, b, c, &d, e, f, g, &h, w[24], K[24]);
step(h, a, b, &c, d, e, f, &g, w[25], K[25]);
step(g, h, a, &b, c, d, e, &f, w[26], K[26]);
step(f, g, h, &a, b, c, d, &e, w[27], K[27]);
step(e, f, g, &h, a, b, c, &d, w[28], K[28]);
step(d, e, f, &g, h, a, b, &c, w[29], K[29]);
step(c, d, e, &f, g, h, a, &b, w[30], K[30]);
step(b, c, d, &e, f, g, h, &a, w[31], K[31]);
step(a, b, c, &d, e, f, g, &h, w[32], K[32]);
step(h, a, b, &c, d, e, f, &g, w[33], K[33]);
step(g, h, a, &b, c, d, e, &f, w[34], K[34]);
step(f, g, h, &a, b, c, d, &e, w[35], K[35]);
step(e, f, g, &h, a, b, c, &d, w[36], K[36]);
step(d, e, f, &g, h, a, b, &c, w[37], K[37]);
step(c, d, e, &f, g, h, a, &b, w[38], K[38]);
step(b, c, d, &e, f, g, h, &a, w[39], K[39]);
step(a, b, c, &d, e, f, g, &h, w[40], K[40]);
step(h, a, b, &c, d, e, f, &g, w[41], K[41]);
step(g, h, a, &b, c, d, e, &f, w[42], K[42]);
step(f, g, h, &a, b, c, d, &e, w[43], K[43]);
step(e, f, g, &h, a, b, c, &d, w[44], K[44]);
step(d, e, f, &g, h, a, b, &c, w[45], K[45]);
step(c, d, e, &f, g, h, a, &b, w[46], K[46]);
step(b, c, d, &e, f, g, h, &a, w[47], K[47]);
step(a, b, c, &d, e, f, g, &h, w[48], K[48]);
step(h, a, b, &c, d, e, f, &g, w[49], K[49]);
step(g, h, a, &b, c, d, e, &f, w[50], K[50]);
step(f, g, h, &a, b, c, d, &e, w[51], K[51]);
step(e, f, g, &h, a, b, c, &d, w[52], K[52]);
step(d, e, f, &g, h, a, b, &c, w[53], K[53]);
step(c, d, e, &f, g, h, a, &b, w[54], K[54]);
step(b, c, d, &e, f, g, h, &a, w[55], K[55]);
step(a, b, c, &d, e, f, g, &h, w[56], K[56]);
step(h, a, b, &c, d, e, f, &g, w[57], K[57]);
step(g, h, a, &b, c, d, e, &f, w[58], K[58]);
step(f, g, h, &a, b, c, d, &e, w[59], K[59]);
step(e, f, g, &h, a, b, c, &d, w[60], K[60]);
step(d, e, f, &g, h, a, b, &c, w[61], K[61]);
step(c, d, e, &f, g, h, a, &b, w[62], K[62]);
step(b, c, d, &e, f, g, h, &a, w[63], K[63]);
sh[0] += a; sh[1] += b; sh[2] += c; sh[3] += d;
sh[4] += e; sh[5] += f; sh[6] += g; sh[7] += h;
}
static void tail(struct sha256* self) {
uint64_t bits = big64(self->bytes * 8);
self->chunk[self->chunk_size] = 0x80;
memset(&self->chunk[self->chunk_size + 1], 0, 63 - self->chunk_size);
if (self->chunk_size + 1 + sizeof bits > 64) {
update(self);
memset(self->chunk, 0, 64 - sizeof bits);
}
memcpy(&self->chunk[64 - sizeof bits], &bits, sizeof bits);
update(self);
}
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "sha256.h"
// see: https://en.wikipedia.org/wiki/SHA-2#Pseudocode
// - this version generats constants at runtime -
static uint32_t K[64] = {0};
static uint32_t H[8] = {0};
static inline uint32_t big32(uint32_t v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return v;
#else
union {
struct {uint8_t b0, b1, b2, b3;};
uint32_t v;
} le = {.v = v};
union {
struct {uint8_t b3, b2, b1, b0;};
uint32_t v;
} be = {.b0 = le.b0, .b1 = le.b1, .b2 = le.b2, .b3 = le.b3};
return be.v;
#endif
}
static inline uint64_t big64(uint64_t v) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return v;
#else
union {
struct {uint8_t b0, b1, b2, b3, b4, b5, b6, b7;};
uint64_t v;
} le = {.v = v};
union {
struct {uint8_t b7, b6, b5, b4, b3, b2, b1, b0;};
uint64_t v;
} be = {
.b0 = le.b0, .b1 = le.b1, .b2 = le.b2, .b3 = le.b3,
.b4 = le.b4, .b5 = le.b5, .b6 = le.b6, .b7 = le.b7,
};
return be.v;
#endif
}
static inline uint32_t rotr(uint32_t v, size_t n) {
return (v >> n) | (v << (32 - n));
}
static void initlib();
static void update(struct sha256* self);
static void tail(struct sha256* self);
extern void sha256_init(struct sha256* self) {
if (H[0] == 0) initlib();
memcpy(self->h, H, sizeof H);
self->bytes = 0;
self->chunk_size = 0;
}
extern void sha256_update(struct sha256* self, const void* buf, size_t bytes) {
if (self->chunk_size < 0) return;
const char* cur = buf;
unsigned char* chunk = self->chunk;
size_t chunk_size = self->chunk_size;
self->bytes += bytes;
const char* end = &cur[bytes];
for (const char* next = &cur[64 - chunk_size]; next <= end; next += 64) {
memcpy(&chunk[chunk_size], cur, next - cur);
update(self);
cur = next;
chunk_size = 0;
}
size_t rest = end - cur;
memcpy(&chunk[chunk_size], cur, rest);
self->chunk_size = chunk_size + rest;
}
extern void sha256_digest(struct sha256* self, void* digest) {
tail(self);
uint32_t* buf = (uint32_t*) digest;
for (int i = 0; i < 8; i++) {
buf[i] = big32(self->h[i]);
}
}
static void update(struct sha256* self) {
uint32_t* sh = self->h;
uint32_t* cd = (uint32_t*) self->chunk;
uint32_t w[64];
#pragma unroll
for (size_t i = 0; i < 16; i++) {
w[i] = big32(cd[i]);
}
#pragma unroll
for (size_t i = 16; i < 64; i++) {
uint32_t w16 = w[i - 16], w15 = w[i - 15], w7 = w[i - 7], w2 = w[i - 2];
uint32_t s0 = rotr(w15, 7) ^ rotr(w15, 18) ^ (w15 >> 3);
uint32_t s1 = rotr(w2, 17) ^ rotr(w2, 19) ^ (w2 >> 10);
w[i] = w16 + s0 + w7 + s1;
}
uint32_t a = sh[0], b = sh[1], c = sh[2], d = sh[3],
e = sh[4], f = sh[5], g = sh[6], h = sh[7];
#pragma unroll
for (int i = 0; i < 64; i++) {
uint32_t s0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22);
uint32_t s1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25);
uint32_t ch = (e & f) ^ (~e & g);
uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
uint32_t t1 = h + s1 + ch + K[i] + w[i];
uint32_t t2 = s0 + maj;
h = g; g = f; f = e; e = d + t1;
d = c; c = b; b = a; a = t1 + t2;
}
sh[0] += a; sh[1] += b; sh[2] += c; sh[3] += d;
sh[4] += e; sh[5] += f; sh[6] += g; sh[7] += h;
}
static void tail(struct sha256* self) {
uint64_t bits = big64(self->bytes * 8);
self->chunk[self->chunk_size] = 0x80;
memset(&self->chunk[self->chunk_size + 1], 0, 63 - self->chunk_size);
if (self->chunk_size + 1 + sizeof bits > 64) {
update(self);
memset(self->chunk, 0, 64 - sizeof bits);
}
memcpy(&self->chunk[64 - sizeof bits], &bits, sizeof bits);
update(self);
}
// generating sha256 constants
static void primes(uint32_t* ps, size_t count) {
if (count == 0) return;
size_t s = 0;
ps[s++] = 2;
for (uint32_t n = 3; s < count; n += 2) {
for (size_t i = 0; i < s; i++) if (n % ps[i] == 0) goto out;
ps[s++] = n;
out:;
}
}
static uint32_t fractop32(double v) {
union {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
struct {uint8_t s: 1; uint32_t e: 11; uint64_t f: 52;};
#else
struct {uint64_t f: 52; uint32_t e: 11; uint8_t s: 1;};
#endif
double v;
} bf = {.v = v};
uint64_t f = bf.f | (0x1ULL << 52);
int sft = 20 - (bf.e - 1023);
return (sft >= 0 ? f >> sft : f << -sft) & 0xffffffff;
}
static void initlib() {
primes(K, 64);
for (size_t i = 0; i < 8; i++) H[i] = fractop32(sqrt(K[i]));
for (size_t i = 0; i < 64; i++) K[i] = fractop32(cbrt(K[i]));
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sha256.h"
// command for sha256sum compatible output
// $ cc -O3 -std=c11 -pedantic -Wall -Wextra sha256.c sha256main.c -o sha256
// or using gcc-7
// $ gcc-7 -O3 -funroll-loops -std=c11 -pedantic -Wall -Wextra
// sha256.c sha256main.c -o sha256
//
// (you can replace the sha256.c variations: sha256gen.c sha256extracted.c)
static int digest_file(char* filename, unsigned char* digest) {
char buf[BUFSIZ];
struct sha256 hash;
FILE* fp = strcmp(filename, "-") == 0 ? stdin : fopen(filename, "r");
if (!fp) return EXIT_FAILURE;
sha256_init(&hash);
for (;;) {
size_t s = fread(buf, 1, BUFSIZ, fp);
sha256_update(&hash, buf, s);
if (s < BUFSIZ) break;
}
fclose(fp);
sha256_digest(&hash, digest);
return EXIT_SUCCESS;
}
static int output_file(char* prog, char* file) {
unsigned char digest[SHA256_HASH_BYTES];
if (digest_file(file, digest) != EXIT_SUCCESS) {
fprintf(stderr, "%s: %s: No such file or directory\n", prog, file);
return EXIT_FAILURE;
}
for (size_t i = 0; i < SHA256_HASH_BYTES; i++) printf("%02x", digest[i]);
printf(" %s\n", file);
return EXIT_SUCCESS;
}
static int output(int argc, char** argv) {
if (argc == 1) return output_file(argv[0], "-");
int ret = EXIT_SUCCESS;
for (int i = 1; i < argc; i++) ret |= output_file(argv[0], argv[i]);
return ret;
}
static char* filename(char* head, char* end) {
// compat for coretuils sha256sum
if (*head == ' ') head++; // skip if 1 space (not tab) exist
while (*end == '\r' || *end == '\n' ) *end-- = '\0';
return head;
}
static int parse_hex(char* hex, unsigned char* expected) {
for (size_t i = 0; i < SHA256_HASH_BYTES; i++) {
char bytehex[3] = {hex[i * 2], hex[i * 2 + 1], '\0'};
char* end = NULL;
expected[i] = strtol(bytehex, &end, 16);
if (*end != '\0') return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
static int check_digest(unsigned char* digest, unsigned char* expected) {
for (size_t i = 0; i < SHA256_HASH_BYTES; i++) {
if (digest[i] != expected[i]) return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
enum CheckResult {
CR_OK = 0,
CR_INVALID_LINE = 1,
CR_NOT_OPEN = 2,
CR_FAIL = 3,
CR_COMMENT = 4,
CR_MAX = 5
};
static enum CheckResult check_line(char* line, ssize_t len) {
while (len > 0 && (*line == ' ' || *line == '\t')) line++, len--;
if (*line == '#') return CR_COMMENT;
const ssize_t hexlen = SHA256_HASH_BYTES * 2;
if (len <= hexlen + 3) return CR_INVALID_LINE;
if (line[hexlen] != ' ' && line[hexlen] != '\t') return CR_INVALID_LINE;
unsigned char expected[SHA256_HASH_BYTES];
if (parse_hex(line, expected) != EXIT_SUCCESS) return CR_INVALID_LINE;
char* file = filename(line + hexlen + 1, line + len - 1);
unsigned char digest[SHA256_HASH_BYTES];
if (digest_file(file, digest) != EXIT_SUCCESS) {
printf("%s: FAILED open or read\n", file);
return CR_NOT_OPEN;
}
if (check_digest(digest, expected) != EXIT_SUCCESS) {
printf("%s: FAILED\n", file);
return CR_FAIL;
}
printf("%s: OK\n", file);
return CR_OK;
}
static int check_file(char* prog, char* sumfile) {
FILE* fp = strcmp(sumfile, "-") == 0 ? stdin : fopen(sumfile, "r");
if (!fp) {
fprintf(stderr, "%s: %s: No such file or directory\n", prog, sumfile);
return EXIT_FAILURE;
}
int counts[CR_MAX] = {0};
char* line = NULL;
size_t empty = 0;
ssize_t len = 0;
while ((len = getline(&line, &empty, fp)) != -1) {
counts[check_line(line, len)]++;
}
free(line);
fclose(fp);
int cil = counts[CR_INVALID_LINE], cno = counts[CR_NOT_OPEN],
cf = counts[CR_FAIL];
if (cil > 0) {
fprintf(stderr, "%s: WARNING: %d line%s %s improperly formatted\n",
prog, cil, cil == 1 ? "" : "s", cil == 1 ? "is" : "are");
}
if (cno > 0) {
fprintf(stderr, "%s: WARNING: %d listed file%s could not be read\n",
prog, cno, cno == 1 ? "" : "s");
}
if (cf > 0) {
fprintf(stderr, "%s: WARNING: %d computed checksum%s did NOT match\n",
prog, cf, cf == 1 ? "" : "s");
}
return (cno || cf) ? EXIT_FAILURE : EXIT_SUCCESS;
}
static int check(int argc, char** argv) {
if (argc == 2) return check_file(argv[0], "-");
int ret = EXIT_SUCCESS;
for (int i = 2; i < argc; i++) ret |= check_file(argv[0], argv[i]);
return ret;
}
extern int main(int argc, char** argv) {
if (argc >= 2 && strcmp(argv[1], "-c") == 0) return check(argc, argv);
return output(argc, argv);
}
@bellbind
Copy link
Author

bellbind commented Jul 4, 2017

NOTE: The simple chunking code for sha256_update() would be:

extern void sha256_update(struct sha256* self, const void* buf, size_t bytes) {
  const char* cur = buf;
  size_t offs = 0;
  self->bytes += bytes;
  if (self->chunk_size > 0) {
    if (self->chunk_size + bytes < 64) {
      memcpy(&self->chunk[self->chunk_size], cur, bytes);
      self->chunk_size += bytes;
      return;
    }
    size_t fill = 64 - self->chunk_size;
    memcpy(&self->chunk[self->chunk_size], cur, fill);
    update(self);
    offs += fill;
  }
  // self->chunk is empty                                                       
  for (size_t next = offs + 64; next <= bytes; next += 64) {
    memcpy(self->chunk, &cur[offs], 64);
    update(self);
    offs = next;
  }
  self->chunk_size = bytes - offs;
  if (self->chunk_size > 0) memcpy(self->chunk, &cur[offs], self->chunk_size);
}

But the code is slower than the above shorter code. Times of hashing 3.2GB file as:

  • simple code: 0m24.528s
  • short code: 0m23.863s
  • (shasum -a 256: 0m23.401s)
# compile options
$ cc -O3 -std=c11 -pedantic -Wall -Wextra sha256.c sha256main.c -o sha256

@bellbind
Copy link
Author

bellbind commented Jul 6, 2017

Usual hash update implementation not used loop because rotating with assignment is too expensive.
In sha256, only two variables(d and h) updated in each step of the loop.
Instead of assignment rotation, use parameter rotation with loop extraction.

The "sha256extracted.c" is the loop extracted version.

$ dd if=/dev/zero bs=1m count=8192 of=8G.data 
8192+0 records in
8192+0 records out
8589934592 bytes transferred in 6.561749 secs (1309092228 bytes/sec)
$ ls -lh 8G.data 
-rw-r--r--  1 bellbind  staff   8.0G  7  7 00:30 8G.data
$ time shasum -a 256 8G.data 
ebfb4ef19ae410f190327b5ebd312711263bc7579970e87d9c1e2d84e06b3c25  8G.data

real	0m58.343s
user	0m52.820s
sys	0m4.807s
$ time gsha256sum 8G.data 
ebfb4ef19ae410f190327b5ebd312711263bc7579970e87d9c1e2d84e06b3c25  8G.data

real	0m56.134s
user	0m50.688s
sys	0m4.685s
$ time ./sha256 8G.data 
ebfb4ef19ae410f190327b5ebd312711263bc7579970e87d9c1e2d84e06b3c25  8G.data

real	0m59.864s
user	0m53.699s
sys	0m5.344s
$ time ./sha256extracted 8G.data 
ebfb4ef19ae410f190327b5ebd312711263bc7579970e87d9c1e2d84e06b3c25  8G.data

real	0m55.929s
user	0m49.806s
sys	0m5.323s
$
command time note
./sha256extracted 55.929s use sha256extracted.c
gsha256sum 56.134s coreutils
shasum -a 256 58.343s perl script
./sha256 59.864s use sha256.c

@bellbind
Copy link
Author

bellbind commented Jul 7, 2017

Using gcc-7 with -O3 -funroll-loops options make loop version faster as extracted version.

$ gcc-7 -O3 -funroll-loops -std=c11 -pedantic -Wall -Wextra sha256.c sha256main.c -o sha256

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment