Created
August 14, 2014 13:19
-
-
Save jgarzik/2e2c4373b88d90ee4859 to your computer and use it in GitHub Desktop.
Storing 256-bit values in x86 MMX and SSE registers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/*
 * A 256-bit value stored as 32 raw bytes (256 / 8).
 *
 * The struct contains only unsigned char, so the type itself has no
 * alignment requirement beyond that of a single byte.
 */
typedef struct uchar256 {
	unsigned char data[256 / 8];
} uint256_t;
/* CPU feature flags; both start out false and are assigned in detect(). */
static bool f_mmx = false;
static bool f_sse2 = false;

/* True when a pointer is 8 bytes wide in this build. */
static const bool f_64bit = (sizeof(void *) == 8);
/*
 * Print a buffer to stdout as "<name>: <hex>\n".
 *
 * name - label printed before the colon
 * p_   - start of the buffer
 * len  - number of bytes to print; each byte becomes two lowercase hex digits
 *
 * With len == 0 only the label and the newline are printed.
 */
static void hexdump(const char *name, const void *p_, size_t len)
{
	const unsigned char *p = p_;
	size_t i;	/* same width as len, so the loop cannot wrap before reaching it */

	printf("%s: ", name);
	for (i = 0; i < len; i++)
		printf("%02x", p[i]);
	printf("\n");
}
/*
 * Fill a buffer with bytes read from /dev/urandom.
 *
 * p   - destination buffer
 * len - number of bytes to fill
 *
 * Returns true only when all len bytes were obtained; false if the device
 * could not be opened, or if reading ended (EOF or error) before the buffer
 * was full.
 */
static bool rand_bytes(void *p, size_t len)
{
	unsigned char *out = p;
	size_t done = 0;
	int fd = open("/dev/urandom", O_RDONLY);

	if (fd < 0)
		return false;

	/* read() may legally return fewer bytes than asked for; keep going. */
	while (done < len) {
		ssize_t n = read(fd, out + done, len - done);

		if (n > 0) {
			done += (size_t) n;
			continue;
		}
		if (n < 0 && errno == EINTR)
			continue;	/* interrupted before any data: retry */
		break;			/* EOF or a hard error */
	}

	close(fd);
	return done == len;
}
/*
 * Copy the contents of four MMX registers into *v.
 *
 * v   - destination; receives 32 bytes, 8 from each register in order
 * idx - 0 selects mm0..mm3; any other value selects mm4..mm7
 *
 * The registers are read as-is: nothing here tells the compiler that they
 * hold live data, so the result is only meaningful if no code has touched
 * them since they were loaded.
 */
void read_u256_mmx(uint256_t *v, unsigned int idx)
{
	/*
	 * Byte-typed 64-bit lane: describes exactly the 8 bytes each movq
	 * stores, without casting the byte array to a wider integer type.
	 */
	struct lane64 { unsigned char b[8]; };
	struct lane64 *lane = (struct lane64 *) v->data;

	/* __asm__ rather than asm so this also builds in strict ISO modes. */
	if (idx == 0) {
		__asm__ __volatile__("movq %%mm0,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movq %%mm1,%0" : "=m" (lane[1]));
		__asm__ __volatile__("movq %%mm2,%0" : "=m" (lane[2]));
		__asm__ __volatile__("movq %%mm3,%0" : "=m" (lane[3]));
	} else {
		__asm__ __volatile__("movq %%mm4,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movq %%mm5,%0" : "=m" (lane[1]));
		__asm__ __volatile__("movq %%mm6,%0" : "=m" (lane[2]));
		__asm__ __volatile__("movq %%mm7,%0" : "=m" (lane[3]));
	}
}
void write_u256_mmx(const uint256_t *v, unsigned int idx) | |
{ | |
const uint64_t *vals = (const uint64_t *) v; | |
if (idx == 0) { | |
asm volatile("movq %0,%%mm0" : : "m" (vals[0])); | |
asm volatile("movq %0,%%mm1" : : "m" (vals[1])); | |
asm volatile("movq %0,%%mm2" : : "m" (vals[2])); | |
asm volatile("movq %0,%%mm3" : : "m" (vals[3])); | |
} else { | |
asm volatile("movq %0,%%mm4" : : "m" (vals[0])); | |
asm volatile("movq %0,%%mm5" : : "m" (vals[1])); | |
asm volatile("movq %0,%%mm6" : : "m" (vals[2])); | |
asm volatile("movq %0,%%mm7" : : "m" (vals[3])); | |
} | |
} | |
/*
 * Copy the contents of one pair of XMM registers into *v_.
 *
 * v_  - destination; bytes 0..15 come from the first register of the pair,
 *       bytes 16..31 from the second
 * idx - pair selector: 0 = xmm0/xmm1, 1 = xmm2/xmm3, 2 = xmm4/xmm5,
 *       3 = xmm6/xmm7; any other value leaves *v_ untouched
 *
 * The registers are read as-is: nothing here tells the compiler that they
 * hold live data, so the result is only meaningful if no code has touched
 * them since they were loaded.
 */
void read_u256_sse(uint256_t *v_, unsigned int idx)
{
	/*
	 * Byte-typed 128-bit lane: tells the compiler that all 16 bytes of the
	 * operand are written, not just the first 8.
	 */
	struct lane128 { unsigned char b[16]; };
	struct lane128 *lane = (struct lane128 *) v_->data;

	/*
	 * movdqu instead of movdqa: uint256_t has no alignment guarantee and
	 * movdqa faults on an address that is not 16-byte aligned.
	 */
	switch (idx) {
	case 0:
		__asm__ __volatile__("movdqu %%xmm0,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movdqu %%xmm1,%0" : "=m" (lane[1]));
		break;
	case 1:
		__asm__ __volatile__("movdqu %%xmm2,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movdqu %%xmm3,%0" : "=m" (lane[1]));
		break;
	case 2:
		__asm__ __volatile__("movdqu %%xmm4,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movdqu %%xmm5,%0" : "=m" (lane[1]));
		break;
	case 3:
		__asm__ __volatile__("movdqu %%xmm6,%0" : "=m" (lane[0]));
		__asm__ __volatile__("movdqu %%xmm7,%0" : "=m" (lane[1]));
		break;
	default:
		/* Unknown slot: nothing is read. */
		break;
	}
}
void write_u256_sse(const uint256_t *v_, unsigned int idx) | |
{ | |
const uint64_t *v = (const uint64_t *) v_; | |
switch (idx) { | |
case 0: | |
asm volatile("movdqa %0,%%xmm0" : : "m" (v[0])); | |
asm volatile("movdqa %0,%%xmm1" : : "m" (v[2])); | |
break; | |
case 1: | |
asm volatile("movdqa %0,%%xmm2" : : "m" (v[0])); | |
asm volatile("movdqa %0,%%xmm3" : : "m" (v[2])); | |
break; | |
case 2: | |
asm volatile("movdqa %0,%%xmm4" : : "m" (v[0])); | |
asm volatile("movdqa %0,%%xmm5" : : "m" (v[2])); | |
break; | |
case 3: | |
asm volatile("movdqa %0,%%xmm6" : : "m" (v[0])); | |
asm volatile("movdqa %0,%%xmm7" : : "m" (v[2])); | |
break; | |
} | |
} | |
static void runit_mmx(void) | |
{ | |
printf("MMX:\n"); | |
uint256_t v __attribute__ ((aligned(32))); | |
uint256_t v2 __attribute__ ((aligned(32))); | |
unsigned int i; | |
for (i = 0; i < 2; i++) { | |
rand_bytes(&v, sizeof(v)); | |
memset(&v2, 0, sizeof(v2)); | |
write_u256_mmx(&v, i); | |
read_u256_mmx(&v2, i); | |
char s[16]; | |
sprintf(s, "%u.a", i); | |
hexdump(s, &v, sizeof(v)); | |
sprintf(s, "%u.b", i); | |
hexdump(s, &v2, sizeof(v2)); | |
assert(!memcmp(&v, &v2, sizeof(v))); | |
} | |
} | |
static void runit_sse(void) | |
{ | |
printf("SSE:\n"); | |
uint256_t v __attribute__ ((aligned(32))); | |
uint256_t v2 __attribute__ ((aligned(32))); | |
unsigned int i; | |
for (i = 0; i < 4; i++) { | |
rand_bytes(&v, sizeof(v)); | |
memset(&v2, 0, sizeof(v2)); | |
write_u256_sse(&v, i); | |
read_u256_sse(&v2, i); | |
char s[16]; | |
sprintf(s, "%u.a", i); | |
hexdump(s, &v, sizeof(v)); | |
sprintf(s, "%u.b", i); | |
hexdump(s, &v2, sizeof(v2)); | |
assert(!memcmp(&v, &v2, sizeof(v))); | |
} | |
} | |
/*
 * Execute the CPUID instruction.
 *
 * code - value placed in EAX (the leaf to query); ECX is set to 0
 * ebx, ecx, edx - receive the EBX, ECX and EDX results
 *
 * The EAX result is discarded.
 */
static void cpuid(uint32_t code, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	uint32_t eax_out = 0;

	/* __asm__ rather than asm so this also builds in strict ISO modes. */
	__asm__ __volatile__("cpuid"
			     : "=a" (eax_out), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
			     : "a" (code), "c" (0));

	(void) eax_out;
}
static void detect(void) | |
{ | |
uint32_t ecx = 0, ebx = 0, edx = 0; | |
cpuid(1, &ebx, &ecx, &edx); | |
f_mmx = edx & (1 << 23); | |
f_sse2 = edx & (1 << 26); | |
printf("CPU features:%s%s%s\n", | |
f_mmx ? " MMX" : "", | |
f_sse2 ? " SSE2" : "", | |
f_64bit ? " 64b" : " 32b"); | |
} | |
int main(int argc, char *argv[]) | |
{ | |
detect(); | |
if (f_mmx) | |
runit_mmx(); | |
if (f_sse2) | |
runit_sse(); | |
printf("success!\n"); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment