Last active
October 28, 2018 15:24
-
-
Save yinwang0/290f34bb567a896eada4745173aa4477 to your computer and use it in GitHub Desktop.
Demo of using the x86 adc (add-with-carry) instruction to add two bignums
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
typedef int bigit;      /* one bignum digit; the assembly path moves it with 32-bit movl/adcl */
typedef long int iptr;  /* digit counts */

/* Number of digits handled per iteration of the unrolled loops. */
#define UNROLL 2

/*
 * The adc-based path is only assembled where both the instruction set and the
 * width of the counter register are known.  The "jump if counter is zero"
 * instruction must test the same register width that holds an iptr: the
 * counter lives in rcx on LP64 x86-64 and in ecx on i386.
 */
#if defined(__GNUC__) && defined(__x86_64__) && defined(__LP64__)
#define ASM_ADD_JCXZ "jrcxz"
#elif defined(__GNUC__) && defined(__i386__)
#define ASM_ADD_JCXZ "jecxz"
#endif

/*
 * Add two multi-digit numbers digit by digit, propagating the carry.
 *
 * Digits are stored with the least-significant digit at the highest address:
 * xp, yp and zp each point AT the least-significant digit, and more
 * significant digits live at decreasing addresses.
 *
 *   xp, xl  least-significant digit and digit count of the longer operand
 *   yp, yl  least-significant digit and digit count of the shorter operand
 *   zp      least-significant digit of the result; xl + 1 digits are written
 *           (zp[-xl] .. zp[0]), the most significant one being the final
 *           carry (0 or 1)
 *
 * Requires 0 <= yl <= xl.  If that does not hold the function returns
 * without touching the result, because the loops below would otherwise never
 * terminate and would run off the buffers.
 */
void asm_add(bigit *xp, iptr xl, bigit *yp, iptr yl, bigit *zp)
{
    if (yl < 0 || xl < yl) {
        return;
    }

#ifdef ASM_ADD_JCXZ
    /* Digits of x that have no counterpart in y. */
    iptr tail = xl - yl;

    /* Work split: unrolled part and remainder, for both segments. */
    iptr counter = yl - yl % UNROLL;          /* shared digits, unrolled loop  */
    iptr shared_rest = yl % UNROLL;           /* shared digits, single loop    */
    iptr tail_unrolled = tail - tail % UNROLL; /* x-only digits, unrolled loop */
    iptr tail_rest = tail % UNROLL;           /* x-only digits, single loop    */

    /* Running pointers, kept one past the next digit to be processed. */
    bigit *x = xp + 1;
    bigit *y = yp + 1;
    bigit *z = zp + 1;
    bigit sum;

    /*
     * Everything is a single asm statement: the carry flag has to survive
     * from the first adc to the last, and the compiler is free to emit
     * flag-clobbering code between separate asm statements.  Inside the
     * statement only mov, lea, jmp and j*cxz are used for bookkeeping,
     * none of which modify the carry flag.  The displacements -8 and -2
     * encode UNROLL == 2.
     */
    __asm__ volatile (
        "clc                              \n\t"
        /* segment shared by x and y, two digits per iteration */
        "1:                               \n\t"
        ASM_ADD_JCXZ " 2f                 \n\t"
        "movl -4(%[x]), %[sum]            \n\t"
        "adcl -4(%[y]), %[sum]            \n\t"
        "movl %[sum], -4(%[z])            \n\t"
        "movl -8(%[x]), %[sum]            \n\t"
        "adcl -8(%[y]), %[sum]            \n\t"
        "movl %[sum], -8(%[z])            \n\t"
        "lea -8(%[x]), %[x]               \n\t"
        "lea -8(%[y]), %[y]               \n\t"
        "lea -8(%[z]), %[z]               \n\t"
        "lea -2(%[c]), %[c]               \n\t"
        "jmp 1b                           \n\t"
        "2:                               \n\t"
        /* segment shared by x and y, leftover digit */
        "mov %[n2], %[c]                  \n\t"
        "3:                               \n\t"
        ASM_ADD_JCXZ " 4f                 \n\t"
        "movl -4(%[x]), %[sum]            \n\t"
        "adcl -4(%[y]), %[sum]            \n\t"
        "movl %[sum], -4(%[z])            \n\t"
        "lea -4(%[x]), %[x]               \n\t"
        "lea -4(%[y]), %[y]               \n\t"
        "lea -4(%[z]), %[z]               \n\t"
        "lea -1(%[c]), %[c]               \n\t"
        "jmp 3b                           \n\t"
        "4:                               \n\t"
        /* digits present only in x: copy while propagating the carry */
        "mov %[n3], %[c]                  \n\t"
        "5:                               \n\t"
        ASM_ADD_JCXZ " 6f                 \n\t"
        "movl -4(%[x]), %[sum]            \n\t"
        "adcl $0, %[sum]                  \n\t"
        "movl %[sum], -4(%[z])            \n\t"
        "movl -8(%[x]), %[sum]            \n\t"
        "adcl $0, %[sum]                  \n\t"
        "movl %[sum], -8(%[z])            \n\t"
        "lea -8(%[x]), %[x]               \n\t"
        "lea -8(%[z]), %[z]               \n\t"
        "lea -2(%[c]), %[c]               \n\t"
        "jmp 5b                           \n\t"
        "6:                               \n\t"
        "mov %[n4], %[c]                  \n\t"
        "7:                               \n\t"
        ASM_ADD_JCXZ " 8f                 \n\t"
        "movl -4(%[x]), %[sum]            \n\t"
        "adcl $0, %[sum]                  \n\t"
        "movl %[sum], -4(%[z])            \n\t"
        "lea -4(%[x]), %[x]               \n\t"
        "lea -4(%[z]), %[z]               \n\t"
        "lea -1(%[c]), %[c]               \n\t"
        "jmp 7b                           \n\t"
        "8:                               \n\t"
        /* final carry becomes the most significant result digit */
        "movl $0, %[sum]                  \n\t"
        "adcl $0, %[sum]                  \n\t"
        "movl %[sum], -4(%[z])            \n\t"
        /* early-clobber (&) keeps the inputs out of registers we overwrite */
        : [x] "+&r"(x), [y] "+&r"(y), [z] "+&r"(z),
          [c] "+&c"(counter), [sum] "=&r"(sum)
        : [n2] "rm"(shared_rest), [n3] "rm"(tail_unrolled), [n4] "rm"(tail_rest)
        : "cc", "memory");
#else
    /*
     * Portable fallback: same digit-by-digit addition in plain C.  Digits are
     * accessed through the unsigned variant of bigit so that wrap-around is
     * well defined.
     */
    const unsigned int *ux = (const unsigned int *)xp;
    const unsigned int *uy = (const unsigned int *)yp;
    unsigned int *uz = (unsigned int *)zp;
    unsigned int carry = 0;

    for (iptr k = 0; k < xl; k++) {
        unsigned int a = ux[-k];
        unsigned int s = a + carry;
        unsigned int carry_out = s < a;     /* wrapped while adding the carry */

        if (k < yl) {
            unsigned int b = uy[-k];
            s += b;
            carry_out |= s < b;             /* wrapped while adding y's digit */
        }
        uz[-k] = s;
        carry = carry_out;
    }
    uz[-xl] = carry;
#endif
}
void test1() { | |
iptr xl = 4; | |
iptr yl = 4; | |
int *x = malloc(sizeof(int) * xl); | |
int *y = malloc(sizeof(int) * yl); | |
int *z = malloc(sizeof(int) * (xl + 1)); | |
int *xp = x + xl - 1; | |
int *yp = y + yl - 1; | |
int *zp = z + xl; | |
int H = 2147483648; // 2^31 | |
x[0] = H-1; | |
x[1] = H-1; | |
x[2] = H-1; | |
x[3] = H; | |
y[0] = H; | |
y[1] = H; | |
y[2] = H; | |
y[3] = H; | |
asm_add(xp, xl, yp, yl, zp); | |
for (int j = 0; j < xl + 1; j++) { | |
printf("z[%d] = %u\n", j, z[j]); | |
} | |
} | |
void test2() { | |
iptr xl = 4; | |
iptr yl = 2; | |
int *x = malloc(sizeof(int) * xl); | |
int *y = malloc(sizeof(int) * yl); | |
int *z = malloc(sizeof(int) * (xl + 1)); | |
int *xp = x + xl - 1; | |
int *yp = y + yl - 1; | |
int *zp = z + xl; | |
int H = 2147483648; // 2^31 | |
x[0] = H+H-1; | |
x[1] = H+H-1; | |
x[2] = H-1; | |
x[3] = H; | |
y[0] = H; | |
y[1] = H; | |
asm_add(xp, xl, yp, yl, zp); | |
for (int j = 0; j < xl + 1; j++) { | |
printf("z[%d] = %u\n", j, z[j]); | |
} | |
} | |
void test3() { | |
iptr xl = 4; | |
iptr yl = 4; | |
int *x = malloc(sizeof(int) * xl); | |
int *y = malloc(sizeof(int) * yl); | |
int *z = malloc(sizeof(int) * (xl + 1)); | |
int *xp = x + xl - 1; | |
int *yp = y + yl - 1; | |
int *zp = z + xl; | |
int H = 2147483648; // 2^31 | |
x[0] = 1; | |
x[1] = 2; | |
x[2] = 3; | |
x[3] = 4; | |
y[0] = 1; | |
y[1] = 2; | |
y[2] = 3; | |
y[3] = 4; | |
asm_add(xp, xl, yp, yl, zp); | |
for (int j = 0; j < xl + 1; j++) { | |
printf("z[%d] = %u\n", j, z[j]); | |
} | |
} | |
void test4() { | |
iptr xl = 5; | |
iptr yl = 5; | |
int *x = malloc(sizeof(int) * xl); | |
int *y = malloc(sizeof(int) * yl); | |
int *z = malloc(sizeof(int) * (xl + 1)); | |
int *xp = x + xl - 1; | |
int *yp = y + yl - 1; | |
int *zp = z + xl; | |
int H = 2147483648; // 2^31 | |
x[0] = 1; | |
x[1] = 2; | |
x[2] = 3; | |
x[3] = 4; | |
x[4] = 5; | |
y[0] = 1; | |
y[1] = 2; | |
y[2] = 3; | |
y[3] = 4; | |
y[4] = 5; | |
asm_add(xp, xl, yp, yl, zp); | |
for (int j = 0; j < xl + 1; j++) { | |
printf("z[%d] = %u\n", j, z[j]); | |
} | |
} | |
int main () { | |
test1(); | |
printf("----------------------\n"); | |
test2(); | |
printf("----------------------\n"); | |
test3(); | |
printf("----------------------\n"); | |
test4(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
z[0] = 1 | |
z[1] = 0 | |
z[2] = 0 | |
z[3] = 0 | |
z[4] = 0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment