Skip to content

Instantly share code, notes, and snippets.

@yinwang0
Last active October 28, 2018 15:24
Show Gist options
  • Save yinwang0/290f34bb567a896eada4745173aa4477 to your computer and use it in GitHub Desktop.
Save yinwang0/290f34bb567a896eada4745173aa4477 to your computer and use it in GitHub Desktop.
Demo: using the x86 `adc` (add-with-carry) instruction to add two bignums.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* One bignum digit ("bigit"). Stored in an int, but every digit is treated as
 * an unsigned 32-bit word (base 2^32) by asm_add. */
typedef int bigit;
/* Digit count / index type. */
typedef long int iptr;

/* asm_add hard-codes 4-byte digits (movl/adcl, 4-byte pointer steps). */
typedef char asm_add_requires_32bit_bigit[sizeof(bigit) == 4 ? 1 : -1];

#if defined(__GNUC__) && \
    (defined(__i386__) || (defined(__x86_64__) && !defined(__ILP32__)))
#  define ASM_ADD_USE_ADC 1
#  if defined(__x86_64__)
#    define ASM_ADD_JCXZ "jrcxz" /* counter lives in the full 64-bit RCX */
#  else
#    define ASM_ADD_JCXZ "jecxz"
#  endif
#else
#  define ASM_ADD_USE_ADC 0
#endif

/*
 * Add two multi-digit unsigned numbers: z = x + y.
 *
 * Digit layout: each pointer addresses the LEAST significant digit of its
 * number, and more significant digits sit at LOWER addresses, i.e. digit i
 * of x is xp[-i].
 *
 *   xp, xl  least significant digit and digit count of x
 *   yp, yl  least significant digit and digit count of y
 *   zp      least significant digit slot of the result; the result occupies
 *           max(xl, yl) + 1 digits, zp[-max(xl, yl)] .. zp[0], the most
 *           significant one being the final carry (0 or 1)
 *
 * Preconditions: xl >= 0 and yl >= 0. Either operand may be the longer one.
 *
 * On x86 with a GNU-compatible compiler the whole carry chain runs inside a
 * single asm statement using adc; elsewhere an equivalent portable C loop is
 * used.
 */
void asm_add(const bigit *xp, iptr xl, const bigit *yp, iptr yl, bigit *zp)
{
    /* Addition is commutative: make x the longer operand so that the tail
     * loop below only ever has to propagate the carry through x. */
    if (xl < yl) {
        const bigit *tmp_p = xp;
        iptr tmp_l = xl;
        xp = yp;
        xl = yl;
        yp = tmp_p;
        yl = tmp_l;
    }

#if ASM_ADD_USE_ADC
    {
        bigit sum;
        iptr n = yl;          /* digits present in both operands */
        iptr rest = xl - yl;  /* digits present only in x */

        /* The carry flag must stay live from the first adc to the last, so
         * everything is kept in ONE asm statement and the loop bookkeeping
         * uses only instructions that leave the flags alone (lea, mov, jmp,
         * j*cxz). "cc" and "memory" tell the compiler that flags and the
         * digit arrays are touched behind its back; the early-clobber (&)
         * modifiers keep the updated registers from sharing with `rest`. */
        __asm__ volatile (
            "clc \n\t"
            /* segment shared by x and y: z[i] = x[i] + y[i] + carry */
            "1: \n\t"
            ASM_ADD_JCXZ " 2f \n\t"
            "movl (%[x]), %[sum] \n\t"
            "adcl (%[y]), %[sum] \n\t"
            "movl %[sum], (%[z]) \n\t"
            "lea -4(%[x]), %[x] \n\t"
            "lea -4(%[y]), %[y] \n\t"
            "lea -4(%[z]), %[z] \n\t"
            "lea -1(%[n]), %[n] \n\t"
            "jmp 1b \n\t"
            "2: \n\t"
            /* segment in x but not in y: z[i] = x[i] + carry */
            "mov %[rest], %[n] \n\t"
            "3: \n\t"
            ASM_ADD_JCXZ " 4f \n\t"
            "movl (%[x]), %[sum] \n\t"
            "adcl $0, %[sum] \n\t"
            "movl %[sum], (%[z]) \n\t"
            "lea -4(%[x]), %[x] \n\t"
            "lea -4(%[z]), %[z] \n\t"
            "lea -1(%[n]), %[n] \n\t"
            "jmp 3b \n\t"
            "4: \n\t"
            /* final carry becomes the most significant result digit;
             * mov (not xor) is used to zero sum because it keeps CF intact */
            "movl $0, %[sum] \n\t"
            "adcl $0, %[sum] \n\t"
            "movl %[sum], (%[z]) \n\t"
            : [n] "+&c"(n), [x] "+&r"(xp), [y] "+&r"(yp), [z] "+&r"(zp),
              [sum] "=&r"(sum)
            : [rest] "rm"(rest)
            : "cc", "memory");
    }
#else
    {
        uint32_t carry = 0;
        iptr i = 0;

        /* segment shared by x and y */
        for (; i < yl; i++) {
            uint64_t t = (uint64_t)(uint32_t)xp[-i] + (uint32_t)yp[-i] + carry;
            zp[-i] = (bigit)(uint32_t)t;
            carry = (uint32_t)(t >> 32);
        }
        /* segment in x but not in y: only the carry is added */
        for (; i < xl; i++) {
            uint64_t t = (uint64_t)(uint32_t)xp[-i] + carry;
            zp[-i] = (bigit)(uint32_t)t;
            carry = (uint32_t)(t >> 32);
        }
        /* final carry becomes the most significant result digit */
        zp[-xl] = (bigit)carry;
    }
#endif
}
void test1() {
iptr xl = 4;
iptr yl = 4;
int *x = malloc(sizeof(int) * xl);
int *y = malloc(sizeof(int) * yl);
int *z = malloc(sizeof(int) * (xl + 1));
int *xp = x + xl - 1;
int *yp = y + yl - 1;
int *zp = z + xl;
int H = 2147483648; // 2^31
x[0] = H-1;
x[1] = H-1;
x[2] = H-1;
x[3] = H;
y[0] = H;
y[1] = H;
y[2] = H;
y[3] = H;
asm_add(xp, xl, yp, yl, zp);
for (int j = 0; j < xl + 1; j++) {
printf("z[%d] = %u\n", j, z[j]);
}
}
void test2() {
iptr xl = 4;
iptr yl = 2;
int *x = malloc(sizeof(int) * xl);
int *y = malloc(sizeof(int) * yl);
int *z = malloc(sizeof(int) * (xl + 1));
int *xp = x + xl - 1;
int *yp = y + yl - 1;
int *zp = z + xl;
int H = 2147483648; // 2^31
x[0] = H+H-1;
x[1] = H+H-1;
x[2] = H-1;
x[3] = H;
y[0] = H;
y[1] = H;
asm_add(xp, xl, yp, yl, zp);
for (int j = 0; j < xl + 1; j++) {
printf("z[%d] = %u\n", j, z[j]);
}
}
void test3() {
iptr xl = 4;
iptr yl = 4;
int *x = malloc(sizeof(int) * xl);
int *y = malloc(sizeof(int) * yl);
int *z = malloc(sizeof(int) * (xl + 1));
int *xp = x + xl - 1;
int *yp = y + yl - 1;
int *zp = z + xl;
int H = 2147483648; // 2^31
x[0] = 1;
x[1] = 2;
x[2] = 3;
x[3] = 4;
y[0] = 1;
y[1] = 2;
y[2] = 3;
y[3] = 4;
asm_add(xp, xl, yp, yl, zp);
for (int j = 0; j < xl + 1; j++) {
printf("z[%d] = %u\n", j, z[j]);
}
}
void test4() {
iptr xl = 5;
iptr yl = 5;
int *x = malloc(sizeof(int) * xl);
int *y = malloc(sizeof(int) * yl);
int *z = malloc(sizeof(int) * (xl + 1));
int *xp = x + xl - 1;
int *yp = y + yl - 1;
int *zp = z + xl;
int H = 2147483648; // 2^31
x[0] = 1;
x[1] = 2;
x[2] = 3;
x[3] = 4;
x[4] = 5;
y[0] = 1;
y[1] = 2;
y[2] = 3;
y[3] = 4;
y[4] = 5;
asm_add(xp, xl, yp, yl, zp);
for (int j = 0; j < xl + 1; j++) {
printf("z[%d] = %u\n", j, z[j]);
}
}
int main () {
test1();
printf("----------------------\n");
test2();
printf("----------------------\n");
test3();
printf("----------------------\n");
test4();
}
z[0] = 1
z[1] = 0
z[2] = 0
z[3] = 0
z[4] = 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment