Last active
December 31, 2023 02:32
-
-
Save suarezvictor/8ff30360e07db9ef499c829744d6f161 to your computer and use it in GitHub Desktop.
test using 9x9 multipliers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Copyright (C) 2023 Victor Suarez Rovere <[email protected]> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
typedef uint32_t uint18_t, uint19_t, uint27_t, uint23_t; | |
typedef uint16_t uint9_t, uint10_t, uint15_t; | |
typedef uint8_t uint1_t; | |
uint18_t LPM_MULT9X9(uint9_t a, uint9_t b) | |
{ | |
a &= (1<<9)-1; | |
b &= (1<<9)-1; | |
return a * b; | |
} | |
uint18_t LPM_MULT10X10(uint10_t a, uint10_t b) | |
{ | |
a &= (1<<10)-1; | |
b &= (1<<10)-1; | |
#if 0 | |
return a * b; | |
#else | |
uint9_t a_trunc = a; | |
uint9_t b_trunc = b; | |
a_trunc &= (1<<9)-1; | |
b_trunc &= (1<<9)-1; | |
uint1_t ah = a >> 9; | |
uint1_t bh = b >> 9; | |
return (ah && bh ? (1<<18):0) + (ah ? b_trunc<<9:0) + (bh ? a_trunc<<9:0) + LPM_MULT9X9(a_trunc, b_trunc); | |
#endif | |
} | |
uint16_t mult15x15_upper16(uint15_t x, uint15_t y) | |
{ | |
x &= (1<<15)-1; | |
y &= (1<<15)-1; | |
return (x*y)>>14; | |
} | |
uint27_t mult18x18_upper27(uint18_t x, uint18_t y) | |
{ | |
x &= (1<<18)-1; | |
y &= (1<<18)-1; | |
return (x*y)>>9; | |
} | |
uint27_t mult18x18_upper27_alt(uint18_t x, uint18_t y) | |
{ | |
//extract 9 bit fields | |
uint9_t a = x >> 9; | |
uint9_t b = x; | |
uint9_t d = y >> 9; | |
uint9_t e = y; | |
//mask bits for CPU execution | |
a &= (1<<9)-1; | |
b &= (1<<9)-1; | |
d &= (1<<9)-1; | |
e &= (1<<9)-1; | |
uint9_t be = LPM_MULT9X9(b, e) >> 9; | |
//be &= (1<<9)-1; //mask bits (not needed) | |
uint18_t bd = LPM_MULT9X9(b, d); | |
//bd &= (1<<18)-1; //mask bits (not needed) | |
uint19_t ae_bd = bd + LPM_MULT9X9(a, e); | |
//ae_bd &= (1<<19)-1; //mask bits (not needed) | |
uint18_t ad = LPM_MULT9X9(a, d); | |
//ad &= (1<<18)-1; //mask bits (not needed) | |
uint23_t r = (ad << 9) + (ae_bd + be); //see note below | |
r &= ((1<<23)-1); //NOTE: only 23 upper bits are significative, and only 21 needed for 15x15->16 | |
return r; | |
} | |
uint27_t mult18x18_upper27_trunc(uint18_t x, uint18_t y) | |
{ | |
//extract 9 bit fields | |
uint9_t a = x >> 9; | |
uint9_t b = x; | |
uint9_t d = y >> 9; | |
uint9_t e = y; | |
//mask bits for CPU execution | |
a &= (1<<9)-1; | |
b &= (1<<9)-1; | |
d &= (1<<9)-1; | |
e &= (1<<9)-1; | |
uint18_t bd = LPM_MULT9X9(b, d); | |
//bd &= (1<<18)-1; //mask bits (not needed) | |
uint19_t ae_bd = bd + LPM_MULT9X9(a, e); | |
//ae_bd &= (1<<19)-1; //mask bits (not needed) | |
uint18_t ad = LPM_MULT9X9(a, d); | |
//ad &= (1<<18)-1; //mask bits (not needed) | |
uint23_t r = (ad << 9) + ae_bd; //see note below | |
r &= ((1<<23)-1); //NOTE: only 23 upper bits are significative, and only 21 needed for 15x15->16 | |
return r; | |
} | |
//karatsuba multiplication | |
//r=LSB l=MSB | |
//(XlYl << n) + (((Xl+Xr)*(Yl+Yr)-XlYl-XrYr) << (n/2)) + XrYr | |
uint27_t mult18x18_upper27_k(uint18_t x, uint18_t y) | |
{ | |
//extract 9 bit fields | |
uint9_t a = x >> 9; | |
uint9_t b = x; | |
uint9_t d = y >> 9; | |
uint9_t e = y; | |
#ifndef __PIPELINEC__ | |
//mask bits for CPU execution | |
a &= (1<<9)-1; | |
b &= (1<<9)-1; | |
d &= (1<<9)-1; | |
e &= (1<<9)-1; | |
#endif | |
uint18_t ad = LPM_MULT9X9(a, d); | |
//ad &= (1<<18)-1; //mask bits (not needed) | |
uint18_t be = LPM_MULT9X9(b, e); | |
//be &= (1<<18)-1; //mask bits (not needed) | |
uint19_t ae_bd = LPM_MULT10X10(a+b, d+e); //19 bits are enough. NOTE: implemented with 9x9 multipliers | |
//ae_bd &= (1<<19)-1; //mask bits (not needed) | |
uint18_t m = ae_bd - ad - be; //16 bits are enough for 15x15 | |
//m &= (1<<18)-1; //mask bits (not needed) | |
return (ad << 9) + (((m<<9) + be) >> 9); | |
} | |
uint16_t mult15x15_upper16_alt(uint15_t x, uint15_t y) | |
{ | |
x &= (1<<15)-1; | |
y &= (1<<15)-1; | |
return mult18x18_upper27_k(x, y)>>(14-9); | |
} | |
int main() | |
{ | |
int count = 1000*1000*1000; | |
int maxerr = 0; | |
while(--count) | |
{ | |
uint18_t x = rand() & ((1<<15)-1); | |
uint18_t y = rand() & ((1<<15)-1); | |
//uint27_t r1 = mult18x18_upper27(x, y); | |
//uint27_t r2 = mult18x18_upper27_k(x, y); | |
uint16_t r1 = mult15x15_upper16(x, y); | |
uint16_t r2 = mult15x15_upper16_alt(x, y); | |
if(r1 != r2) | |
{ | |
int16_t err = abs(r2 - r1); | |
if(err > maxerr) | |
{ | |
maxerr = err; | |
fprintf(stderr, "x 0x%08X, y 0x%08X, r1 0x%08X, r2 0x%08X, err %d: FAILED\n", x, y, r1, r2, maxerr); | |
} | |
} | |
} | |
if(maxerr == 0) | |
{ | |
printf("PASSED\n"); | |
return 0; | |
} | |
return 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment