Created
January 16, 2022 21:55
-
-
Save jrmoserbaltimore/7cc4ff0a8db4cb26fb0d479aebeeae4a to your computer and use it in GitHub Desktop.
CORDIC slows as pipeline grows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
`default_nettype uwire | |
// ICORDIC: bundle of the signals passed between CORDIC stages.
//   x, y, z : the three CORDIC data words, each `width` bits wide
//   v       : single-bit flag carried alongside the data words
// The four signals are nets supplied through the interface port list, so an
// instance is bound to wires that already exist in the enclosing scope.
interface ICORDIC
#(
    parameter width = 40 // 48 max
)
(
    wire logic [width-1:0] x,
    wire logic [width-1:0] y,
    wire logic [width-1:0] z,
    wire logic             v
);
    // Consumer-side view: every signal is an input.
    modport In  (input  x, y, z, v);

    // Producer-side view: every signal is an output.
    modport Out (output x, y, z, v);
endinterface
// Q1.47
// Specializing each instance to Circular, Linear, or Hyperbolic mode avoids
// 3-way muxes in the z calculation.
// The v input selects vectoring mode.
// Sine and cosine are possible with a hard-wired circuit.
//
//                  Circular (0)      Linear (1)      Hyperbolic (2)
// v=0, x=1, y=0    sin(z), cos(z)                    sinh(z), cosh(z)
//
// This also allows e**z by adding sinh(z)+cosh(z), and w**t by e**(t*ln(w)).
// CORDIC: `stages` CORDIC iterations chained combinationally, followed by one
// output register on x, y, z and v.
//
// Parameters
//   width  : word width of x, y and z (48 max; limited by the 48-bit tables)
//   level  : index of the first iteration performed by this instance
//   mode   : 0 = circular, 1 = linear, 2 = hyperbolic (fixed per instance)
//   stages : number of iterations performed before the output register
//
// Ports
//   Clk : clock for the output register
//   In  : x/y/z/v read by the first iteration (v=1 selects vectoring)
//   Out : registered x/y/z/v after the last iteration
//
// Number format: two's complement with the MSB acting as the sign bit and
// carrying weight 2**0 (the "Q1.47" layout of the file header, scaled to
// `width` bits). This is the same layout the linear-mode constant has always
// used (2**0 == 1 << (width-1)).
//
// For modes 0 and 2, level+stages must not exceed 32 (the table depth).
module CORDIC
#(
    parameter width=38,  // 38 is optimal for 32 stages; 27 for 24
    parameter level=0,   // which digit? 0-31 for a 32-bit circuit for example
    parameter mode=0,    // Circular, linear, hyperbolic
    parameter stages = 1
)
(
    input uwire Clk,
    ICORDIC.In In,
    ICORDIC.Out Out
);
    // 48-bit tables of 32 entries, plain radians in unsigned Q0.48
    // (arctanTable[0] is pi/4).
    // Generated as below (range and dtype corrected so the script produces
    // all 32 entries at the 48-bit precision of the literals):
    //
    // from fxpmath import Fxp
    // from numpy import arctan, arctanh
    //
    // for trig in [arctan, arctanh]:
    //   print("localparam bit [0:31][47:0] {}Table = {{".format(trig.__name__))
    //   for i in range(0,32):
    //     j = i if trig.__name__ == "arctan" else i+1
    //     x = Fxp(trig(2**-j), dtype='UQ0.48')
    //     print(" 48'b{},".format(x.bin()))
    //   print("};")
    //
    // 1/K  in Q0.48 =  .100110110111010011101101101010000100001101111111
    // 1/K' in Q1.48 = 1.001101010001111010000111001000000000111011010111
    // K and K' should be embedded with constants if possible, e.g. (pi*KK')**3/192
    //
    // Each table MUST list exactly 32 entries. The initialiser is a plain
    // concatenation into a packed array, so a short list is zero-extended on
    // the left, which silently shifts every entry to the next index.

    // arctanTable[k] = atan(2**-k), k = 0..31
    localparam bit [0:31][47:0] arctanTable = {
        48'b110010010000111111011010101000100010000101101000,
        48'b011101101011000110011100000101011000011011101101,
        48'b001111101011011011101011111100100101100100000001,
        48'b000111111101010110111010100110101010110000101111,
        48'b000011111111101010101101110110111001011001111110,
        48'b000001111111111101010101011011101110101001011101,
        48'b000000111111111111101010101010110111011101101110,
        48'b000000011111111111111101010101010101101110111011,
        48'b000000001111111111111111101010101010101011011101,
        48'b000000000111111111111111111101010101010101010110,
        48'b000000000011111111111111111111101010101010101010,
        48'b000000000001111111111111111111111101010101010101,
        48'b000000000000111111111111111111111111101010101010,
        48'b000000000000011111111111111111111111111101010101,
        48'b000000000000001111111111111111111111111111101010,
        48'b000000000000000111111111111111111111111111111101,
        48'b000000000000000011111111111111111111111111111111,
        48'b000000000000000001111111111111111111111111111111,
        48'b000000000000000000111111111111111111111111111111,
        48'b000000000000000000011111111111111111111111111111,
        48'b000000000000000000001111111111111111111111111111,
        48'b000000000000000000000111111111111111111111111111,
        48'b000000000000000000000011111111111111111111111111,
        48'b000000000000000000000001111111111111111111111111,
        48'b000000000000000000000000111111111111111111111111,
        48'b000000000000000000000000011111111111111111111111,
        48'b000000000000000000000000001111111111111111111111,
        48'b000000000000000000000000001000000000000000000000,
        48'b000000000000000000000000000100000000000000000000,
        48'b000000000000000000000000000010000000000000000000,
        48'b000000000000000000000000000001000000000000000000,
        48'b000000000000000000000000000000100000000000000000  // atan(2**-31), previously missing
    };

    // arctanhTable[k] = atanh(2**-(k+1)), k = 0..31
    localparam bit [0:31][47:0] arctanhTable = {
        48'b100011001001111101010011110101010110100000011000,
        48'b010000010110001010111011111010100000010001010001,
        48'b001000000010101100010010001110010011110101011101,
        48'b000100000000010101011000100010101101001101110101,
        48'b000010000000000010101010110001000100100011010111,
        48'b000001000000000000010101010101100010001000101011,
        48'b000000100000000000000010101010101011000100010001,
        48'b000000010000000000000000010101010101010110001000,
        48'b000000001000000000000000000010101010101010101100,
        48'b000000000100000000000000000000010101010101010101,
        48'b000000000010000000000000000000000010101010101010,
        48'b000000000001000000000000000000000000010101010101,
        48'b000000000000100000000000000000000000000010101010,
        48'b000000000000010000000000000000000000000000010101,
        48'b000000000000001000000000000000000000000000000010,
        48'b000000000000000100000000000000000000000000000000,
        48'b000000000000000010000000000000000000000000000000,
        48'b000000000000000001000000000000000000000000000000,
        48'b000000000000000000100000000000000000000000000000,
        48'b000000000000000000010000000000000000000000000000,
        48'b000000000000000000001000000000000000000000000000,
        48'b000000000000000000000100000000000000000000000000,
        48'b000000000000000000000010000000000000000000000000,
        48'b000000000000000000000001000000000000000000000000,
        48'b000000000000000000000000100000000000000000000000,
        48'b000000000000000000000000010000000000000000000000,
        48'b000000000000000000000000001000000000000000000000,
        48'b000000000000000000000000000100000000000000000000,
        48'b000000000000000000000000000010000000000000000000,
        48'b000000000000000000000000000001000000000000000000,
        48'b000000000000000000000000000000100000000000000000,
        48'b000000000000000000000000000000010000000000000000  // atanh(2**-32), previously missing
    };

    // Per-iteration values; index -1 holds the module inputs.
    // Declared signed so that unary minus and >>> operate on two's-complement
    // values. A plain >> on unsigned vectors zero-fills and corrupts every
    // negative term.
    logic signed [width-1:0] x [-1:stages-1];
    logic signed [width-1:0] y [-1:stages-1];
    logic signed [width-1:0] z [-1:stages-1];
    uwire v;

    assign x[-1] = In.x;
    assign y[-1] = In.y;
    assign z[-1] = In.z;
    assign v     = In.v;

    genvar i;
    generate
        for (i = 0; i < stages; i++)
        begin: cordic
            // Absolute iteration number of this stage.
            localparam int iter  = level + i;
            // Power-of-two shift applied to the cross terms. The hyperbolic
            // table starts at atanh(2**-1), so its shift is one more than the
            // table index.
            // NOTE(review): the usual hyperbolic CORDIC also repeats certain
            // iterations to converge; that is not implemented here - confirm
            // whether it is needed.
            localparam int shift = (mode == 2) ? iter + 1 : iter;

            // Rotation constant, hard-wired per stage.
            // The tables are unsigned Q0.48 (MSB weight 2**-1); the datapath
            // MSB has weight 2**0, so the table bits sit one position below
            // the MSB, under a zero sign bit.
            // Linear puts 2**0 == 1 << (width-1), i.e. 2**-iter here.
            localparam bit signed [width-1:0] a =
                mode == 0
                  ? {1'b0, arctanTable[iter][47 -: width-1]}          // Circular
                  : (mode == 1 ? 1 << (width-1-iter)                  // Linear
                  : (mode == 2 ? {1'b0, arctanhTable[iter][47 -: width-1]} : 0) // Hyperbolic
                  );

            // Direction bit; d=1 means the direction is -1.
            // Rotation (v=0): d is the sign bit of z.
            // Vectoring (v=1): d is the inverted XOR of the x and y sign bits:
            //
            //   sign bit x   sign bit y   direction   d bit
            //       0            0           -1         1
            //       1            0           +1         0
            //       0            1           +1         0
            //       1            1           -1         1
            uwire d = v ? ~(x[i-1][width-1] ^ y[i-1][width-1]) : z[i-1][width-1];

            always_comb
            begin
                // Every operand below is signed, so >>> sign-extends.
                case (mode)
                    // circular:   d=1: x + y * 2**-shift    d=0: x - y * 2**-shift
                    0: x[i] = x[i-1] + ((d ? y[i-1] : -y[i-1]) >>> shift);
                    // hyperbolic: d=1: x - y * 2**-shift    d=0: x + y * 2**-shift
                    2: x[i] = x[i-1] + ((d ? -y[i-1] : y[i-1]) >>> shift);
                    // linear: x passes through unchanged
                    1: x[i] = x[i-1];
                    default: x[i] = 0; // FIXME: ERROR
                endcase
                // y update is the same in every mode:
                //             d=1: y - x * 2**-shift    d=0: y + x * 2**-shift
                y[i] = y[i-1] + ((d ? -x[i-1] : x[i-1]) >>> shift);
                // a is a constant on one adder input:
                //             d=1: z + a                d=0: z - a
                z[i] = z[i-1] + (d ? a : -a);
            end
        end
    endgenerate

    // Output register. The interface signals are nets, which may only be
    // driven continuously, so the flops are local variables and the modport
    // outputs are driven by continuous assignments.
    logic [width-1:0] x_q;
    logic [width-1:0] y_q;
    logic [width-1:0] z_q;
    logic             v_q;

    always_ff @(posedge Clk)
    begin
        x_q <= x[stages-1];
        y_q <= y[stages-1];
        z_q <= z[stages-1];
        v_q <= v;
    end

    assign Out.x = x_q;
    assign Out.y = y_q;
    assign Out.z = z_q;
    assign Out.v = v_q;
endmodule
// 6ns target | |
// levels=2, stages=4: WNS=-0.436, out_z on genblock 0 to out_z on genblock 1 | |
// levels=4, stages=4: WNS=-4.537, out_z register on genblock 1 to out_z register on genblock 2 | |
// levels=8, stages=4: WNS=-5.176, out_z register 37 on genblock 3 to out_z register 37 on genblock 4 | |
// That's not how pipelines work! | |
// TopLevelTestCordic: timing/test wrapper that chains `levels` circular-mode
// CORDIC instances of `stages` iterations each, in rotation mode (v=0).
//
// Parameters
//   width  : not referenced by this module (the datapath is DATA_W bits);
//            kept so existing instantiations that override it still elaborate
//   levels : number of CORDIC instances in the chain
//   stages : iterations inside each CORDIC instance
//
// Ports
//   Clk   : clock
//   Angle : 32-bit angle; registered twice, then left-justified into the
//           DATA_W-bit z input of the first CORDIC instance
//   Sine  : y output of the last CORDIC instance, registered twice
module TopLevelTestCordic
#(
    parameter width=32,  // 38 is optimal for 32 stages; 27 for 24
    parameter levels=8,  // how many in the pipeline
    parameter stages=4   // how many stages within each CORDIC module
)
(
    input uwire Clk,
    //ICORDIC.In In,
    input logic [31:0] Angle,
    //ICORDIC.Out Out
    output logic [37:0] Sine
);
    // Datapath width; matches the fixed 38-bit Sine port.
    localparam int DATA_W = 38;

    // 1/K in Q0.48 (value taken from the constant documented in the CORDIC
    // table comments of this file). Seeding x with 1/K cancels the CORDIC
    // gain so y ends up as sin(z) rather than K*sin(z).
    localparam bit [47:0] INV_K_Q0_48 =
        48'b100110110111010011101101101010000100001101111111;

    logic [31:0]       angle_buf [0:1];
    // Single capture register; the extra retiming stages that were sketched
    // in the original (sine_buf[1..7]) were never connected.
    logic [DATA_W-1:0] sine_buf;

    // Index -1 is the seed fed to the first instance; index i is the
    // output of instance i.
    uwire [DATA_W-1:0] stage_x [-1:levels-1];
    uwire [DATA_W-1:0] stage_y [-1:levels-1];
    uwire [DATA_W-1:0] stage_z [-1:levels-1];
    uwire              stage_v [-1:levels-1];

    // x seed: 1/K placed under a zero sign bit, so the MSB has weight 2**0.
    // The previous seed of 38'b01 was one LSB, which shifts out to nothing.
    assign stage_x[-1] = {1'b0, INV_K_Q0_48[47 -: DATA_W-1]};
    assign stage_y[-1] = '0;
    assign stage_z[-1] = {angle_buf[1], 6'b0};
    assign stage_v[-1] = 1'b0;   // rotation mode

    genvar i;
    generate
        for (i=0; i<levels; i++) begin
            // Interface views of the nets before and after instance i.
            ICORDIC # (.width(DATA_W)) icin(
                .x(stage_x[i-1]),
                .y(stage_y[i-1]),
                .z(stage_z[i-1]),
                .v(stage_v[i-1])
            );
            ICORDIC # (.width(DATA_W)) icout(
                .x(stage_x[i]),
                .y(stage_y[i]),
                .z(stage_z[i]),
                .v(stage_v[i])
            );
            // Instance i performs iterations i*stages .. i*stages+stages-1.
            CORDIC # (.width(DATA_W), .level(i*stages), .mode(0), .stages(stages)) cordic_stage(
                .Clk(Clk),
                .In(icin.In),
                .Out(icout.Out)
            );
        end
    endgenerate

    always_ff @(posedge Clk)
    begin
        // Two-deep input register on the angle.
        angle_buf[0] <= Angle;
        angle_buf[1] <= angle_buf[0];
        // The sine is carried on the y channel; z only holds the residual
        // angle and was previously routed here by mistake.
        sine_buf <= stage_y[levels-1];
        Sine     <= sine_buf;
    end
endmodule
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment