Skip to content

Instantly share code, notes, and snippets.

@jrmoserbaltimore
Created January 16, 2022 21:55
Show Gist options
  • Save jrmoserbaltimore/7cc4ff0a8db4cb26fb0d479aebeeae4a to your computer and use it in GitHub Desktop.
Save jrmoserbaltimore/7cc4ff0a8db4cb26fb0d479aebeeae4a to your computer and use it in GitHub Desktop.
CORDIC slows as pipeline grows
`default_nettype uwire
// ICORDIC: signal bundle carried between CORDIC blocks.
//
//   x, y, z : three data words, each `width` bits wide
//   v       : one-bit flag that travels with the data words
//
// The four signals are ports of the interface itself, so whoever
// instantiates the interface decides which nets they are bound to.
//
// Modports:
//   In  - all four signals are inputs to the connected module
//   Out - all four signals are outputs of the connected module
interface ICORDIC
#(
    parameter width = 40 // 48 max
)
(
    wire logic [width-1:0] x,
    wire logic [width-1:0] y,
    wire logic [width-1:0] z,
    wire logic             v
);
    // Consumer-side view.
    modport In  (input  x, y, z, v);

    // Producer-side view.
    modport Out (output x, y, z, v);
endinterface
// CORDIC: `stages` unrolled, purely combinational CORDIC iterations followed
// by one output register.  Several instances can be chained, each one given a
// `level` equal to the number of iterations performed before it.
//
// Specializing to Circular, Linear, or Hyperbolic (the `mode` parameter)
// avoids 3-way muxes for the z calculation.
// The v input selects vectoring mode (v=1) or rotation mode (v=0).
// Sine and cosine are possible with a hard-wired circuit.
//
//                     Circular (0)       Linear (1)    Hyperbolic (2)
// v=0, x=1, y=0       sin(z), cos(z)     -             sinh(z), cosh(z)
//
// This also allows e**z by adding sinh(z)+cosh(z); and w**t by e**(t*ln(w))
//
// Number format: x, y and z are treated as two's-complement words (bit
// width-1 is the sign bit used to pick the rotation direction).
// NOTE(review): the original header says "Q1.47".  The angle constants below
// are radians in UQ0.48 and their top `width` bits are added to z unscaled,
// while linear mode places 2**0 at bit width-1.  Those two scales differ by a
// factor of two - confirm the intended z format before relying on absolute
// angle values.
// NOTE(review): hyperbolic CORDIC normally repeats some iterations (4, 13,
// 40, ...) to guarantee convergence; no repetition is performed here.
//
// Parameters:
//   width  - word size of x, y, z.  Must not exceed 48 (table word size).
//   level  - index of the first iteration done by this instance.
//            level + stages must not exceed 32 (table depth).
//   mode   - 0 circular, 1 linear, 2 hyperbolic.  Any other value forces x to
//            0 and uses a zero angle constant.
//   stages - number of iterations chained combinationally in this instance.
//
// Ports:
//   Clk - clock for the output register
//   In  - x, y, z, v sampled combinationally
//   Out - x, y, z after `stages` iterations and v, all registered on Clk
module CORDIC
#(
    parameter width=38, // 38 is optimal for 32 stages; 27 for 24
    parameter level=0, // which digit? 0-31 for a 32-bit circuit for example
    parameter mode=0, // Circular, linear, hyperbolic
    parameter stages = 1
)
(
    input uwire Clk,
    ICORDIC.In In,
    ICORDIC.Out Out
);
    // 48-bit tables of 32 entries
    // Generated as below:
    //
    // from fxpmath import Fxp
    // from numpy import arctan, arctanh, pi
    //
    // # Values are plain radians (arctanTable[0] is pi/4), stored as UQ0.48.
    // for trig in [arctan, arctanh]:
    //     print("localparam bit [0:31][47:0] {}Table = {{".format(trig.__name__))
    //     for i in range(0,32):
    //         j = i if trig.__name__ == "arctan" else i+1
    //         x = Fxp(trig(2**-j), dtype='UQ0.48')
    //         print("    48'b{},".format(x.bin()))
    //     print("};")
    //
    // 1/K in Q0.48 = .100110110111010011101101101010000100001101111111
    // 1/K' in Q1.48 = 1.001101010001111010000111001000000000111011010111
    // K and K' should be embedded with constants if possible, e.g. (pi*KK')**3/192
    //
    // The tables are packed arrays initialised by concatenation, so they must
    // list exactly 32 words: a shorter list is right-aligned and zero-padded,
    // which turns element [0] into zero and moves every entry one index late.

    // arctanTable[k] = atan(2**-k)
    localparam bit [0:31][47:0] arctanTable = {
        48'b110010010000111111011010101000100010000101101000,
        48'b011101101011000110011100000101011000011011101101,
        48'b001111101011011011101011111100100101100100000001,
        48'b000111111101010110111010100110101010110000101111,
        48'b000011111111101010101101110110111001011001111110,
        48'b000001111111111101010101011011101110101001011101,
        48'b000000111111111111101010101010110111011101101110,
        48'b000000011111111111111101010101010101101110111011,
        48'b000000001111111111111111101010101010101011011101,
        48'b000000000111111111111111111101010101010101010110,
        48'b000000000011111111111111111111101010101010101010,
        48'b000000000001111111111111111111111101010101010101,
        48'b000000000000111111111111111111111111101010101010,
        48'b000000000000011111111111111111111111111101010101,
        48'b000000000000001111111111111111111111111111101010,
        48'b000000000000000111111111111111111111111111111101,
        48'b000000000000000011111111111111111111111111111111,
        48'b000000000000000001111111111111111111111111111111,
        48'b000000000000000000111111111111111111111111111111,
        48'b000000000000000000011111111111111111111111111111,
        48'b000000000000000000001111111111111111111111111111,
        48'b000000000000000000000111111111111111111111111111,
        48'b000000000000000000000011111111111111111111111111,
        48'b000000000000000000000001111111111111111111111111,
        48'b000000000000000000000000111111111111111111111111,
        48'b000000000000000000000000011111111111111111111111,
        48'b000000000000000000000000001111111111111111111111,
        48'b000000000000000000000000001000000000000000000000,
        48'b000000000000000000000000000100000000000000000000,
        48'b000000000000000000000000000010000000000000000000,
        48'b000000000000000000000000000001000000000000000000,
        48'b000000000000000000000000000000100000000000000000  // atan(2**-31) ~= 2**-31
    };

    // arctanhTable[k] = atanh(2**-(k+1))
    localparam bit [0:31][47:0] arctanhTable = {
        48'b100011001001111101010011110101010110100000011000,
        48'b010000010110001010111011111010100000010001010001,
        48'b001000000010101100010010001110010011110101011101,
        48'b000100000000010101011000100010101101001101110101,
        48'b000010000000000010101010110001000100100011010111,
        48'b000001000000000000010101010101100010001000101011,
        48'b000000100000000000000010101010101011000100010001,
        48'b000000010000000000000000010101010101010110001000,
        48'b000000001000000000000000000010101010101010101100,
        48'b000000000100000000000000000000010101010101010101,
        48'b000000000010000000000000000000000010101010101010,
        48'b000000000001000000000000000000000000010101010101,
        48'b000000000000100000000000000000000000000010101010,
        48'b000000000000010000000000000000000000000000010101,
        48'b000000000000001000000000000000000000000000000010,
        48'b000000000000000100000000000000000000000000000000,
        48'b000000000000000010000000000000000000000000000000,
        48'b000000000000000001000000000000000000000000000000,
        48'b000000000000000000100000000000000000000000000000,
        48'b000000000000000000010000000000000000000000000000,
        48'b000000000000000000001000000000000000000000000000,
        48'b000000000000000000000100000000000000000000000000,
        48'b000000000000000000000010000000000000000000000000,
        48'b000000000000000000000001000000000000000000000000,
        48'b000000000000000000000000100000000000000000000000,
        48'b000000000000000000000000010000000000000000000000,
        48'b000000000000000000000000001000000000000000000000,
        48'b000000000000000000000000000100000000000000000000,
        48'b000000000000000000000000000010000000000000000000,
        48'b000000000000000000000000000001000000000000000000,
        48'b000000000000000000000000000000100000000000000000,
        48'b000000000000000000000000000000010000000000000000  // atanh(2**-32) ~= 2**-32
    };

    // Element [-1] is the module input; element [i] is the result of
    // iteration i.  Element [stages-1] feeds the output register.
    logic [width-1:0] x [-1:stages-1];
    logic [width-1:0] y [-1:stages-1];
    logic [width-1:0] z [-1:stages-1];
    uwire v;

    assign x[-1] = In.x;
    assign y[-1] = In.y;
    assign z[-1] = In.z;
    assign v = In.v;

    genvar i;
    generate
    for (i = 0; i < stages; i++)
    begin: cordic
        // Global iteration index of this stage across chained instances.
        localparam int iter = level + i;

        // Right-shift applied to x and y.  The hyperbolic table starts at
        // atanh(2**-1), so its k-th entry pairs with a shift of k+1.
        localparam int shift = (mode == 2) ? iter + 1 : iter;

        // Angle constant for this iteration, hard-wired to one adder input.
        // Linear puts 2**0 at bit width-1 and halves it every iteration.
        localparam bit [width-1:0] a =
            mode == 0
            ? arctanTable[iter][47-:width]                          // Circular
            : (mode == 1 ? 1 << (width-1-iter)                      // Linear
            : (mode == 2 ? arctanhTable[iter][47-:width] : 0)       // Hyperbolic
            );

        // This is one bit coming out of a 4-LUT; s as an input is just d,
        // with the contents of the LUT set accordingly, no inversion required.
        // for non-vector modes, d=sgn(z), so +1 if z is not negative, -1 otherwise.
        // For vector modes, it's the sign of x times the sign of y, which is XOR:
        //
        //   sign bit x   sign bit y   d    d bit value
        //   0            0            -1   1
        //   1            0             1   0
        //   0            1             1   0
        //   1            1            -1   1
        //
        // d=1 means d is negative.
        uwire d = v ? ~(x[i-1][width-1] ^ y[i-1][width-1]) : z[i-1][width-1];

        // Cross terms, negated as required and then shifted ARITHMETICALLY.
        // x and y hold two's-complement values, so a logical shift would
        // zero-fill negative operands.  Each shift is evaluated on its own in
        // a signed net so that the unsigned additions below cannot demote
        // `>>>` to a logical shift.
        //
        //   d:        1 (d == -1)      0 (d == 1)
        //   y_circ    +y * 2**-shift   -y * 2**-shift
        //   y_hyp     -y * 2**-shift   +y * 2**-shift
        //   x_cross   -x * 2**-shift   +x * 2**-shift
        uwire signed [width-1:0] y_circ  = $signed(d ? y[i-1] : -y[i-1]) >>> shift;
        uwire signed [width-1:0] y_hyp   = $signed(d ? -y[i-1] : y[i-1]) >>> shift;
        uwire signed [width-1:0] x_cross = $signed(d ? -x[i-1] : x[i-1]) >>> shift;

        always_comb
        begin
            case (mode)
                // Circular:   x - d * y * 2**-shift
                0: x[i] = x[i-1] + y_circ;
                // Hyperbolic: x + d * y * 2**-shift
                2: x[i] = x[i-1] + y_hyp;
                // in linear mode, u=0, so x passes through unchanged
                1: x[i] = x[i-1];
                default: x[i] = 0; // FIXME: ERROR
            endcase

            // y is unaffected by u or a
            //   d:  1 (d == -1)       0 (d == 1)
            //       y - x * 2**-i     y + x * 2**-i
            y[i] = y[i-1] + x_cross;

            // a is hard-wired to one of the adder's input ports.
            //   d:  1 (d == -1)       0 (d == 1)
            //       z + a             z - a
            z[i] = z[i-1] + (d ? a : -a);
        end
    end
    endgenerate

    // Output register.  The interface signals are nets, which may only be
    // driven by continuous assignments, so the flops are local variables that
    // are then assigned onto the Out modport.
    logic [width-1:0] x_q;
    logic [width-1:0] y_q;
    logic [width-1:0] z_q;
    logic             v_q;

    always_ff @(posedge Clk)
    begin
        x_q <= x[stages-1];
        y_q <= y[stages-1];
        z_q <= z[stages-1];
        v_q <= v;
    end

    assign Out.x = x_q;
    assign Out.y = y_q;
    assign Out.z = z_q;
    assign Out.v = v_q;
endmodule
// 6ns target
// levels=2, stages=4: WNS=-0.436, out_z on genblock 0 to out_z on genblock 1
// levels=4, stages=4: WNS=-4.537, out_z register on genblock 1 to out_z register on genblock 2
// levels=8, stages=4: WNS=-5.176, out_z register 37 on genblock 3 to out_z register 37 on genblock 4
// That's not how pipelines work!
// TopLevelTestCordic: timing test bench that chains `levels` CORDIC
// instances, each performing `stages` iterations in circular mode.
//
// Data flow:
//   Angle -> two input registers -> zero-extended on the right to 38 bits
//         -> z input of the first CORDIC instance
//   z output of the last instance -> two output registers -> Sine
//
// Parameters:
//   width  - not referenced inside this module; the datapath is fixed at
//            38 bits.
//   levels - number of chained CORDIC instances
//   stages - iterations inside each CORDIC instance; instance n is given
//            level = n * stages
//
// NOTE(review): the first instance is seeded with x = 1 (one LSB), y = 0, and
// Sine is taken from the z chain rather than from y.  Confirm whether that is
// intended for the timing experiment or whether y (with x seeded to 1/K) was
// meant.
module TopLevelTestCordic
#(
    parameter width=32, // 38 is optimal for 32 stages; 27 for 24
    parameter levels=8, // how many in the pipeline
    parameter stages=4 // how many stages within each CORDIC module
)
(
    input uwire Clk,
    input logic [31:0] Angle,
    output logic [37:0] Sine
);
    // Input and output registers around the CORDIC chain.  Only element 0 of
    // sine_buf is written.
    logic [31:0] angle_buf [0:1];
    logic [37:0] sine_buf [0:7];

    // Link [-1] is the seed for the first instance; link [n] is the registered
    // output of instance n.
    uwire [37:0] stage_x [-1:levels-1];
    uwire [37:0] stage_y [-1:levels-1];
    uwire [37:0] stage_z [-1:levels-1];
    uwire stage_v [-1:levels-1];

    // Seed values: x = 1, y = 0, z = buffered angle with six zero LSBs,
    // v = 0 (rotation mode).
    assign stage_v[-1] = 1'b0;
    assign stage_z[-1] = {angle_buf[1], 6'b0};
    assign stage_y[-1] = '0;
    assign stage_x[-1] = 38'd1;

    genvar lvl;
    generate
    for (lvl = 0; lvl < levels; lvl++) begin
        // Interface instance wrapping the link that feeds this CORDIC.
        ICORDIC #(.width(38)) icin
        (
            .x(stage_x[lvl-1]),
            .y(stage_y[lvl-1]),
            .z(stage_z[lvl-1]),
            .v(stage_v[lvl-1])
        );

        // Interface instance wrapping the link this CORDIC drives.
        ICORDIC #(.width(38)) icout
        (
            .x(stage_x[lvl]),
            .y(stage_y[lvl]),
            .z(stage_z[lvl]),
            .v(stage_v[lvl])
        );

        CORDIC
        #(
            .width(38),
            .level(lvl*stages),
            .mode(0),
            .stages(stages)
        )
        cordic_stage
        (
            .Clk(Clk),
            .In(icin.In),
            .Out(icout.Out)
        );
    end
    endgenerate

    // Two-deep input register chain for the angle.
    always_ff @(posedge Clk)
    begin
        angle_buf[0] <= Angle;
        angle_buf[1] <= angle_buf[0];
    end

    // Two-deep output register chain fed from the last z link.
    always_ff @(posedge Clk)
    begin
        sine_buf[0] <= stage_z[levels-1];
        Sine <= sine_buf[0];
    end
endmodule
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment