Skip to content

Instantly share code, notes, and snippets.

@ZipCPU
Last active May 10, 2019 19:33
Show Gist options
  • Save ZipCPU/01570aa2a7c2f45872bb495c026c823d to your computer and use it in GitHub Desktop.
Save ZipCPU/01570aa2a7c2f45872bb495c026c823d to your computer and use it in GitHub Desktop.
One hot Verilog
// Here's the goal: imagine you have a return from a set of values.
// One of those data returns will set the i_valid line high. In that
// case, we want to return the corresponding bit. Hence, if i_valid[N],
// for any N, then the output should always be i_data[N] and the output
// valid signal should also be high.
//
// The goal:
// 1. How low can you get the logic for such a design.
// 2. Can you make the code to do that generic
//
// LUT counting:
// Yosys returns LUT2s, LUT3s, LUT4s, LUT5s, LUT6s, MUXF7s, and MUXF8s.
// To score, I calculated: LUT6s + LUT5s + max(LUT4s,LUT1s)
// + max(LUT3s,LUT2s). In one case, there were many LUT2s, so
// instead of max(LUT3s,LUT2s) I used LUT2s/2+LUT3s.
//
`default_nettype none
//
//
module onehotsel(i_valid, i_data, o_valid, o_data);
parameter LG = 6;
localparam WID = (1<<LG);
localparam SEL = 1;
//
input wire [WID-1:0] i_valid;
input wire [WID-1:0] i_data;
output reg o_valid;
output reg o_data;
integer N;
generate if (SEL == 0)
begin : BASIC
// Basic step one: a cascaded set of multiplexers
// This is what I'd do if I hadn't done this exercise
// Notice the high depth of the LUTs.
//
// LG 3
// 2 levels, 10 LUTs
// LG 4
// 3 levels, 20 LUTs
// LG 5
// 5 levels, 44 LUTs
// LG 6
// 8 levels, 87 LUTs
always @(*)
begin
o_valid = 0;
o_data = 0;
for(N=0; N<WID; N=N+1)
begin
if (i_valid[N])
begin
o_valid = 1;
o_data = i_data[N];
end
end
end
end else if (SEL == 1)
begin
// Let's just OR everything together. By starting with
// zeros, and only OR'ing 1'b1 values to the output,
// Yosys can balance the MUX trees, so this works a bit
// better. If nothing is selected, the result will be zero
//
// Much to my surprise in hindsight, this approach yields
// the lowest logic.
//
// LG 3
// 2 levels, 5 LUTs
// LG 4
// 2 levels, 10 LUTs
// LG 5
// 2 levels, 48 LUTs
// LG 6
// 3 levels, 54 LUTs
//
always @(*)
begin
o_valid = 0;
o_data = 0;
for(N=0; N<WID; N=N+1)
begin
if (i_valid[N])
begin
o_valid = 1;
o_data = o_data | i_data[N];
end
end
end
end else if (SEL == 2)
begin
// Third selection: GO HOG WILD!
// 1. Build mux trees ourselves
// 2. Force the mux trees to be balanced
// 3. Always select *something*. If nothing is selected, the
// result should be i_data[0]
//
// This is a great idea, it just didn't work nearly as well
// as the last approach.
//
// Build a MUX tree to evaluate the output
reg [(1<<LG)-2:0] pre_val;
reg [(1<<LG)-2:0] pre_dat;
if (0 && (LG == 2))
begin
// This is the basic idea. We'll rewrite it in a
// moment to be more generic.
always @(*)
begin
pre_val[0] = |i_valid[1:0];
pre_val[1] = |i_valid[3:2];
//
pre_val[2] = |pre_val[1:0];
end
always @(*)
begin
pre_dat[0] = (i_valid[1]) ? i_data[1] : i_data[0];
pre_dat[1] = (i_valid[3]) ? i_data[3] : i_data[2];
//
pre_dat[2] = (pre_val[1]) ? pre_dat[1] : pre_dat[0];
end
always @(*)
o_valid = pre_val[2];
always @(*)
o_data = pre_dat[2];
end else if (0 && (LG == 3))
begin
// In case you didn't get the basic idea before,
// here it is again, this time for 8 input valids
// and data signals. Again, we'll do this again
// in a moment to be more generic. The big thing
// here is, this is easier to read than the generic
// approach.
//
always @(*)
begin
pre_val[0] = |i_valid[1:0];
pre_val[1] = |i_valid[3:2];
pre_val[2] = |i_valid[5:4];
pre_val[3] = |i_valid[7:6];
//
pre_val[4] = |pre_val[1:0];
pre_val[5] = |pre_val[3:2];
//
pre_val[6] = |pre_val[5:4];
end
always @(*)
begin
pre_dat[0] = (i_valid[1]) ? i_data[1] : i_data[0];
pre_dat[1] = (i_valid[3]) ? i_data[3] : i_data[2];
pre_dat[2] = (i_valid[5]) ? i_data[5] : i_data[4];
pre_dat[3] = (i_valid[7]) ? i_data[7] : i_data[6];
//
pre_dat[4] = (pre_val[1]) ? pre_dat[1] : pre_dat[0];
pre_dat[5] = (pre_val[3]) ? pre_dat[3] : pre_dat[2];
//
pre_dat[6] = (pre_val[5]) ? pre_dat[5] : pre_dat[4];
end
always @(*)
o_valid = pre_val[6];
always @(*)
o_data = pre_dat[6];
end else begin : GENERIC_SELECTION_TREE
// As written below, the logic is so difficult to
// follow that if I hadn't done it several times
// for specific values of LG, I wouldn't have been
// able to even understand what I was trying to do
// below.
//
// LG = 3
// 2 levels, 7 LUTs
// LG = 4
// 2 Levels, 26 LUTs
// LG = 5
// 3 Levels, 36 LUTs
// LG = 6
// 3 Levels, 102 LUTs
always @(*)
begin
for(N=0; N<(1<<(LG-1)); N=N+1)
begin
pre_val[N] = |i_valid[2*N +: 2];
end
for(N=0; N+(1<<(LG-1)) < (1<<LG)-1; N=N+1)
begin
pre_val[N+(1<<(LG-1))]
= |pre_val[2*N +: 2];
end
end
always @(*)
begin
for(N=0; N<(1<<LG)/2; N=N+1)
begin
if (i_valid[2*N+1])
pre_dat[N] = i_data[2*N+1];
else
pre_dat[N] = i_data[2*N];
end
for(N=0; N+(1<<(LG-1))<(1<<LG)-1; N=N+1)
begin
if (pre_val[2*N+1])
pre_dat[N+(1<<(LG-1))] = pre_dat[2*N+1];
else
pre_dat[N+(1<<(LG-1))] = pre_dat[2*N];
end
end
always @(*)
o_valid = pre_val[(1<<LG)-2];
always @(*)
o_data = pre_dat[(1<<LG)-2];
end
end endgenerate
//
// Formally verify that we kept our logic correct.
//
// I don't trust any of this logic if it hasn't been verified.
`ifdef FORMAL
//
// i_valid is only ever allowed to be zero, or to have one bit high, $onehot()
//
`ifdef VERIFIC
//
// SVA expression for $onehot0
//
always @(*)
assume($onehot0(i_valid));
`else
//
// If $onehot0 isn't supported
//
always @(*)
for(N=0; N<WID; N=N+1)
begin
if (i_valid[N])
assume(i_valid == (1<<N));
end
`endif
//
// If nothing is valid on the input, assert that the output
// valid is low
//
always @(*)
if (i_valid == 0)
assert(!o_valid);
// Now check each of the valid bits to assert that, if valid,
// we have the right bit out
always @(*)
for(N=0; N<WID; N=N+1)
begin
if (i_valid[N])
begin
assert(o_valid);
assert(o_data == i_data[N]);
end
end
`endif
endmodule
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment