Last active
May 10, 2019 19:33
-
-
Save ZipCPU/01570aa2a7c2f45872bb495c026c823d to your computer and use it in GitHub Desktop.
One hot Verilog
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Here's the goal: imagine you have a return from a set of values. | |
// One of those data returns will set the i_valid line high. In that | |
// case, we want to return the corresponding bit. Hence, if i_valid[N], | |
// for any N, then the output should always be i_data[N] and the output | |
// valid signal should also be high. | |
// | |
// The goal: | |
// 1. How low can you get the logic for such a design. | |
// 2. Can you make the code to do that generic | |
// | |
// LUT counting: | |
// Yosys returns LUT2s, LUT3s, LUT4s, LUT5s, LUT6s, MUXF7s, and MUXF8s. | |
// To score, I calculated: LUT6s + LUT5s + max(LUT4s,LUT1s) | |
// + max(LUT3s,LUT2s). In one case, there were many LUT2s, so | |
// instead of max(LUT3s,LUT2s) I used LUT2s/2+LUT3s. | |
// | |
`default_nettype none | |
// | |
// | |
module onehotsel(i_valid, i_data, o_valid, o_data); | |
parameter LG = 6; | |
localparam WID = (1<<LG); | |
localparam SEL = 1; | |
// | |
input wire [WID-1:0] i_valid; | |
input wire [WID-1:0] i_data; | |
output reg o_valid; | |
output reg o_data; | |
integer N; | |
generate if (SEL == 0) | |
begin : BASIC | |
// Basic step one: a cascaded set of multiplexers | |
// This is what I'd do if I hadn't done this exercise | |
// Notice the high depth of the LUTs. | |
// | |
// LG 3 | |
// 2 levels, 10 LUTs | |
// LG 4 | |
// 3 levels, 20 LUTs | |
// LG 5 | |
// 5 levels, 44 LUTs | |
// LG 6 | |
// 8 levels, 87 LUTs | |
always @(*) | |
begin | |
o_valid = 0; | |
o_data = 0; | |
for(N=0; N<WID; N=N+1) | |
begin | |
if (i_valid[N]) | |
begin | |
o_valid = 1; | |
o_data = i_data[N]; | |
end | |
end | |
end | |
end else if (SEL == 1) | |
begin | |
// Let's just OR everything together. By starting with | |
// zeros, and only OR'ing 1'b1 values to the output, | |
// Yosys can balance the MUX trees, so this works a bit | |
// better. If nothing is selected, the result will be zero | |
// | |
// Much to my surprise in hindsight, this approach yields | |
// the lowest logic. | |
// | |
// LG 3 | |
// 2 levels, 5 LUTs | |
// LG 4 | |
// 2 levels, 10 LUTs | |
// LG 5 | |
// 2 levels, 48 LUTs | |
// LG 6 | |
// 3 levels, 54 LUTs | |
// | |
always @(*) | |
begin | |
o_valid = 0; | |
o_data = 0; | |
for(N=0; N<WID; N=N+1) | |
begin | |
if (i_valid[N]) | |
begin | |
o_valid = 1; | |
o_data = o_data | i_data[N]; | |
end | |
end | |
end | |
end else if (SEL == 2) | |
begin | |
// Third selection: GO HOG WILD! | |
// 1. Build mux trees ourselves | |
// 2. Force the mux trees to be balanced | |
// 3. Always select *something*. If nothing is selected, the | |
// result should be i_data[0] | |
// | |
// This is a great idea, it just didn't work nearly as well | |
// as the last approach. | |
// | |
// Build a MUX tree to evaluate the output | |
reg [(1<<LG)-2:0] pre_val; | |
reg [(1<<LG)-2:0] pre_dat; | |
if (0 && (LG == 2)) | |
begin | |
// This is the basic idea. We'll rewrite it in a | |
// moment to be more generic. | |
always @(*) | |
begin | |
pre_val[0] = |i_valid[1:0]; | |
pre_val[1] = |i_valid[3:2]; | |
// | |
pre_val[2] = |pre_val[1:0]; | |
end | |
always @(*) | |
begin | |
pre_dat[0] = (i_valid[1]) ? i_data[1] : i_data[0]; | |
pre_dat[1] = (i_valid[3]) ? i_data[3] : i_data[2]; | |
// | |
pre_dat[2] = (pre_val[1]) ? pre_dat[1] : pre_dat[0]; | |
end | |
always @(*) | |
o_valid = pre_val[2]; | |
always @(*) | |
o_data = pre_dat[2]; | |
end else if (0 && (LG == 3)) | |
begin | |
// In case you didn't get the basic idea before, | |
// here it is again, this time for 8 input valids | |
// and data signals. Again, we'll do this again | |
// in a moment to be more generic. The big thing | |
// here is, this is easier to read than the generic | |
// approach. | |
// | |
always @(*) | |
begin | |
pre_val[0] = |i_valid[1:0]; | |
pre_val[1] = |i_valid[3:2]; | |
pre_val[2] = |i_valid[5:4]; | |
pre_val[3] = |i_valid[7:6]; | |
// | |
pre_val[4] = |pre_val[1:0]; | |
pre_val[5] = |pre_val[3:2]; | |
// | |
pre_val[6] = |pre_val[5:4]; | |
end | |
always @(*) | |
begin | |
pre_dat[0] = (i_valid[1]) ? i_data[1] : i_data[0]; | |
pre_dat[1] = (i_valid[3]) ? i_data[3] : i_data[2]; | |
pre_dat[2] = (i_valid[5]) ? i_data[5] : i_data[4]; | |
pre_dat[3] = (i_valid[7]) ? i_data[7] : i_data[6]; | |
// | |
pre_dat[4] = (pre_val[1]) ? pre_dat[1] : pre_dat[0]; | |
pre_dat[5] = (pre_val[3]) ? pre_dat[3] : pre_dat[2]; | |
// | |
pre_dat[6] = (pre_val[5]) ? pre_dat[5] : pre_dat[4]; | |
end | |
always @(*) | |
o_valid = pre_val[6]; | |
always @(*) | |
o_data = pre_dat[6]; | |
end else begin : GENERIC_SELECTION_TREE | |
// As written below, the logic is so difficult to | |
// follow that if I hadn't done it several times | |
// for specific values of LG, I wouldn't have been | |
// able to even understand what I was trying to do | |
// below. | |
// | |
// LG = 3 | |
// 2 levels, 7 LUTs | |
// LG = 4 | |
// 2 Levels, 26 LUTs | |
// LG = 5 | |
// 3 Levels, 36 LUTs | |
// LG = 6 | |
// 3 Levels, 102 LUTs | |
always @(*) | |
begin | |
for(N=0; N<(1<<(LG-1)); N=N+1) | |
begin | |
pre_val[N] = |i_valid[2*N +: 2]; | |
end | |
for(N=0; N+(1<<(LG-1)) < (1<<LG)-1; N=N+1) | |
begin | |
pre_val[N+(1<<(LG-1))] | |
= |pre_val[2*N +: 2]; | |
end | |
end | |
always @(*) | |
begin | |
for(N=0; N<(1<<LG)/2; N=N+1) | |
begin | |
if (i_valid[2*N+1]) | |
pre_dat[N] = i_data[2*N+1]; | |
else | |
pre_dat[N] = i_data[2*N]; | |
end | |
for(N=0; N+(1<<(LG-1))<(1<<LG)-1; N=N+1) | |
begin | |
if (pre_val[2*N+1]) | |
pre_dat[N+(1<<(LG-1))] = pre_dat[2*N+1]; | |
else | |
pre_dat[N+(1<<(LG-1))] = pre_dat[2*N]; | |
end | |
end | |
always @(*) | |
o_valid = pre_val[(1<<LG)-2]; | |
always @(*) | |
o_data = pre_dat[(1<<LG)-2]; | |
end | |
end endgenerate | |
// | |
// Formally verify that we kept our logic correct. | |
// | |
// I don't trust any of this logic if it hasn't been verified. | |
`ifdef FORMAL | |
// | |
// i_valid is only ever allowed to be zero, or to have one bit high, $onehot() | |
// | |
`ifdef VERIFIC | |
// | |
// SVA expression for $onehot0 | |
// | |
always @(*) | |
assume($onehot0(i_valid)); | |
`else | |
// | |
// If $onehot0 isn't supported | |
// | |
always @(*) | |
for(N=0; N<WID; N=N+1) | |
begin | |
if (i_valid[N]) | |
assume(i_valid == (1<<N)); | |
end | |
`endif | |
// | |
// If nothing is valid on the input, assert that the output | |
// valid is low | |
// | |
always @(*) | |
if (i_valid == 0) | |
assert(!o_valid); | |
// Now check each of the valid bits to assert that, if valid, | |
// we have the right bit out | |
always @(*) | |
for(N=0; N<WID; N=N+1) | |
begin | |
if (i_valid[N]) | |
begin | |
assert(o_valid); | |
assert(o_data == i_data[N]); | |
end | |
end | |
`endif | |
endmodule |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment