Created
February 5, 2018 04:36
-
-
Save buttercutter/12f4ec8ca3a8c8baa4df3829daa07f52 to your computer and use it in GitHub Desktop.
Sobel convolution for Xilinx ML605 (single channel) example in http://riffa.ucsd.edu/node/16
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
`timescale 1ns/1ns
//---------------------------------------------------------------------------- | |
// This software is Copyright © 2012 The Regents of the University of | |
// California. All Rights Reserved. | |
// | |
// Permission to copy, modify, and distribute this software and its | |
// documentation for educational, research and non-profit purposes, without | |
// fee, and without a written agreement is hereby granted, provided that the | |
// above copyright notice, this paragraph and the following three paragraphs | |
// appear in all copies. | |
// | |
// Permission to make commercial use of this software may be obtained by | |
// contacting: | |
// Technology Transfer Office | |
// 9500 Gilman Drive, Mail Code 0910 | |
// University of California | |
// La Jolla, CA 92093-0910 | |
// (858) 534-5815 | |
// invent@ucsd.edu
// | |
// This software program and documentation are copyrighted by The Regents of | |
// the University of California. The software program and documentation are | |
// supplied "as is", without any accompanying services from The Regents. The | |
// Regents does not warrant that the operation of the program will be | |
// uninterrupted or error-free. The end-user understands that the program was | |
// developed for research purposes and is advised not to rely exclusively on | |
// the program for any reason. | |
// | |
// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO | |
// ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR | |
// CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING | |
// OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, | |
// EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF | |
// THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF | |
// CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, | |
// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
// THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, | |
// AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO | |
// PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR | |
// MODIFICATIONS. | |
//---------------------------------------------------------------------------- | |
//---------------------------------------------------------------------------- | |
// Filename: chnl_convolution.v | |
// Version: 1.00.a | |
// Verilog Standard: Verilog-2001 | |
// Description: RIFFA channel adapter that applies a 3x3 Sobel filter to an
// 8-bit image. Data is received on the RX port in sequence: one header word
// carrying the frame width (bits [10:0]) and height (bits [42:32]), followed
// by the pixel data packed 8 pixels per 64-bit word. The pixel words are
// consumed directly from the RX FIFO, split into single bytes and fed through
// a 3x3 sliding window. The two gradients are computed in a short pipeline,
// their absolute values are summed, and the halved, saturated 8-bit result is
// packed back into 64-bit words. Once the frame size is known, a TX
// transaction is started for sending the filtered pixels; it runs
// concurrently with the reception of the image. The TX transaction ends when
// the output accumulator has been flushed and holds no more valid data.
// | |
// Author: Matt Jacobsen | |
// History: @mattj: Version 2.0 | |
//----------------------------------------------------------------------------- | |
//-----------------------------------------------------------------------------
// chnl_convolution
//
// RIFFA channel user logic: receives an 8-bit image on the RX port, runs a
// 3x3 Sobel-style gradient filter over it and streams the filtered pixels
// back on the TX port.
//
// RX stream layout:
//   word 0      : frame width in bits [10:0], frame height in bits [42:32]
//   word 1..n   : pixel data, 8 pixels per word ({PX7, ..., PX1, PX0})
//
// TX stream layout:
//   word 0..m   : filtered pixels, 8 per word ({PX7, ..., PX1, PX0})
//
// The TX length announced on CHNL_TX_LEN is ceil((W-2)*(H-2) / 4) 32-bit
// words, where W and H are the received frame width and height.
//
// Constraints visible in this module:
//   * The header decode reads CHNL_RX_DATA[42:32] and both byte packers are
//     instantiated for 8 bytes, so the logic only works with
//     C_PCI_DATA_WIDTH == 64.
//   * Width/height are 11-bit fields; the sliding window is instantiated
//     with C_MAX_LEN = 1920.
//   * W and H below 2 make the "minus 2" values wrap (no clamping is done).
//-----------------------------------------------------------------------------
module chnl_convolution #(
    parameter C_PCI_DATA_WIDTH = 9'd64
)
(
    input CLK,
    input RST,
    output CHNL_RX_CLK,
    input CHNL_RX,
    output CHNL_RX_ACK,
    input CHNL_RX_LAST,
    input [31:0] CHNL_RX_LEN,
    input [30:0] CHNL_RX_OFF,
    input [C_PCI_DATA_WIDTH-1:0] CHNL_RX_DATA,
    input CHNL_RX_DATA_VALID,
    output CHNL_RX_DATA_REN,

    output CHNL_TX_CLK,
    output CHNL_TX,
    input CHNL_TX_ACK,
    output CHNL_TX_LAST,
    output [31:0] CHNL_TX_LEN,
    output [30:0] CHNL_TX_OFF,
    output [C_PCI_DATA_WIDTH-1:0] CHNL_TX_DATA,
    output CHNL_TX_DATA_VALID,
    input CHNL_TX_DATA_REN
);

`include "common_functions.v"

// RX state encoding.
localparam [2:0] RXS_IDLE       = 3'd0; // wait for CHNL_RX while TX is idle
localparam [2:0] RXS_HEADER     = 3'd1; // capture width/height word
localparam [2:0] RXS_CALC_BYTES = 3'd2; // compute input/output byte counts
localparam [2:0] RXS_CALC_WORDS = 3'd3; // compute TX length in 32-bit words
localparam [2:0] RXS_STREAM     = 3'd4; // consume pixel words
localparam [2:0] RXS_DONE       = 3'd5; // wait for the RX channel to go idle

// TX state encoding.
localparam [1:0] TXS_IDLE    = 2'd0; // wait for RX to reach RXS_CALC_WORDS
localparam [1:0] TXS_REQUEST = 2'd1; // CHNL_TX asserted, wait for CHNL_TX_ACK
localparam [1:0] TXS_ARM     = 2'd2; // wait for the accumulator to leave "flushed"
localparam [1:0] TXS_SEND    = 2'd3; // wait for the accumulator to flush and drain

reg [2:0]  rRxState=0;
reg [10:0] rFrameWidth=0;
reg [10:0] rFrameHeight=0;
reg [10:0] rFrameWidthM2=0;
reg [10:0] rFrameHeightM2=0;
reg [21:0] rInputBytes=0;
reg [21:0] rOutputBytes=0;
reg [19:0] rOutputWords=0;
reg [21:0] rCount=0;

reg [1:0]  rTxState=0;

// Flush / valid flags travelling alongside the arithmetic pipeline.
reg rFlush0=0, rFlush1=0, rFlush2=0, rFlush3=0, rFlush4=0;
reg rValid0=0, rValid1=0, rValid2=0, rValid3=0;

wire       wDBytesRen;
wire       wDBytesFlushed;
wire       wDBytesDE;
wire [7:0] wDBytesData;
// High in a cycle where the byte distributor accepts the current RX word.
wire       wDBytesConsumed = (wDBytesRen & CHNL_RX_DATA_VALID);

reg        rWinRdy=0;
wire       wWinRdy;
wire       wWinRen;
wire       wWinValid;
wire [(8*3*3)-1:0] wWin;

wire       wAccRen;
wire       wAccFlushed;

// The nine 8-bit window taps, lowest byte of wWin first. How these map to
// image rows/columns is decided inside sliding_window (not visible here).
wire [7:0] wP1 = wWin[0*8 +:8];
wire [7:0] wP2 = wWin[1*8 +:8];
wire [7:0] wP3 = wWin[2*8 +:8];
wire [7:0] wP4 = wWin[3*8 +:8];
wire [7:0] wP5 = wWin[4*8 +:8];
wire [7:0] wP6 = wWin[5*8 +:8];
wire [7:0] wP7 = wWin[6*8 +:8];
wire [7:0] wP8 = wWin[7*8 +:8];
wire [7:0] wP9 = wWin[8*8 +:8];

// Arithmetic pipeline registers.
reg [9:0]  rGx0=0;   // P1 + 2*P4 + P7 (max 1020, fits 10 bits)
reg [9:0]  rGx1=0;   // P3 + 2*P6 + P9
reg [9:0]  rGy0=0;   // P1 + 2*P2 + P3
reg [9:0]  rGy1=0;   // P7 + 2*P8 + P9
reg [10:0] rGx=0;    // rGx1 - rGx0, 11-bit two's complement
reg [10:0] rGy=0;    // rGy0 - rGy1, 11-bit two's complement
reg [10:0] rGxAbs=0;
reg [10:0] rGyAbs=0;
reg [10:0] rPixel=0; // |Gx| + |Gy| (max 2040, fits 11 bits)

// Output pixel: (|Gx| + |Gy|) / 2, saturated to 255 once the sum reaches 512.
wire [7:0] wPixelOut = (rPixel[10] | rPixel[9]) ? 8'd255 : rPixel[8:1];

assign CHNL_RX_CLK = CLK;
assign CHNL_RX_ACK = (rRxState == RXS_HEADER);
// Pop the header word unconditionally while in RXS_HEADER; afterwards pop a
// word only in the cycle the byte distributor takes it.
assign CHNL_RX_DATA_REN = (rRxState == RXS_HEADER) ||
                          (wDBytesConsumed && rRxState == RXS_STREAM);

assign CHNL_TX_CLK = CLK;
assign CHNL_TX = (rTxState != TXS_IDLE);
assign CHNL_TX_LAST = 1'd1;
assign CHNL_TX_LEN = {12'd0, rOutputWords}; // in words
assign CHNL_TX_OFF = 31'd0;

// Accepts input from the RX port in the following seq:
// {HEIGHT, WIDTH},
// {PX7, ... PX1, PX0},
// ...
//
// RST is the only asynchronous control. The "channel idle" condition
// (!CHNL_RX & !CHNL_RX_DATA_VALID) is a synchronous clear evaluated on the
// clock edge, kept in its own branch so that the asynchronous reset term
// is not mixed with data-path signals.
always @(posedge CLK or posedge RST) begin
    if (RST) begin
        rRxState <= #1 RXS_IDLE;
    end
    else if (!CHNL_RX & !CHNL_RX_DATA_VALID) begin
        rRxState <= #1 RXS_IDLE;
    end
    else begin
        case (rRxState)

        RXS_IDLE: begin // Wait for the TX state machine to sync up
            if (CHNL_RX && rTxState == TXS_IDLE)
                rRxState <= #1 RXS_HEADER;
        end

        RXS_HEADER: begin // Save the image information
            // Sampled every cycle in this state; the values that stick are
            // the ones present in the cycle CHNL_RX_DATA_VALID is high.
            rFrameWidth    <= #1 CHNL_RX_DATA[10:0];
            rFrameHeight   <= #1 CHNL_RX_DATA[42:32];
            rFrameWidthM2  <= #1 CHNL_RX_DATA[10:0] - 2'd2;
            rFrameHeightM2 <= #1 CHNL_RX_DATA[42:32] - 2'd2;
            rCount         <= #1 22'd0;
            if (CHNL_RX_DATA_VALID)
                rRxState <= #1 RXS_CALC_BYTES;
        end

        RXS_CALC_BYTES: begin // Calculate the limits (byte counts)
            rInputBytes  <= #1 rFrameWidth * rFrameHeight;
            rOutputBytes <= #1 rFrameWidthM2 * rFrameHeightM2;
            rRxState     <= #1 RXS_CALC_WORDS;
        end

        RXS_CALC_WORDS: begin // Calculate the limits (TX length)
            // ceil(rOutputBytes / 4). The TX state machine leaves TXS_IDLE
            // on this same edge, so CHNL_TX_LEN is stable when CHNL_TX rises.
            rOutputWords <= #1 rOutputBytes[21:2] + (|rOutputBytes[1:0]);
            rRxState     <= #1 RXS_STREAM;
        end

        RXS_STREAM: begin // Read the image until completion
            // Each consumed RX word carries 8 pixel bytes.
            rCount <= #1 rCount + (wDBytesConsumed ? 22'd8 : 22'd0);
            if (rCount >= rInputBytes)
                rRxState <= #1 RXS_DONE;
        end

        RXS_DONE: begin // Wait for the CHNL_RX to drop
            // Left via the synchronous idle clear above.
        end

        default: begin
            rRxState <= #1 RXS_IDLE;
        end

        endcase
    end
end

// Feed single bytes into the sliding window.
// FLUSH is asserted whenever the RX state machine is outside the
// HEADER..STREAM range, i.e. before a frame starts and after it completes.
distribute_bytes #(
    .C_IN_BYTES(8),
    .C_OUT_BYTES(1)
) dbytes (
    .CLK(CLK),
    .RST(RST),
    .INDATA_EN(CHNL_RX_DATA_VALID && (rRxState == RXS_STREAM)),
    .INDATA(CHNL_RX_DATA),
    .INDATA_RD_EN(wDBytesRen),
    .FLUSH(!(rRxState == RXS_HEADER     || rRxState == RXS_CALC_BYTES ||
             rRxState == RXS_CALC_WORDS || rRxState == RXS_STREAM)),
    .FLUSHED(wDBytesFlushed),
    .OUTDATA_EN(wDBytesDE),
    .OUTDATA(wDBytesData),
    .OUTDATA_RD_EN(wWinRen)
);

// Slide a 3x3 window over the input image.
sliding_window #(
    .C_PIXEL_WIDTH(8),
    .C_MAX_LEN(1920),
    .C_WIN_DIM(3)
) slidingWin (
    .RST(RST),
    .CLK(CLK),
    .PX_EOF(wDBytesFlushed),
    .PX_VALID(wDBytesDE),
    .PX_DATA(wDBytesData),
    .PX_TAKE(wWinRen),
    .PX_LINE_LEN(rFrameWidth),
    .WIN(wWin),
    .WIN_VALID(wWinValid),
    .WIN_RDY(wWinRdy),
    .WIN_SHIFT(wAccRen)
);

// Convolve the image: sobel filter (horiz + vert).
// Four register stages, all advanced only when the output accumulator
// asserts wAccRen, so the whole pipeline stalls together. The rFlush* and
// rValid* chains carry the end-of-frame and data-valid flags alongside the
// data they belong to.
always @ (posedge CLK) begin
    if (wAccRen) begin
        // Stage 0: weighted sums of the window taps.
        rWinRdy <= #1 wWinRdy;
        rFlush0 <= #1 wDBytesFlushed;
        rFlush1 <= #1 rFlush0;
        rValid0 <= #1 (rWinRdy & wWinValid & !rFlush0);
        rGx0    <= #1 wP1 + (wP4 << 1) + wP7;
        rGx1    <= #1 wP3 + (wP6 << 1) + wP9;
        rGy0    <= #1 wP1 + (wP2 << 1) + wP3;
        rGy1    <= #1 wP7 + (wP8 << 1) + wP9;

        // Stage 1: signed differences (bit 10 is the sign).
        rFlush2 <= #1 rFlush1;
        rValid1 <= #1 rValid0;
        rGx     <= #1 rGx1 - rGx0;
        rGy     <= #1 rGy0 - rGy1;

        // Stage 2: absolute values via two's-complement negation.
        rFlush3 <= #1 rFlush2;
        rValid2 <= #1 rValid1;
        rGxAbs  <= #1 rGx[10] ? ((~rGx) + 11'd1) : rGx;
        rGyAbs  <= #1 rGy[10] ? ((~rGy) + 11'd1) : rGy;

        // Stage 3: gradient magnitude approximation |Gx| + |Gy|.
        rFlush4 <= #1 rFlush3;
        rValid3 <= #1 rValid2;
        rPixel  <= #1 rGxAbs + rGyAbs;
    end
end

// Accumulate the single byte data from the convolution.
accumulate_bytes #(
    .C_IN_BYTES(1),
    .C_OUT_BYTES(8)
) accumulate (
    .RST(RST),
    .CLK(CLK),
    .INDATA_EN(rValid3 & wAccRen),
    .INDATA(wPixelOut),
    .INDATA_RD_EN(wAccRen),
    .FLUSH(rFlush4),
    .FLUSHED(wAccFlushed),
    .OUTDATA_EN(CHNL_TX_DATA_VALID),
    .OUTDATA(CHNL_TX_DATA),
    .OUTDATA_RD_EN(CHNL_TX_DATA_REN)
);

// Wait until the RX transaction starts, then start a TX transaction for
// the response. Output data in the following seq:
// {PX7, ..., PX1, PX0},
// ...
always @(posedge CLK or posedge RST) begin
    if (RST) begin
        rTxState <= #1 TXS_IDLE;
    end
    else begin
        case (rTxState)

        TXS_IDLE: begin // Wait for rRxState == RXS_CALC_WORDS
            if (rRxState == RXS_CALC_WORDS)
                rTxState <= #1 TXS_REQUEST;
        end

        TXS_REQUEST: begin // Wait for the CHNL_TX_ACK
            if (CHNL_TX_ACK)
                rTxState <= #1 TXS_ARM;
        end

        TXS_ARM: begin // Wait for the accumulator to leave the flushed state
            if (!wAccFlushed)
                rTxState <= #1 TXS_SEND;
        end

        TXS_SEND: begin // Finish the TX transaction
            // Done once the accumulator reports flushed again and no
            // output word is pending.
            if (wAccFlushed & !CHNL_TX_DATA_VALID)
                rTxState <= #1 TXS_IDLE;
        end

        default: begin
            rTxState <= #1 TXS_IDLE;
        end

        endcase
    end
end

endmodule
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment