Skip to content

Instantly share code, notes, and snippets.

@buttercutter
Last active June 11, 2018 06:30
Show Gist options
  • Save buttercutter/9d185d35a6e6db0da10992a19c36f754 to your computer and use it in GitHub Desktop.
Save buttercutter/9d185d35a6e6db0da10992a19c36f754 to your computer and use it in GitHub Desktop.
C++ test code for an HLS computation kernel, for both the Xillybus and RIFFA PCIe frameworks
// g++ -g -pedantic -Wall -Werror -Wextra -fsanitize=address -fno-omit-frame-pointer host.cpp -o host `pkg-config --cflags --libs opencv`
#include <opencv2/core/core.hpp>
//#include <opencv2/imgcodecs/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <unistd.h>
#include <fcntl.h>
#include <iostream>
#include <fstream> // std::ifstream, std::ofstream
#include <string>
#include <sys/wait.h>
#include <errno.h>
#include <cmath>
using namespace cv;
using namespace std;
#define FORK 1
//#define LOOPBACK 1
#define RGB2YUV 1
unsigned int image_width;
unsigned int image_height;
const unsigned int CHNL_NUM = 3;
const unsigned int RED_CHNL = 2;
const unsigned int GREEN_CHNL = 1;
const unsigned int BLUE_CHNL = 0;
const unsigned int STREAM_WIDTH = 128;
const unsigned int NUM_OF_BITS_PER_BYTE = 8;
const unsigned int PIXEL_VALUE_RANGE = 8; // number of bits occupied by [R, G, B] or [Y, U, V] respectively (8-bit unsigned integer for each components) , https://docs.microsoft.com/en-us/windows-hardware/drivers/display/yuv-rgb-data-range-conversions
const unsigned int NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] output:[Y, U, V]
const unsigned int PIXEL_NUM_THAT_FITS_STREAM_WIDTH = 5; // 128-bit stream can at most fits 5 pixels ((PIXEL_NUM_THAT_FITS_STREAM_WIDTH*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE) bits = 120 bits), each pixels contains R, G, B which are encoded in 8 bits for each of the three color components
struct RGB_packet{
uint8_t R,G,B;
};
struct YUV_packet{
uint8_t Y,U,V;
};
struct YUV_packet* rgb2yuv(struct RGB_packet rgb_input) // convert rgb to yuv
{
unsigned char R = rgb_input.R;
unsigned char G = rgb_input.G;
unsigned char B = rgb_input.B;
int Y_temp, U_temp, V_temp;
vector<YUV_packet> vyuv_result = vector<YUV_packet>(sizeof(unsigned char) * NUM_OF_COMPONENTS_IN_A_PIXEL);
struct YUV_packet *yuv_result = vyuv_result.data();
// https://en.wikipedia.org/wiki/YUV#Full_swing_for_BT.601
Y_temp = 77*R + 150*G + 29*B;
U_temp = -43*R - 84*G + 127*B;
V_temp = 127*R - 106*G - 21*B;
Y_temp = (Y_temp + 128) >> 8;
U_temp = (U_temp + 128) >> 8;
V_temp = (V_temp + 128) >> 8;
yuv_result->Y = Y_temp;
yuv_result->U = U_temp + 128;
yuv_result->V = V_temp + 128;
return yuv_result;
}
/*
Plain write() may not write all bytes requested in the buffer, so
allwrite() loops until all data was indeed written, or exits in
case of failure, except for EINTR. The way the EINTR condition is
handled is the standard way of making sure the process can be suspended
with CTRL-Z and then continue running properly.
The function has no return value, because it always succeeds (or exits
instead of returning).
The function doesn't expect to reach EOF either.
*/
void allwrite(int fd, unsigned char *buf, unsigned int len) {
unsigned int sent = 0;
int rc;
while (sent < len) {
rc = write(fd, buf + sent, len - sent);fsync(fd);
if ((rc < 0) && (errno == EINTR)){
printf("continue in allwrite\n");
continue;
}
if (rc < 0) {
perror("allwrite() failed to write");
exit(1);
}
if (rc == 0) {
fprintf(stderr, "Reached write EOF (?!)\n");
exit(1);
}
sent += rc;
}
//return sent;
}
void allread(int fd, unsigned char *buf, unsigned int len) {
unsigned int recvd = 0;
int rc;
while (recvd < len) {
if(len < STREAM_WIDTH/NUM_OF_BITS_PER_BYTE) printf("before last read\n");
rc = read(fd, buf + recvd, len - recvd);//fsync(fd);
if(len < STREAM_WIDTH/NUM_OF_BITS_PER_BYTE) printf("rc = %d\n", rc);
if ((rc < 0) && (errno == EINTR)){
printf("continue in allread\n");
continue;
}
if (rc < 0) {
perror("allwrite() failed to write");
exit(1);
}
if (rc == 0) {
fprintf(stderr, "Reached write EOF (?!)\n");
exit(1);
}
recvd += rc;
}
//return recvd;
}
int main(int argc, char *argv[]) {
int fdr, fdw;
uint8_t *wr_buf, *rd_buf;
#ifdef FORK
int wait_status; // for wait()
pid_t pid;
#endif
struct RGB_packet *tologic;
struct YUV_packet *fromlogic;
fdr = open("/dev/xillybus_read_128", O_RDONLY); // will change to /dev/xillybus_read_128
fdw = open("/dev/xillybus_write_128", O_WRONLY); // will change to /dev/xillybus_write_128
if ((fdr < 0) || (fdw < 0)) {
perror("Failed to open Xillybus device file(s)");
exit(1);
}
// READ in an image file
String imageName( "lena512color.tiff" ); // by default
if( argc > 1)
{
imageName = argv[1];
}
Mat image;
image = imread( imageName, IMREAD_COLOR ); // Read the file
if( image.empty() ) // Check for invalid input
{
cout << "Could not open or find the image" << std::endl ;
return -1;
}
else
{
image_width = image.size().width;
image_height = image.size().height;
}
namedWindow( "Original Image", WINDOW_AUTOSIZE );
imshow( "Original Image", image );
Mat rgbchannel[CHNL_NUM];
// The actual splitting.
split(image, rgbchannel);
namedWindow("Red", WINDOW_AUTOSIZE);
imshow("Red", rgbchannel[RED_CHNL]);
namedWindow("Green", WINDOW_AUTOSIZE);
imshow("Green", rgbchannel[GREEN_CHNL]);
namedWindow("Blue", WINDOW_AUTOSIZE);
imshow("Blue", rgbchannel[BLUE_CHNL]);
waitKey(0); // see all three split channels before feeding in the channel data to xillybus/RIFFA for hardware computation
vector<RGB_packet> vTo(image_width * image_height); // lena.tiff is sized as 3*512*512
tologic = vTo.data();
if (!tologic) {
fprintf(stderr, "Failed to allocate memory\n");
exit(1);
}
for(unsigned int pixel_index = 0; pixel_index < (image_width * image_height); pixel_index++)
{
tologic[pixel_index].R = *(rgbchannel[RED_CHNL].data + pixel_index);
tologic[pixel_index].G = *(rgbchannel[GREEN_CHNL].data + pixel_index);
tologic[pixel_index].B = *(rgbchannel[BLUE_CHNL].data + pixel_index);
}
#ifdef FORK
pid = fork();
if (pid < 0) {
perror("Failed to fork()");
exit(1);
}
if (pid) {
close(fdr);
#endif
unsigned int num_of_pixels_sent = 0; // this is actual pixels number already sent, does not include the empty 8 bits
//unsigned int if_index = 0;
unsigned int rgb_stream_index = 0;
uint8_t rgb_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE + 1]; // could accomodate 5 pixels
while (num_of_pixels_sent < image_width * image_height)
{
if(((image_width * image_height)-num_of_pixels_sent) >= PIXEL_NUM_THAT_FITS_STREAM_WIDTH)
{
// arrange the five pixels in the format as in https://i.imgur.com/mdJwk7J.png
//if_index++; //printf("if_index = %d\n\r", if_index);
//if(if_index == 3) break;
for(rgb_stream_index = 0; (rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<((STREAM_WIDTH/NUM_OF_BITS_PER_BYTE)-1); rgb_stream_index=rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
rgb_stream[rgb_stream_index] = tologic[(rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].R;
rgb_stream[rgb_stream_index+1] = tologic[(rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].G;
rgb_stream[rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1] = tologic[(rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].B;
}
rgb_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE] = '\0'; // however, this NULL character is not sent across write()
rgb_stream[(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE)-1] = 0; // remember that the eight most significant bits of the 128-bits stream are ignored by hardware logic
/*for(unsigned int j=0; j<(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); j++)
{
printf("rgb_stream[%d] = %d\n\r", j, rgb_stream[j]); //break;
}*/
wr_buf = rgb_stream; // write() writes wr_buf in first-in-first-out order, so rgb_stream[0] will be written first into fdw as the least significant byte
allwrite(fdw, wr_buf, STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); // this write() is 128-bits or 16 bytes which include the empty MSB 8 bits
//printf("wr = %d\n", wr);
num_of_pixels_sent = num_of_pixels_sent + PIXEL_NUM_THAT_FITS_STREAM_WIDTH;
//printf("num_of_pixels_sent = %d\n", num_of_pixels_sent);
}
else // the remaining pixels do not fill all five pixel slots for a 128-bit stream
{
//break; // just to send bytes which is divisible by "STREAM_WIDTH/NUM_OF_BITS_PER_BYTE" bytes
for(rgb_stream_index = 0; (rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL); rgb_stream_index=rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
rgb_stream[rgb_stream_index] = tologic[(rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].R;
rgb_stream[rgb_stream_index+1] = tologic[(rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].G;
rgb_stream[rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1] = tologic[(rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].B;
}
for(rgb_stream_index = (((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL); rgb_stream_index<((STREAM_WIDTH/NUM_OF_BITS_PER_BYTE)-1); rgb_stream_index=rgb_stream_index+1)
{
rgb_stream[rgb_stream_index] = 0; // If the data doesn't end on a 16-byte boundary, consider padding with dummy data (usually zeros).
}
rgb_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE] = '\0'; // however, this NULL character is not sent across write()
rgb_stream[(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE)-1] = (image_width * image_height)-num_of_pixels_sent; // remember that the eight most significant bits of the 128-bits stream are ignored by hardware logic, but not in the last 128-bit data transaction, since this will help with hardware 'ap_start' and 'is_last_few_pixels' signals
/*for(unsigned int j=0; j<(((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL+1); j++)
{
printf("rgb_stream[%d] = %d\n\r", j, rgb_stream[j]);
}*/
wr_buf = rgb_stream; // this is a partially filled 128-bit stream (with less than 5 pixels)
allwrite(fdw, wr_buf, STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); // this is a write() which contains some dummy zero data and less than 5 pixels
//printf("wr = %d\n", wr);
break; // finish sending all (image_width * image_height) pixels
}
}
write(fdw, NULL, 0); // flush the write stream
close(fdw);
#ifdef FORK
printf("*** Write process enters waiting status .....\n");
pid = wait(&wait_status);
printf("*** write process detects read process with pid %d was done ***\n", pid); // most probably write process will be done first, since FPGA computation takes a few clock cyles
return 0;
}
else {
close(fdw);
#endif
vector<YUV_packet> vFrom(image_width * image_height);
fromlogic = vFrom.data();
//printf("fromlogic[0].Y = %p \n", &fromlogic[0].Y);
if (!fromlogic) {
fprintf(stderr, "Failed to allocate memory\n");
exit(1);
}
unsigned int num_of_pixels_received = 0; // this is actual pixels number already received, does not include the empty 8 bits
unsigned int yuv_stream_index = 0;
uint8_t yuv_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE + 1]; // could accomodate 5 pixels
while (num_of_pixels_received < image_width * image_height) {
if(((image_width * image_height)-num_of_pixels_received) >= PIXEL_NUM_THAT_FITS_STREAM_WIDTH)
{
rd_buf = yuv_stream;
printf("before read() \n");
allread(fdr, rd_buf, STREAM_WIDTH/NUM_OF_BITS_PER_BYTE);
printf("after read() \n");
// For every five pixels (128 bits) received from hardware logic computation, print out the YUV values of all five pixels
/*for(yuv_stream_index = 0; yuv_stream_index<STREAM_WIDTH/NUM_OF_BITS_PER_BYTE; yuv_stream_index=yuv_stream_index+1)
{
printf("yuv_stream[%d] = %d\n", yuv_stream_index, yuv_stream[yuv_stream_index]);
}*/
yuv_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE] = '\0'; // this NULL character is only to act as "stop bit" for character array
// store the calculated output YUV pixels into fromlogic such that we could reconstruct the image in its original dimension for visual display
for(yuv_stream_index = 0; (yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); yuv_stream_index=yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
fromlogic[(yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].Y = yuv_stream[yuv_stream_index];
fromlogic[(yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].U = yuv_stream[yuv_stream_index+1];
fromlogic[(yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].V = yuv_stream[yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1];
}
num_of_pixels_received = num_of_pixels_received + PIXEL_NUM_THAT_FITS_STREAM_WIDTH;
printf("num_of_pixels_received = %d\n\r", num_of_pixels_received);
//if(num_of_pixels_received == 40940) break; // just to test if there is actually something being read, or returned from hardware
}
else // the remaining pixels do not fill all five pixel slots for a 128-bit stream
{
//break; // just to test the rest of received bytes which is divisible by "STREAM_WIDTH/NUM_OF_BITS_PER_BYTE" bytes
rd_buf = yuv_stream;
printf("before read in else. \n");
allread(fdr, rd_buf, ((image_width * image_height)-num_of_pixels_received)*NUM_OF_COMPONENTS_IN_A_PIXEL); // is a partially filled 128-bit stream (with less than 5 pixels)
printf("after read in else. \n");
for(yuv_stream_index = 0; (yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(((image_width * image_height)-num_of_pixels_received)*NUM_OF_COMPONENTS_IN_A_PIXEL); yuv_stream_index=yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
fromlogic[(yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].Y = yuv_stream[yuv_stream_index];
fromlogic[(yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].U = yuv_stream[yuv_stream_index+1];
fromlogic[(yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].V = yuv_stream[yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1];
}
break; // finish receiving all (image_width * image_height) pixels
}
}
printf("before for loop, data integrity check\n");
for (unsigned int i = 0; i < (image_width * image_height); i++) // check the perfomance of hardware with respect to software computation
{
#ifdef LOOPBACK
if( (tologic[i].R != fromlogic[i].Y) ||
(tologic[i].G != fromlogic[i].U) ||
(tologic[i].B != fromlogic[i].V) )
#elif RGB2YUV
uint8_t expected_Y = rgb2yuv(tologic[i])->Y;
uint8_t expected_U = rgb2yuv(tologic[i])->U;
uint8_t expected_V = rgb2yuv(tologic[i])->V;
if( (abs(expected_Y - fromlogic[i].Y) > 1) ||
(abs(expected_U - fromlogic[i].U) > 1) ||
(abs(expected_V - fromlogic[i].V) > 1) ) // rgb2yuv conversion hardware tolerance fails by more than 1 compared to software computation
#endif
{
printf("********************************* Attention *************************************\n\r");
printf("R:%d G:%d B:%d \n\r", tologic[i].R, tologic[i].G, tologic[i].B);
printf("Y:%d U:%d V:%d \n\r", fromlogic[i].Y, fromlogic[i].U, fromlogic[i].V);
#ifdef RGB2YUV
printf("expected_Y:%d expected_U:%d expected_V:%d \n\r", expected_Y, expected_U, expected_V);
#endif
break; // just for troubleshooting
//exit(1);
}
}
//free(tologic);
//free(fromlogic);
printf("after for loop, data integrity check\n");
close(fdr);
#ifdef FORK
printf("*** Read process enters waiting status .....\n");
pid = wait(&wait_status);
printf("*** read process detects write process with pid %d was done ***\n", pid); // most probably write process will be done first, since FPGA computation takes a few clock cyles
return 0;
}
#endif
/*pid = wait(&wait_status);
printf("*** Parent detects process %d is done ***\n", pid);
printf("*** Parent exits ***\n");*/
exit(0);
}
// g++ -g -pedantic -Wall -Werror -Wextra -fsanitize=address test.cpp -o test
#define FORK 1
#include <unistd.h>
#include <fcntl.h>
#include <iostream>
#include <fstream>
#include <sys/wait.h>
#include <errno.h>
using namespace std;
const unsigned int image_width = 512;
const unsigned int image_height = 512;
const unsigned int NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] output:[Y, U, V]
void allwrite(int fd, unsigned char *buf, int len) {
int sent = 0;
int rc;
while (sent < len) {
rc = write(fd, buf + sent, len - sent);//fsync(fd);
if ((rc < 0) && (errno == EINTR)){
printf("continue in allwrite\n");
continue;
}
if (rc < 0) {
perror("allwrite() failed to write");
exit(1);
}
if (rc == 0) {
fprintf(stderr, "Reached write EOF (?!)\n");
exit(1);
}
sent += rc;
}
//return sent;
}
void allread(int fd, unsigned char *buf, int len) {
int recvd = 0;
int rc;
while (recvd < len) {
rc = read(fd, buf + recvd, len - recvd);//fsync(fd);
if ((rc < 0) && (errno == EINTR))
continue;
if (rc < 0) {
perror("allwrite() failed to write");
exit(1);
}
if (rc == 0) {
fprintf(stderr, "Reached write EOF (?!)\n");
exit(1);
}
recvd += rc;
}
//return recvd;
}
int main() {
#ifdef FORK
int wait_status; // for wait()
pid_t pid;
#endif
int fdr, fdw;
uint8_t *wr_buf, *rd_buf;
fdr = open("/dev/xillybus_read_128", O_RDONLY);
fdw = open("/dev/xillybus_write_128", O_WRONLY);
if ((fdr < 0) || (fdw < 0)) {
perror("Failed to open Xillybus device file(s)");
exit(1);
}
#ifdef FORK
pid = fork();
if (pid < 0) {
perror("Failed to fork()");
exit(1);
}
if (pid) {
close(fdr);
#endif
uint8_t rgb_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]; // could accomodate all (image_width*image_width) RGB pixels
rgb_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL] = '\0'; // however, this NULL character is not sent across write()
for(unsigned int rgb_index=0; rgb_index<(image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL); rgb_index++)
{
rgb_stream[rgb_index] = 1; // send all ones to fpga
}
wr_buf = rgb_stream;
allwrite(fdw, wr_buf, image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL);
allwrite(fdw, NULL, 0); // flush the write stream
printf("after allwrite() \n");
close(fdw);
#ifdef FORK
printf("*** Write process enters waiting status .....\n");
pid = wait(&wait_status);
printf("*** write process detects read process with pid %d was done ***\n", pid); // most probably write process will be done first, since FPGA computation takes a few clock cyles
return 0;
}
else {
close(fdw);
#endif
uint8_t yuv_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]; // could accomodate (image_width*image_width) YUV pixels
yuv_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL] = '\0'; // this NULL character is only to act as "stop bit" for character array
rd_buf = yuv_stream;
printf("before allread() \n");
allread(fdr, rd_buf, image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL);
printf("after allread() \n");
close(fdr);
#ifdef FORK
printf("*** Read process enters waiting status .....\n");
pid = wait(&wait_status);
printf("*** read process detects write process with pid %d was done ***\n", pid); // most probably write process will be done first, since FPGA computation takes a few clock cyles
return 0;
}
#endif
}
// g++ -g -pedantic -Wall -Werror -Wextra -fsanitize=thread -fno-omit-frame-pointer thread.cpp -o thread `pkg-config --cflags --libs opencv` -std=c++17
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <unistd.h>
#include <memory>
#include <thread> // std::thread
#include <mutex> // std::mutex, std::scoped_lock
#include <stdexcept> // std::logic_error
#include <fcntl.h>
#include <iostream>
#include <fstream> // std::ifstream, std::ofstream
#include <string>
#include <sys/wait.h>
#include <errno.h>
#include <cmath>
#include "timer.h"
using namespace cv;
using namespace std;
//#define LOOPBACK 1
#define RGB2YUV 1
unsigned int image_width;
unsigned int image_height;
const unsigned int CHNL_NUM = 3;
const unsigned int RED_CHNL = 2;
const unsigned int GREEN_CHNL = 1;
const unsigned int BLUE_CHNL = 0;
const unsigned int Y_CHNL = 2;
const unsigned int U_CHNL = 1;
const unsigned int V_CHNL = 0;
const unsigned int STREAM_WIDTH = 128;
const unsigned int NUM_OF_BITS_PER_BYTE = 8;
const unsigned int PIXEL_VALUE_RANGE = 8; // number of bits occupied by [R, G, B] or [Y, U, V] respectively (8-bit unsigned integer for each components) , https://docs.microsoft.com/en-us/windows-hardware/drivers/display/yuv-rgb-data-range-conversions
const unsigned int NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] output:[Y, U, V]
const unsigned int PIXEL_NUM_THAT_FITS_STREAM_WIDTH = 5; // 128-bit stream can at most fits 5 pixels ((PIXEL_NUM_THAT_FITS_STREAM_WIDTH*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE) bits = 120 bits), each pixels contains R, G, B which are encoded in 8 bits for each of the three color components
const unsigned int NUM_OF_THREAD = 2; // one write thread, one read thread
struct RGB_packet{
uint8_t R,G,B;
};
struct YUV_packet{
uint8_t Y,U,V;
};
struct YUV_packet* rgb2yuv(unsigned char R, unsigned char G, unsigned char B) // convert rgb to yuv
{
int Y_temp, U_temp, V_temp;
vector<YUV_packet> vyuv_result = vector<YUV_packet>(sizeof(unsigned char) * NUM_OF_COMPONENTS_IN_A_PIXEL);
struct YUV_packet *yuv_result = vyuv_result.data();
// https://en.wikipedia.org/wiki/YUV#Full_swing_for_BT.601
Y_temp = 77*R + 150*G + 29*B;
U_temp = -43*R - 84*G + 127*B;
V_temp = 127*R - 106*G - 21*B;
Y_temp = (Y_temp + 128) >> 8;
U_temp = (U_temp + 128) >> 8;
V_temp = (V_temp + 128) >> 8;
yuv_result->Y = Y_temp;
yuv_result->U = U_temp + 128;
yuv_result->V = V_temp + 128;
return yuv_result;
}
class logic_array
{
static vector<struct RGB_packet> tologic;
static vector<struct YUV_packet> fromlogic;
static mutex tologic_mutex;
static mutex fromlogic_mutex;
public:
logic_array(bool init_mem) // initiates memory pointers for RGB and YUV pixels
{
if(init_mem)
{
logic_array::tologic.resize(image_width * image_height) ; // lena.tiff is sized as 3*512*512
logic_array::fromlogic.resize(image_width * image_height);
}
}
vector<struct RGB_packet> get_tologic() { return tologic; }
vector<struct YUV_packet> get_fromlogic() { return fromlogic; }
unsigned char read_from_tologic(unsigned int color_channel, unsigned int array_index)
{
scoped_lock lock(logic_array::tologic_mutex);
unsigned char value = 0;
if(color_channel == RED_CHNL) value = logic_array::tologic[array_index].R;
else if(color_channel == GREEN_CHNL) value = logic_array::tologic[array_index].G;
else if(color_channel == BLUE_CHNL) value = logic_array::tologic[array_index].B;
return value;
}
unsigned char read_from_fromlogic(unsigned int color_channel, unsigned int array_index)
{
scoped_lock lock(logic_array::fromlogic_mutex);
unsigned char value = 0;
if(color_channel == Y_CHNL) value = logic_array::fromlogic[array_index].Y;
else if(color_channel == U_CHNL) value = logic_array::fromlogic[array_index].U;
else if(color_channel == V_CHNL) value = logic_array::fromlogic[array_index].V;
return value;
}
void write_into_tologic(unsigned char value, unsigned int color_channel, unsigned int array_index)
{
scoped_lock lock(logic_array::tologic_mutex);
if(color_channel == RED_CHNL) logic_array::tologic[array_index].R = value;
else if(color_channel == GREEN_CHNL) logic_array::tologic[array_index].G = value;
else if(color_channel == BLUE_CHNL) logic_array::tologic[array_index].B = value;
}
void write_into_fromlogic(unsigned char value, unsigned int color_channel, unsigned int array_index)
{
scoped_lock lock(logic_array::fromlogic_mutex);
if(color_channel == Y_CHNL) logic_array::fromlogic[array_index].Y = value;
else if(color_channel == U_CHNL) logic_array::fromlogic[array_index].U = value;
else if(color_channel == V_CHNL) logic_array::fromlogic[array_index].V = value;
}
};
// static inits
std::vector<struct RGB_packet> logic_array::tologic = std::vector<struct RGB_packet>();
std::vector<struct YUV_packet> logic_array::fromlogic = std::vector<struct YUV_packet>();
mutex logic_array::tologic_mutex = mutex();
mutex logic_array::fromlogic_mutex = mutex();
/*
Plain write() may not write all bytes requested in the buffer, so
allwrite() loops until all data was indeed written, or exits in
case of failure, except for EINTR. The way the EINTR condition is
handled is the standard way of making sure the process can be suspended
with CTRL-Z and then continue running properly.
The function has no return value, because it always succeeds (or exits
instead of returning).
The function doesn't expect to reach EOF either.
*/
void allwrite(int fd, unsigned char *buf, unsigned int len) {
unsigned int sent = 0;
int rc;
while (sent < len) {
rc = write(fd, buf + sent, len - sent);fsync(fd);
if ((rc < 0) && (errno == EINTR)){
printf("continue in allwrite\n");
continue;
}
if (rc < 0) {
perror("allwrite() failed to write");
exit(1);
}
if (rc == 0) {
fprintf(stderr, "Reached write EOF (?!)\n");
exit(1);
}
sent += rc;
}
//return sent;
}
void allread(int fd, unsigned char *buf, unsigned int len) {
unsigned int recvd = 0;
int rc;
while (recvd < len) {
//if(len < STREAM_WIDTH/NUM_OF_BITS_PER_BYTE) printf("before last read\n");
rc = read(fd, buf + recvd, len - recvd);fsync(fd);
//if(len < STREAM_WIDTH/NUM_OF_BITS_PER_BYTE) printf("rc = %d\n", rc);
if ((rc < 0) && (errno == EINTR)){
printf("continue in allread\n");
continue;
}
if (rc < 0) {
perror("allwrite() failed to write");
exit(1);
}
if (rc == 0) {
fprintf(stderr, "Reached write EOF (?!)\n");
exit(1);
}
recvd += rc;
}
//return recvd;
}
void write_thread(Mat *rgbchannel){
int fdw;
uint8_t *wr_buf;
fdw = open("/dev/xillybus_write_128", O_WRONLY); // will change to /dev/xillybus_write_128
if (fdw < 0) {
perror("Failed to open /dev/xillybus_write_128");
exit(1);
}
logic_array logic(0);
for(unsigned int pixel_index = 0; pixel_index < (image_width * image_height); pixel_index++)
{
logic.write_into_tologic(*(rgbchannel[RED_CHNL].data + pixel_index), RED_CHNL, pixel_index);
logic.write_into_tologic(*(rgbchannel[GREEN_CHNL].data + pixel_index), GREEN_CHNL, pixel_index);
logic.write_into_tologic(*(rgbchannel[BLUE_CHNL].data + pixel_index), BLUE_CHNL, pixel_index);
}
unsigned int num_of_pixels_sent = 0; // this is actual pixels number already sent, does not include the empty 8 bits
//unsigned int if_index = 0;
unsigned int rgb_stream_index = 0;
uint8_t rgb_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE + 1]; // could accomodate 5 pixels
while (num_of_pixels_sent < image_width * image_height)
{
if(((image_width * image_height)-num_of_pixels_sent) >= PIXEL_NUM_THAT_FITS_STREAM_WIDTH)
{
// arrange the five pixels in the format as in https://i.imgur.com/mdJwk7J.png
//if_index++; //printf("if_index = %d\n\r", if_index);
//if(if_index == 3) break;
for(rgb_stream_index = 0; (rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<((STREAM_WIDTH/NUM_OF_BITS_PER_BYTE)-1); rgb_stream_index=rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
rgb_stream[rgb_stream_index] = logic.read_from_tologic(RED_CHNL , (rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent);
rgb_stream[rgb_stream_index+1] = logic.read_from_tologic(GREEN_CHNL , (rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent);
rgb_stream[rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1] = logic.read_from_tologic(BLUE_CHNL , (rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent);
}
rgb_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE] = '\0'; // however, this NULL character is not sent across write()
rgb_stream[(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE)-1] = 0; // remember that the eight most significant bits of the 128-bits stream are ignored by hardware logic
/*for(unsigned int j=0; j<(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); j++)
{
printf("rgb_stream[%d] = %d\n\r", j, rgb_stream[j]); //break;
}*/
wr_buf = rgb_stream; // write() writes wr_buf in first-in-first-out order, so rgb_stream[0] will be written first into fdw as the least significant byte
allwrite(fdw, wr_buf, STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); // this write() is 128-bits or 16 bytes which include the empty MSB 8 bits
//printf("wr = %d\n", wr);
num_of_pixels_sent = num_of_pixels_sent + PIXEL_NUM_THAT_FITS_STREAM_WIDTH;
//printf("num_of_pixels_sent = %d\n", num_of_pixels_sent);
}
else // the remaining pixels do not fill all five pixel slots for a 128-bit stream
{
//break; // just to send bytes which is divisible by "STREAM_WIDTH/NUM_OF_BITS_PER_BYTE" bytes
for(rgb_stream_index = 0; (rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL); rgb_stream_index=rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
rgb_stream[rgb_stream_index] = logic.read_from_tologic(RED_CHNL , (rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent);
rgb_stream[rgb_stream_index+1] = logic.read_from_tologic(GREEN_CHNL , (rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent);
rgb_stream[rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1] = logic.read_from_tologic(BLUE_CHNL , (rgb_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent);
}
for(rgb_stream_index = (((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL); rgb_stream_index<((STREAM_WIDTH/NUM_OF_BITS_PER_BYTE)-1); rgb_stream_index=rgb_stream_index+1)
{
rgb_stream[rgb_stream_index] = 0; // If the data doesn't end on a 16-byte boundary, consider padding with dummy data (usually zeros).
}
rgb_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE] = '\0'; // however, this NULL character is not sent across write()
rgb_stream[(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE)-1] = (image_width * image_height)-num_of_pixels_sent; // remember that the eight most significant bits of the 128-bits stream are ignored by hardware logic, but not in the last 128-bit data transaction, since this will help with hardware 'ap_start' and 'is_last_few_pixels' signals
/*for(unsigned int j=0; j<(((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL+1); j++)
{
printf("rgb_stream[%d] = %d\n\r", j, rgb_stream[j]);
}*/
wr_buf = rgb_stream; // this is a partially filled 128-bit stream (with less than 5 pixels)
allwrite(fdw, wr_buf, STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); // this is a write() which contains some dummy zero data and less than 5 pixels
//printf("wr = %d\n", wr);
break; // finish sending all (image_width * image_height) pixels
}
}
write(fdw, NULL, 0); // flush the write stream
close(fdw);
}
void read_thread(){
int fdr;
uint8_t *rd_buf;
fdr = open("/dev/xillybus_read_128", O_RDONLY); // will change to /dev/xillybus_read_128
if (fdr < 0) {
perror("Failed to open /dev/xillybus_read_128");
exit(1);
}
logic_array logic(0);
unsigned int num_of_pixels_received = 0; // this is actual pixels number already received, does not include the empty 8 bits
unsigned int yuv_stream_index = 0;
uint8_t yuv_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE + 1]; // could accomodate 5 pixels
while (num_of_pixels_received < image_width * image_height) {
if(((image_width * image_height)-num_of_pixels_received) >= PIXEL_NUM_THAT_FITS_STREAM_WIDTH)
{
rd_buf = yuv_stream;
//printf("before read() \n");
allread(fdr, rd_buf, STREAM_WIDTH/NUM_OF_BITS_PER_BYTE);
//printf("after read() \n");
// For every five pixels (128 bits) received from hardware logic computation, print out the YUV values of all five pixels
/*for(yuv_stream_index = 0; yuv_stream_index<STREAM_WIDTH/NUM_OF_BITS_PER_BYTE; yuv_stream_index=yuv_stream_index+1)
{
printf("yuv_stream[%d] = %d\n", yuv_stream_index, yuv_stream[yuv_stream_index]);
}*/
yuv_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE] = '\0'; // this NULL character is only to act as "stop bit" for character array
// store the calculated output YUV pixels into fromlogic such that we could reconstruct the image in its original dimension for visual display
for(yuv_stream_index = 0; (yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); yuv_stream_index=yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
logic.write_into_fromlogic(yuv_stream[yuv_stream_index], Y_CHNL, (yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received);
logic.write_into_fromlogic(yuv_stream[yuv_stream_index+1], U_CHNL, (yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received);
logic.write_into_fromlogic(yuv_stream[yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1], V_CHNL, (yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received);
}
num_of_pixels_received = num_of_pixels_received + PIXEL_NUM_THAT_FITS_STREAM_WIDTH;
//printf("num_of_pixels_received = %d\n\r", num_of_pixels_received);
//if(num_of_pixels_received == 40940) break; // just to test if there is actually something being read, or returned from hardware
}
else // the remaining pixels do not fill all five pixel slots for a 128-bit stream
{
//break; // just to test the rest of received bytes which is divisible by "STREAM_WIDTH/NUM_OF_BITS_PER_BYTE" bytes
rd_buf = yuv_stream;
//printf("before read in else. \n");
allread(fdr, rd_buf, ((image_width * image_height)-num_of_pixels_received)*NUM_OF_COMPONENTS_IN_A_PIXEL); // is a partially filled 128-bit stream (with less than 5 pixels)
//printf("after read in else. \n");
for(yuv_stream_index = 0; (yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(((image_width * image_height)-num_of_pixels_received)*NUM_OF_COMPONENTS_IN_A_PIXEL); yuv_stream_index=yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
logic.write_into_fromlogic(yuv_stream[yuv_stream_index], Y_CHNL, (yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received);
logic.write_into_fromlogic(yuv_stream[yuv_stream_index+1], U_CHNL, (yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received);
logic.write_into_fromlogic(yuv_stream[yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1], V_CHNL, (yuv_stream_index/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received);
}
break; // finish receiving all (image_width * image_height) pixels
}
}
close(fdr);
}
/**
 * Verifies the pixels returned by the hardware against the pixels that were sent.
 *
 * For each of the image_width * image_height pixels, the R/G/B values are read
 * from the "tologic" store and the Y/U/V values from the "fromlogic" store of a
 * logic_array handle. The comparison depends on the build mode:
 *   - LOOPBACK : the returned triple must equal the sent triple exactly.
 *   - RGB2YUV  : each returned component must be within +/-1 of the value
 *                produced by the software rgb2yuv() conversion.
 *   - neither  : there is no reference to compare against, so the first pixel
 *                is reported as a failure.
 *
 * A diagnostic is printed for the first failing pixel only.
 *
 * @return true when every pixel passes; false as soon as one pixel fails.
 */
bool check_data(){
    // main() constructs logic_array with 1 for "memory initialization"; 0 is
    // presumably "attach without re-initialising" -- TODO confirm.
    logic_array logic(0);

    const unsigned int total_pixels = image_width * image_height;

    for (unsigned int i = 0; i < total_pixels; i++) // check the performance of hardware with respect to software computation
    {
        const uint8_t tologic_R = logic.read_from_tologic(RED_CHNL , i);
        const uint8_t tologic_G = logic.read_from_tologic(GREEN_CHNL , i);
        const uint8_t tologic_B = logic.read_from_tologic(BLUE_CHNL , i);
        const uint8_t fromlogic_Y = logic.read_from_fromlogic(Y_CHNL , i);
        const uint8_t fromlogic_U = logic.read_from_fromlogic(U_CHNL , i);
        const uint8_t fromlogic_V = logic.read_from_fromlogic(V_CHNL , i);

#ifdef LOOPBACK
        const bool mismatch = (tologic_R != fromlogic_Y) ||
                              (tologic_G != fromlogic_U) ||
                              (tologic_B != fromlogic_V);
#elif RGB2YUV
        // Convert once and read all three components from the same result.
        // NOTE(review): rgb2yuv() hands back a pointer-like result; whether the
        // caller is expected to release it is not visible from here -- confirm.
        const auto expected_yuv = rgb2yuv(tologic_R, tologic_G, tologic_B);
        const uint8_t expected_Y = expected_yuv->Y;
        const uint8_t expected_U = expected_yuv->U;
        const uint8_t expected_V = expected_yuv->V;

        // uint8_t operands are promoted to int, so the differences cannot wrap.
        // rgb2yuv conversion hardware tolerance fails by more than 1 compared to software computation
        const bool mismatch = (abs(expected_Y - fromlogic_Y) > 1) ||
                              (abs(expected_U - fromlogic_U) > 1) ||
                              (abs(expected_V - fromlogic_V) > 1);
#else
        // No verification mode selected: nothing can be confirmed, so the pixel
        // is reported as failing (same outcome as before this was made explicit).
        const bool mismatch = true;
#endif

        if (mismatch)
        {
            printf("********************************* Attention *************************************\n\r");
            printf("R:%d G:%d B:%d \n\r", tologic_R, tologic_G, tologic_B);
            printf("Y:%d U:%d V:%d \n\r", fromlogic_Y, fromlogic_U, fromlogic_V);
            // Guard must mirror the branch that declares expected_*; a plain
            // "#ifdef RGB2YUV" breaks the build when LOOPBACK is also defined.
#if !defined(LOOPBACK) && RGB2YUV
            printf("i:%u expected_Y:%d expected_U:%d expected_V:%d \n\r", i, expected_Y, expected_U, expected_V);
#endif
            return false;
        }
    }

    return true;
}
/**
 * Shows the YUV image returned by the hardware and saves it to "lena.png".
 *
 * The Y/U/V values of every pixel are read from the "fromlogic" store of a
 * logic_array handle and packed into one interleaved 8-bit, 3-channel buffer
 * with V at byte offset 0, U at offset 1 and Y at offset 2 of each pixel.
 * That buffer is wrapped (not copied) in a cv::Mat, displayed as a whole and
 * as three separate planes, and written to disk after a key press.
 */
void display_yuv_image()
{
    const unsigned int pixel_count = image_height * image_width;
    const unsigned int byte_count = pixel_count * NUM_OF_COMPONENTS_IN_A_PIXEL;

    // Raw pixel storage for the Mat below. No trailing sentinel byte is needed:
    // the Mat is told the exact dimensions and never scans for a terminator.
    uint8_t *yuv_array = new uint8_t[byte_count];

    logic_array logic(0);

    for (unsigned int pixel = 0; pixel < pixel_count; pixel++)
    {
        const uint8_t fromlogic_Y = logic.read_from_fromlogic(Y_CHNL , pixel);
        const uint8_t fromlogic_U = logic.read_from_fromlogic(U_CHNL , pixel);
        const uint8_t fromlogic_V = logic.read_from_fromlogic(V_CHNL , pixel);

        const unsigned int base = pixel * NUM_OF_COMPONENTS_IN_A_PIXEL;
        yuv_array[base] = fromlogic_V;
        yuv_array[base + 1] = fromlogic_U;
        yuv_array[base + NUM_OF_COMPONENTS_IN_A_PIXEL - 1] = fromlogic_Y;
    }

    // https://docs.opencv.org/2.4/modules/core/doc/basic_structures.html#Mat::Mat(int%20rows,%20int%20cols,%20int%20type,%20void*%20data,%20size_t%20step)
    // Mat(int rows, int cols, int type, void* data, size_t step=AUTO_STEP)
    // CV_8UC3: 3 channels, 8-bit depth. The Mat only references yuv_array, so
    // the buffer has to stay alive until the last use of yuv_img.
    Mat yuv_img(image_height, image_width, CV_8UC3, yuv_array);

    namedWindow("YUV_output", WINDOW_AUTOSIZE);
    imshow("YUV_output", yuv_img);

    // The actual splitting into separate planes.
    // NOTE(review): the planes are indexed with Y_CHNL/U_CHNL/V_CHNL, which lines
    // up with the V,U,Y packing above only if those constants are 2/1/0 -- confirm.
    Mat yuvchannel[CHNL_NUM];
    split(yuv_img, yuvchannel);

    namedWindow("Y_channel", WINDOW_AUTOSIZE);
    imshow("Y_channel", yuvchannel[Y_CHNL]);

    namedWindow("U_channel", WINDOW_AUTOSIZE);
    imshow("U_channel", yuvchannel[U_CHNL]);

    namedWindow("V_channel", WINDOW_AUTOSIZE);
    imshow("V_channel", yuvchannel[V_CHNL]);

    waitKey(0); // allow infinite time to view the YUV output image until any key is pressed

    // destroy the windows named "YUV_output", "Y_channel", "U_channel", "V_channel"
    destroyWindow("YUV_output");
    destroyWindow("Y_channel");
    destroyWindow("U_channel");
    destroyWindow("V_channel");

    imwrite("lena.png" , yuv_img); // save the yuv output image to a file

    // Allocated with new[], so it must be released with delete[]; scalar delete
    // on an array is undefined behaviour.
    delete[] yuv_array;
}
/**
 * Program entry point.
 *
 * Loads a colour image (argv[1], or "lena512color.tiff" when no argument is
 * given), shows it together with its three colour planes, streams the planes
 * through the writer/reader threads, reports the elapsed time and throughput,
 * and finally checks the returned data and displays it when the check passes.
 *
 * @param argc number of command-line arguments
 * @param argv argv[1], when present, is the path of the image to process
 * @return 0 on completion, -1 when the image cannot be loaded
 */
int main(int argc, char *argv[]) {
    // READ in an image file
    String imageName( "lena512color.tiff" ); // by default
    if( argc > 1)
    {
        imageName = argv[1];
    }

    Mat image = imread( imageName, IMREAD_COLOR ); // Read the file
    if( image.empty() ) // Check for invalid input
    {
        cout << "Could not open or find the image" << std::endl ;
        return -1;
    }
    image_width = image.size().width;
    image_height = image.size().height;

    namedWindow( "Original Image", WINDOW_AUTOSIZE );
    imshow( "Original Image", image );

    // The actual splitting.
    Mat rgbchannel[CHNL_NUM];
    split(image, rgbchannel);

    namedWindow("Red", WINDOW_AUTOSIZE);
    imshow("Red", rgbchannel[RED_CHNL]);

    namedWindow("Green", WINDOW_AUTOSIZE);
    imshow("Green", rgbchannel[GREEN_CHNL]);

    namedWindow("Blue", WINDOW_AUTOSIZE);
    imshow("Blue", rgbchannel[BLUE_CHNL]);

    waitKey(0); // see all three split channels before feeding in the channel data to xillybus/RIFFA for hardware computation

    logic_array logic(1); // memory initialization

    // Start the clock before the worker threads are launched so that the
    // measurement covers the whole transfer, not just what is left of it by the
    // time this thread gets to read the clock.
    GET_TIME_INIT(NUM_OF_THREAD);
    GET_TIME_VAL(0);

    thread wr_th (write_thread, rgbchannel); // spawn new thread that calls write_thread()
    thread rd_th (read_thread); // spawn new thread that calls read_thread()

    // synchronize threads:
    wr_th.join(); // pauses until wr_th finishes
    rd_th.join(); // pauses until rd_th finishes

    GET_TIME_VAL(1);

    // TIME_VAL_TO_MS is taken to yield milliseconds (per its name and the
    // existing x1000 conversion to microseconds below).
    const double elapsed_ms = TIME_VAL_TO_MS(1) - TIME_VAL_TO_MS(0);

    // Computed in double so that width*height*3 cannot wrap an unsigned int.
    const double transferred_bytes = static_cast<double>(image_width) * image_height * NUM_OF_COMPONENTS_IN_A_PIXEL;

    printf("round-trip time: %f us\n\r", elapsed_ms*1000.0);
    // bytes -> megabytes (1e6 bytes), milliseconds -> seconds
    printf("overall bw: %f MegaBytes/second\n\r", (transferred_bytes/1.0e6)/(elapsed_ms/1000.0));

    printf("Now, we are going to check the result returned from FPGA\n");
    if(check_data() == true)
    {
        printf("all computation results are correct !\n");
        display_yuv_image();
    }

    // destroy the windows with the name, "Red", "Green", "Blue"
    destroyWindow("Red");
    destroyWindow("Green");
    destroyWindow("Blue");

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment