-
-
Save berak/ccad7b4c0727d7ccda9c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <opencv2/opencv.hpp> | |
using namespace std; | |
#include <cstdio> | |
#include <iostream> | |
using namespace cv; | |
// | |
// | |
// "Multimode Spatiotemporal Background Modeling for Complex Scenes" | |
// Sun, Li ; De Neyer, Quentin ; De Vleeschouwer, Christophe | |
// (EUSIPCO 2012) | |
// | |
//per pixel: | |
// (model update) | |
// per existing mode: | |
// if neighbourhood looks similar, | |
// increase freq and interpol value | |
// else add a new mode | |
// (find smallest frq and replace that mode, | |
// freq=0, v=pixel with largest distance(?) ) | |
// decrease frq for all modes to remove 'unseen' ones | |
// | |
// (separation)
// find mode with highest freq, | |
// if freq larger than threshold -> background | |
// | |
// | |
struct MSBP | |
{ | |
ushort M; // num modes | |
ushort fi; // increase freq by fi | |
ushort F; // saturation /threshold value for freq (learning rate param) | |
double chroma; // factor for chroma distance | |
double thresh_d; // distance threshold for neighbour evaluation | |
vector< Mat_< Vec3b > > model_y; // yuv | |
vector< Mat_< ushort > > model_f; // freq | |
MSBP() : M(10),fi(2),F(200),chroma(1.2),thresh_d(2000) {} // paper, footnote 2 (i probably missed any value there for M(or K)) | |
bool empty() | |
{ | |
return model_y.empty(); | |
} | |
void reset(const Mat &yuv) | |
{ | |
model_y.resize(M); | |
model_f.resize(M); | |
model_y[0] = Mat_<Vec3b>(yuv.size(), 0); | |
model_f[0] = Mat_<ushort>(yuv.size(), 1); // needed for 1st. iteration (slightly different approach to the paper, but not much, imho) | |
for (int i=1; i<M; i++) | |
{ | |
model_y[i] = Mat_<Vec3b>(yuv.size(), 0); | |
model_f[i] = Mat_<ushort>(yuv.size(), 0); | |
} | |
} | |
inline | |
double dist(const Vec3b &a, const Vec3b &b) | |
{ | |
double dy = (a[0]-b[0]); | |
double du = (a[1]-b[1]); | |
double dv = (a[2]-b[2]); | |
return dy*dy + chroma * (du*du + dv*dv); | |
} | |
void segment(const Mat &bgr, Mat &mask) | |
{ | |
Mat_<Vec3b> yuv; | |
cvtColor(bgr, yuv, COLOR_BGR2YUV); | |
Mat_<uchar> seg(yuv.size(), 255); // init to fg | |
if (empty()) | |
reset(yuv); | |
const static int nb[9][2] = {0,0, -1,0, -1,1, 0,1, 1,1, 1,0, 1,-1, 0,-1, -1,-1}; | |
for (int y=1; y<yuv.rows-1; y++) | |
{ | |
for (int x=1; x<yuv.cols-1; x++) | |
{ | |
int maxI = -1; | |
int maxM = -1; | |
for (int m=0; m<M; m++) | |
{ | |
ushort &freq = model_f[m](y,x); | |
if (freq == 0) continue; | |
Vec3b &p = model_y[m](y,x); | |
// sample 3x3 neighbourhood | |
for (int n=0; n<9; n++) | |
{ | |
Vec3b &q = yuv(y + nb[n][0], x + nb[n][1]); | |
double d = dist(p,q); | |
if (d > thresh_d) | |
{ | |
maxI=n; | |
maxM=m; | |
break; // do we really need the 'largest' one, or will the 'first' one do ? | |
} | |
} | |
if (maxM == -1) | |
{ | |
if (freq < F) | |
freq += fi; | |
// todo : interpolation, beta <--> 1-beta , as mentionend in the paper, i don't bet much on it atm. | |
} | |
else | |
{ | |
break; | |
} | |
} | |
if (maxM != -1) // new mode | |
{ | |
// find a free slot(or the one with smallest freq): | |
int minF = 100000; | |
int minId = -1; | |
for (int m=0; m<M; m++) | |
{ | |
if (m == maxM) continue; | |
int f = model_f[m](y,x); | |
if (f < minF) | |
{ | |
minF=f; | |
minId=m; | |
} | |
if (f == 0) break; | |
} | |
// assign new mode | |
model_f[minId](y,x) = fi; | |
model_y[minId](y,x) = yuv(y + nb[maxI][0], x + nb[maxI][1]); | |
} | |
for (int m=0; m<M; m++) | |
{ | |
if (model_f[m](y,x) >= F) | |
{ | |
seg(y,x) = 0; // it's a bg pixel | |
break; | |
} | |
} | |
for (int m=0; m<M; m++) | |
{ | |
if (model_f[m](y,x)>0) | |
model_f[m](y,x) --; | |
// hmm, decreasing by one, but increasing by fi(larger) seems to lead to a lot of 'ghosts' | |
} | |
} | |
} | |
mask = seg; | |
} | |
}; | |
int main() | |
{ | |
MSBP bg; // how you'd call it ? the 2 most difficult things in cs are: 1. cache invalidation 2. naming things 3. one-offf errors. | |
VideoCapture cap(0); | |
// setup some sliders: | |
int thresh=bg.thresh_d; | |
int F=bg.F; | |
int fi = bg.fi; | |
namedWindow("controls"); | |
createTrackbar("F","controls",&F,1000); | |
createTrackbar("fi","controls",&fi,10); | |
createTrackbar("t", "controls",&thresh,10000); | |
while( cap.isOpened() ) | |
{ | |
bg.thresh_d = (double)thresh; | |
bg.F = (ushort)F; | |
bg.fi = (ushort)fi; | |
Mat frame; | |
if ( ! cap.read(frame) ) | |
break; | |
Mat mask; | |
bg.segment(frame,mask); | |
Mat segc; | |
cvtColor(mask, segc, cv::COLOR_GRAY2BGR); | |
addWeighted(frame,0.4, segc, 0.6, 0, frame ); | |
imshow("msbp",frame); | |
int k = waitKey(10); | |
if ( k==27 ) | |
break; | |
if ( k==' ' ) | |
bg.reset(frame); | |
cerr << "."; // roughly 4fps on an outdated 2ghz athlon2 | |
} | |
return 0; // atm, it's not the timing, the output kinda sucks. | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment