Generating a saliency map with the spectral residual approach
SaliencyDetector.h
//
// Copyright © 2018 wojteklu. All rights reserved.
//

#import <UIKit/UIKit.h>

@interface SaliencyDetector: NSObject

-(CGRect)findMostProminentPartOfImage:(UIImage *)image;
-(UIImage *)selectMostProminentPartOfImage:(UIImage *)image;
-(UIImage *)generateSaliencyMapOfImage:(UIImage *)image;

@end
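A minimal usage sketch for this interface; the asset name and the call site are assumptions, not part of the gist. Since the header imports only UIKit, it can be imported from plain Objective-C files, while all C++/OpenCV code stays in the .mm implementation.

#import "SaliencyDetector.h"

// e.g. inside a view controller; "photo" is a placeholder asset name
UIImage *photo = [UIImage imageNamed:@"photo"];
SaliencyDetector *detector = [[SaliencyDetector alloc] init];

// bounding box of the most salient region, in pixel coordinates of the image
CGRect prominentRect = [detector findMostProminentPartOfImage:photo];

// the input image with the detected region outlined
UIImage *annotated = [detector selectMostProminentPartOfImage:photo];

// binary saliency map produced by the spectral residual approach
UIImage *saliencyMap = [detector generateSaliencyMapOfImage:photo];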
SaliencyDetector.mm
//
// Copyright © 2018 wojteklu. All rights reserved.
//

#import "SaliencyDetector.h"
#import "UIImage+Mat.h"

using namespace std;
using namespace cv;

@implementation SaliencyDetector

#pragma mark - public methods
-(CGRect)findMostProminentPartOfImage:(UIImage *)image {
    Mat input = [UIImage matFromImage:image];
    Mat saliencyMap = [self calculateSaliencyOfImage:input];
    cv::Rect rect = [self findBiggestContourRectFromMat:saliencyMap];
    return CGRectMake(rect.x, rect.y, rect.width, rect.height);
}

-(UIImage *)selectMostProminentPartOfImage:(UIImage *)image {
    Mat input = [UIImage matFromImage:image];
    Mat saliencyMap = [self calculateSaliencyOfImage:input];
    cv::Rect rect = [self findBiggestContourRectFromMat:saliencyMap];
    rectangle(input, rect, CV_RGB(0,0,255), 5);
    return [UIImage imageFromMat:input withOrientation:image.imageOrientation];
}

-(UIImage *)generateSaliencyMapOfImage:(UIImage *)image {
    Mat input = [UIImage matFromImage:image];
    Mat saliencyMap = [self calculateSaliencyOfImage:input];
    return [UIImage imageFromMat:saliencyMap withOrientation:image.imageOrientation];
}
#pragma mark - spectral residual approach

-(Mat)calculateSaliencyOfImage:(Mat)image {
    Mat magnitude;

    // drop the alpha channel so the split() below yields exactly three color channels
    // (matFromImage always produces a 4-channel CV_8UC4 mat)
    cvtColor(image, image, COLOR_RGBA2BGR);

    // calculate saliency magnitude of each channel independently
    Mat channels[3];
    split(image, channels);

    Mat magnitudes[3];
    for (int i=0; i<3; i++) {
        magnitudes[i] = [self calculateMagnitudeOfChannel:channels[i]];
    }

    // calculate overall salience of the multi-channel image,
    // determined by the average over all channels
    Mat output, average;
    calcCovarMatrix(magnitudes, 3, output, average, CV_COVAR_NORMAL);

    // blur to make the result appear smoother
    GaussianBlur(average, magnitude, cv::Size(5, 5), 8);

    // square the matrix to highlight the regions of high salience
    magnitude = magnitude.mul(magnitude);

    // normalize values, so that the largest value is 1
    double minVal, maxVal;
    minMaxLoc(magnitude, &minVal, &maxVal);
    magnitude = magnitude / maxVal;
    magnitude.convertTo(magnitude, CV_32F);

    // scale it back up to the original resolution
    resize(magnitude, magnitude, image.size());

    // threshold the saliency map
    magnitude = magnitude * 255;
    magnitude.convertTo(magnitude, CV_8U);

    Mat saliencyMap;
    threshold(magnitude, saliencyMap, 0, 255, THRESH_BINARY + THRESH_OTSU);

    return saliencyMap;
}
-(Mat)calculateMagnitudeOfChannel:(Mat)channel {
    cv::Size smallFrame(64, 64);
    resize(channel, channel, smallFrame);

    Mat mergedImage(smallFrame, CV_64FC2);
    Mat imageDFT;
    std::vector<Mat> vector;

    Mat realImage(smallFrame, CV_64F);
    channel.convertTo(realImage, CV_64F);
    Mat imaginaryImage(smallFrame, CV_64F);
    imaginaryImage.setTo(0);

    vector.push_back(realImage);
    vector.push_back(imaginaryImage);

    // calculate the magnitude and phase of the Fourier spectrum
    merge(vector, mergedImage);
    dft(mergedImage, imageDFT, DFT_COMPLEX_OUTPUT);
    split(imageDFT, vector);

    Mat angle(smallFrame, CV_64F);
    Mat magnitude(smallFrame, CV_64F);
    cartToPolar(vector.at(0), vector.at(1), magnitude, angle, false);

    // calculate the log magnitude of the Fourier spectrum
    Mat logAmplitude;
    log(magnitude, logAmplitude);

    // approximate the averaged spectrum of a typical natural image
    // by convolving the image with a local averaging filter
    Mat logAmplitudeBlur;
    blur(logAmplitude, logAmplitudeBlur, cv::Size(3, 3));

    // calculate the spectral residual. The spectral residual primarily
    // contains the nontrivial parts of a scene
    exp(logAmplitude - logAmplitudeBlur, magnitude);

    // calculate the saliency map by using the inverse Fourier transform
    polarToCart(magnitude, angle, vector.at(0), vector.at(1), false);
    merge(vector, imageDFT);
    idft(imageDFT, mergedImage);
    split(mergedImage, vector);
    cartToPolar(vector.at(0), vector.at(1), magnitude, angle, false);

    return magnitude;
}
#pragma mark - finding biggest contour

// comparator must be defined before it is used in std::sort below
bool compareContourAreas(std::vector<cv::Point> contour1, std::vector<cv::Point> contour2) {
    return fabs(contourArea(cv::Mat(contour1))) < fabs(contourArea(cv::Mat(contour2)));
}

-(cv::Rect)findBiggestContourRectFromMat:(Mat)mat {
    std::vector<std::vector<cv::Point> > contours;
    std::vector<cv::Vec4i> hierarchy;
    cv::findContours(mat, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point(0, 0));

    if (contours.size() == 0) {
        return cv::Rect(0,0,0,0);
    }

    // sort contours by area, ascending, so the biggest contour ends up last
    std::sort(contours.begin(), contours.end(), compareContourAreas);

    std::vector<cv::Point> biggestContour = contours[contours.size()-1];
    return boundingRect(biggestContour);
}

@end
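For reference, what calculateMagnitudeOfChannel and calculateSaliencyOfImage compute corresponds to the spectral residual formulation of Hou & Zhang (2007). In rough LaTeX notation, with \mathcal{F} the 2-D DFT of a channel I_c downscaled to 64×64:

\[
\begin{aligned}
A_c(f) &= \lvert \mathcal{F}[I_c](f) \rvert, \qquad P_c(f) = \operatorname{angle}\,\mathcal{F}[I_c](f) \\
R_c(f) &= \log A_c(f) - \big(h_3 * \log A_c\big)(f) \\
m_c(x) &= \big\lvert \mathcal{F}^{-1}\big[\exp\big(R_c(f) + i\,P_c(f)\big)\big](x) \big\rvert
\end{aligned}
\]

where h_3 is the 3×3 box filter applied by blur(). The per-channel maps m_c are then averaged, smoothed with a 5×5 Gaussian g, squared, normalized to [0, 1], resized back to the input resolution, and binarized with Otsu's threshold, i.e. the map is proportional to \([ (g * \bar m)(x) ]^2\) with \(\bar m = \tfrac{1}{3}\sum_c m_c\) before thresholding.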
UIImage+Mat.h
//
// Copyright © 2018 wojteklu. All rights reserved.
//

#ifdef __cplusplus
#undef NO
#undef YES
#import <opencv2/opencv.hpp>
#import <opencv2/imgcodecs/ios.h>
#endif

#import <UIKit/UIKit.h>

@interface UIImage (Mat)

+(cv::Mat)matFromImage:(UIImage *)image;
+(UIImage *)imageFromMat:(cv::Mat)mat withOrientation:(UIImageOrientation)orientation;

@end
UIImage+Mat.mm
//
// Copyright © 2018 wojteklu. All rights reserved.
//

#import "UIImage+Mat.h"

using namespace std;
using namespace cv;

@implementation UIImage (Mat)

+(Mat)matFromImage:(UIImage *)image {
    CGColorSpaceRef colorSpace = CGImageGetColorSpace(image.CGImage);
    CGFloat cols = image.size.width;
    CGFloat rows = image.size.height;

    if (image.imageOrientation == UIImageOrientationLeft || image.imageOrientation == UIImageOrientationRight) {
        cols = image.size.height;
        rows = image.size.width;
    }

    // 8 bits per component, 4 channels (color channels + alpha)
    cv::Mat mat(rows, cols, CV_8UC4);

    CGContextRef contextRef = CGBitmapContextCreate(mat.data,     // pointer to data
                                                    cols,         // width of bitmap
                                                    rows,         // height of bitmap
                                                    8,            // bits per component
                                                    mat.step[0],  // bytes per row
                                                    colorSpace,
                                                    kCGImageAlphaNoneSkipLast|kCGBitmapByteOrderDefault);

    CGContextDrawImage(contextRef, CGRectMake(0, 0, cols, rows), image.CGImage);
    CGContextRelease(contextRef);
    // note: colorSpace comes from CGImageGetColorSpace, which does not transfer
    // ownership, so it must not be released here

    // swap channels
    std::vector<Mat> ch;
    cv::split(mat,ch);
    std::swap(ch[0],ch[2]);
    cv::merge(ch,mat);

    return mat;
}
+(UIImage *)imageFromMat:(cv::Mat)mat withOrientation:(UIImageOrientation)orientation {
    NSData *data = [NSData dataWithBytes:mat.data length:mat.elemSize()*mat.total()];

    CGColorSpaceRef colorSpace;
    CGBitmapInfo bitmapInfo;

    if (mat.elemSize() == 1) {
        colorSpace = CGColorSpaceCreateDeviceGray();
        bitmapInfo = kCGImageAlphaNone | kCGBitmapByteOrderDefault;
    } else {
        colorSpace = CGColorSpaceCreateDeviceRGB();
        bitmapInfo = kCGBitmapByteOrder32Little |
            (mat.elemSize() == 3 ? kCGImageAlphaNone : kCGImageAlphaNoneSkipFirst);
    }

    CGDataProviderRef provider = CGDataProviderCreateWithCFData((__bridge CFDataRef)data);

    CGImageRef imageRef = CGImageCreate(mat.cols,            // width
                                        mat.rows,            // height
                                        8,                   // bits per component
                                        8 * mat.elemSize(),  // bits per pixel
                                        mat.step[0],         // bytes per row
                                        colorSpace,
                                        bitmapInfo,
                                        provider,
                                        NULL,
                                        false,
                                        kCGRenderingIntentDefault);

    UIImage *image = [UIImage imageWithCGImage:imageRef scale:1 orientation:orientation];

    CGImageRelease(imageRef);
    CGDataProviderRelease(provider);
    CGColorSpaceRelease(colorSpace);

    return image;
}

@end