Skip to content

Instantly share code, notes, and snippets.

@StelianMorariu
Forked from joa/StackBlur.java
Last active August 29, 2015 14:12
Show Gist options
  • Save StelianMorariu/3f04e5e8bd5b71bc0cbc to your computer and use it in GitHub Desktop.
Save StelianMorariu/3f04e5e8bd5b71bc0cbc to your computer and use it in GitHub Desktop.
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
*
*/
public final class StackBlur {
private static final int PARALLEL_THRESHOLD = 2048 * 2048;
private static final int PARALLEL_PADDING = 16;
private static final int PARALLEL_AMOUNT = Runtime.getRuntime().availableProcessors();
private static final ExecutorService PARALLEL_POOL = Executors.newFixedThreadPool(PARALLEL_AMOUNT);
private static final int[] TABLE_MUL = {
512, 512, 456, 512, 328, 456, 335, 512, 405, 328, 271, 456, 388, 335, 292, 512,
454, 405, 364, 328, 298, 271, 496, 456, 420, 388, 360, 335, 312, 292, 273, 512,
482, 454, 428, 405, 383, 364, 345, 328, 312, 298, 284, 271, 259, 496, 475, 456,
437, 420, 404, 388, 374, 360, 347, 335, 323, 312, 302, 292, 282, 273, 265, 512,
497, 482, 468, 454, 441, 428, 417, 405, 394, 383, 373, 364, 354, 345, 337, 328,
320, 312, 305, 298, 291, 284, 278, 271, 265, 259, 507, 496, 485, 475, 465, 456,
446, 437, 428, 420, 412, 404, 396, 388, 381, 374, 367, 360, 354, 347, 341, 335,
329, 323, 318, 312, 307, 302, 297, 292, 287, 282, 278, 273, 269, 265, 261, 512,
505, 497, 489, 482, 475, 468, 461, 454, 447, 441, 435, 428, 422, 417, 411, 405,
399, 394, 389, 383, 378, 373, 368, 364, 359, 354, 350, 345, 341, 337, 332, 328,
324, 320, 316, 312, 309, 305, 301, 298, 294, 291, 287, 284, 281, 278, 274, 271,
268, 265, 262, 259, 257, 507, 501, 496, 491, 485, 480, 475, 470, 465, 460, 456,
451, 446, 442, 437, 433, 428, 424, 420, 416, 412, 408, 404, 400, 396, 392, 388,
385, 381, 377, 374, 370, 367, 363, 360, 357, 354, 350, 347, 344, 341, 338, 335,
332, 329, 326, 323, 320, 318, 315, 312, 310, 307, 304, 302, 299, 297, 294, 292,
289, 287, 285, 282, 280, 278, 275, 273, 271, 269, 267, 265, 263, 261, 259};
private static final int[] TABLE_SHR = {
9, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17,
17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24};
private final int[] stack;
private final int div;
private final int radius;
public StackBlur(final int radius) {
if(radius > 254 || radius < 0) {
throw new IllegalArgumentException("radius must be in [0, 254]");
}
if (radius < 2) {
stack = null;
div = 0;
} else {
div = (radius << 1) + 1;
stack = new int[((div << 2) + PARALLEL_PADDING) * PARALLEL_AMOUNT];
}
this.radius = radius;
}
public void apply(final byte[] src, final int srcOffset, final byte[] dst, final int dstOffset, final int width, final int height) {
if (src.length < (srcOffset + width * height * 4)) {
throw new IllegalArgumentException("src has insufficient size");
}
if (dst.length < (dstOffset + width * height * 4)) {
throw new IllegalArgumentException("dst has insufficient size");
}
if(radius < 2) {
System.arraycopy(src, srcOffset, dst, dstOffset, width * height * 4);
} else if (width * height * radius < PARALLEL_THRESHOLD) {
blur(src, srcOffset, dst, dstOffset, stack, 0, radius, div, width, height, 0, 1);
blur(dst, dstOffset, dst, dstOffset, stack, 0, radius, div, width, height, 0, 2);
} else {
final Future<?>[] futures = new Future<?>[PARALLEL_AMOUNT];
final StackBlurTask[] tasks = new StackBlurTask[PARALLEL_AMOUNT];
for(int i = 0; i < PARALLEL_AMOUNT; ++i) {
tasks[i] =
new StackBlurTask(src, srcOffset, dst, dstOffset, stack, ((div << 2) + PARALLEL_PADDING) * i, radius, div, width, height, i);
futures[i] = PARALLEL_POOL.submit(tasks[i]);
}
join(futures);
for(int i = 0; i < PARALLEL_AMOUNT; ++i) {
tasks[i].step = 2;
futures[i] = PARALLEL_POOL.submit(tasks[i]);
}
join(futures);
}
}
private static void blur(
final byte[] src, final int srcOffset,
final byte[] dst, final int dstOffset,
final int[] stack,
final int stackOffset,
final int radius,
final int div,
final int w,
final int h,
final int core,
final int step) {
int x, y, xp, yp, i;
int sp;
int stackStart;
int stackIndex;
int srcIndex;
int dstIndex;
int sumR, sumG, sumB, sumA;
int sumInR, sumInG, sumInB, sumInA;
int sumOutR, sumOutG, sumOutB, sumOutA;
int r, g, b, a;
final int wm = w - 1;
final int hm = h - 1;
final int w4 = w * 4;
final int mul_sum = TABLE_MUL[radius];
final int shr_sum = TABLE_SHR[radius];
if (step == 1) {
final int minY = core * h / PARALLEL_AMOUNT;
final int maxY = (core + 1) * h / PARALLEL_AMOUNT;
for (y = minY; y < maxY; y++) {
sumR = sumG = sumB = sumA =
sumInR = sumInG = sumInB = sumInA =
sumOutR = sumOutG = sumOutB = sumOutA = 0;
srcIndex = srcOffset + w4 * y; // start of line (0,y)
for(i = 0; i <= radius; i++) {
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stackIndex = stackOffset + 4 * i;
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumR += r * (i + 1);
sumG += g * (i + 1);
sumB += b * (i + 1);
sumA += a * (i + 1);
sumOutR += r;
sumOutG += g;
sumOutB += b;
sumOutA += a;
}
for(i = 1; i <= radius; i++) {
if(i <= wm) srcIndex += 4;
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stackIndex = stackOffset + 4 * (i + radius);
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumR += r * (radius + 1 - i);
sumG += g * (radius + 1 - i);
sumB += b * (radius + 1 - i);
sumA += a * (radius + 1 - i);
sumInR += r;
sumInG += g;
sumInB += b;
sumInA += a;
}
sp = radius;
xp = radius;
if (xp > wm) xp = wm;
srcIndex = srcOffset + 4 * (xp + y * w);
dstIndex = dstOffset + y * w4;
for (x = 0; x < w; x++) {
dst[dstIndex] = (byte)((sumR * mul_sum) >> shr_sum);
dst[dstIndex + 1] = (byte)((sumG * mul_sum) >> shr_sum);
dst[dstIndex + 2] = (byte)((sumB * mul_sum) >> shr_sum);
dst[dstIndex + 3] = (byte)((sumA * mul_sum) >> shr_sum);
dstIndex += 4;
sumR -= sumOutR;
sumG -= sumOutG;
sumB -= sumOutB;
sumA -= sumOutA;
stackStart = sp + div - radius;
if(stackStart >= div) stackStart -= div;
stackIndex = stackOffset + 4 * stackStart;
sumOutR -= stack[stackIndex];
sumOutG -= stack[stackIndex + 1];
sumOutB -= stack[stackIndex + 2];
sumOutA -= stack[stackIndex + 3];
if(xp < wm) {
srcIndex += 4;
++xp;
}
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumInR += r;
sumInG += g;
sumInB += b;
sumInA += a;
sumR += sumInR;
sumG += sumInG;
sumB += sumInB;
sumA += sumInA;
++sp;
if(sp >= div) sp = 0;
stackIndex = stackOffset + sp * 4;
r = stack[stackIndex];
g = stack[stackIndex + 1];
b = stack[stackIndex + 2];
a = stack[stackIndex + 3];
sumOutR += r;
sumOutG += g;
sumOutB += b;
sumOutA += a;
sumInR -= r;
sumInG -= g;
sumInB -= b;
sumInA -= a;
}
}
}
// step 2
if(step == 2) {
final int minX = core * w / PARALLEL_AMOUNT;
final int maxX = (core + 1) * w / PARALLEL_AMOUNT;
for(x = minX; x < maxX; x++) {
sumR = sumG = sumB = sumA =
sumInR = sumInG = sumInB = sumInA =
sumOutR = sumOutG = sumOutB = sumOutA = 0;
srcIndex = srcOffset + 4 * x; // x,0
for(i = 0; i <= radius; i++) {
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stackIndex = stackOffset + 4 * i;
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumR += r * (i + 1);
sumG += g * (i + 1);
sumB += b * (i + 1);
sumA += a * (i + 1);
sumOutR += r;
sumOutG += g;
sumOutB += b;
sumOutA += a;
}
for(i = 1; i <= radius; i++) {
if(i <= hm) srcIndex += w4; // +stride
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stackIndex = stackOffset + 4 * (i + radius);
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumR += r * (radius + 1 - i);
sumG += g * (radius + 1 - i);
sumB += b * (radius + 1 - i);
sumA += a * (radius + 1 - i);
sumInR += r;
sumInG += g;
sumInB += b;
sumInA += a;
}
sp = radius;
yp = radius;
if(yp > hm) yp = hm;
srcIndex = srcOffset + 4 * (x + yp * w);
dstIndex = dstOffset + 4 * x;
for(y = 0; y < h; y++) {
dst[dstIndex] = (byte)((sumR * mul_sum) >> shr_sum);
dst[dstIndex + 1] = (byte)((sumG * mul_sum) >> shr_sum);
dst[dstIndex + 2] = (byte)((sumB * mul_sum) >> shr_sum);
dst[dstIndex + 3] = (byte)((sumA * mul_sum) >> shr_sum);
dstIndex += w4;
sumR -= sumOutR;
sumG -= sumOutG;
sumB -= sumOutB;
sumA -= sumOutA;
stackStart = sp + div - radius;
if(stackStart >= div) stackStart -= div;
stackIndex = stackOffset + 4 * stackStart;
sumOutR -= stack[stackIndex];
sumOutG -= stack[stackIndex + 1];
sumOutB -= stack[stackIndex + 2];
sumOutA -= stack[stackIndex + 3];
if(yp < hm) {
srcIndex += w4; // stride
++yp;
}
r = src[srcIndex] & 0xff;
g = src[srcIndex + 1] & 0xff;
b = src[srcIndex + 2] & 0xff;
a = src[srcIndex + 3] & 0xff;
stack[stackIndex] = r;
stack[stackIndex + 1] = g;
stack[stackIndex + 2] = b;
stack[stackIndex + 3] = a;
sumInR += r;
sumInG += g;
sumInB += b;
sumInA += a;
sumR += sumInR;
sumG += sumInG;
sumB += sumInB;
sumA += sumInA;
++sp;
if(sp >= div) sp = 0;
stackIndex = stackOffset + sp * 4;
r = stack[stackIndex];
g = stack[stackIndex + 1];
b = stack[stackIndex + 2];
a = stack[stackIndex + 3];
sumOutR += r;
sumOutG += g;
sumOutB += b;
sumOutA += a;
sumInR -= r;
sumInG -= g;
sumInB -= b;
sumInA -= a;
}
}
}
}
private static void sneakyThrow(Throwable t) {
StackBlur.<RuntimeException>sneakyThrow2(t);
}
@SuppressWarnings("unchecked")
private static <T extends Throwable> T sneakyThrow2(final Throwable t) throws T {
throw (T)t;
}
private static void join(Future<?>[] futures) {
for(int i = 0; i < PARALLEL_AMOUNT; ++i) {
try {
futures[i].get();
} catch (InterruptedException interrupt) {
Thread.currentThread().interrupt();
} catch (ExecutionException executionException) {
sneakyThrow(executionException.getCause());
}
}
}
private static class StackBlurTask implements Runnable {
private final byte[] src;
private final int srcOffset;
private final byte[] dst;
private final int dstOffset;
private final int[] stack;
private final int stackOffset;
private final int radius;
private final int div;
private final int width;
private final int height;
private final int core;
private volatile int step;
public StackBlurTask(byte[] src, int srcOffset, byte[] dst, int dstOffset, int[] stack, int stackOffset, int radius, int div, int width, int height, int core) {
this.src = src;
this.srcOffset = srcOffset;
this.dst = dst;
this.dstOffset = dstOffset;
this.stack = stack;
this.stackOffset = stackOffset;
this.radius = radius;
this.div = div;
this.width = width;
this.height = height;
this.core = core;
this.step = 1;
}
@Override
public void run() {
if(step == 1) {
blur(src, srcOffset, dst, dstOffset, stack, stackOffset, radius, div, width, height, core, 1);
} else {
blur(dst, dstOffset, dst, dstOffset, stack, stackOffset, radius, div, width, height, core, 2);
}
}
}
}
import javax.imageio.ImageIO;
import javax.swing.*;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.InputStream;
import java.nio.file.*;
/**
*
*/
public final class StackBlurDemo extends JPanel {
private static final boolean BENCHMARK = true;
public static void main(final String[] args) throws Throwable {
if(args.length < 1) {
System.out.println("Usage: path");
return;
}
final Path p = Paths.get(args[0]);
if(!Files.isReadable(p)) {
System.err.println("Cannot read "+p);
return;
}
final byte[] colors;
final int width;
final int height;
try(final InputStream in = Files.newInputStream(p)) {
final BufferedImage image = ImageIO.read(in);
width = image.getWidth();
height = image.getHeight();
final int size = width * height;
image.coerceData(false);
final int[] packed = image.getRGB(0, 0, width, height, null, 0, width);
colors = new byte[size << 2];
for(int i = 0, j = 0; i < size; i++) {
final int color = packed[i];
colors[j++] = (byte)((color >> 0x18) & 0xff);
colors[j++] = (byte)((color >> 0x10) & 0xff);
colors[j++] = (byte)((color >> 0x08) & 0xff);
colors[j++] = (byte)(color & 0xff);
}
}
if(BENCHMARK) {
final StackBlur blur = new StackBlur(100);
final byte[] src = colors;
final byte[] dst = new byte[colors.length];
System.out.println("warmup ...");
for(int i = 0; i < 100; ++i) {
blur.apply(src, 0, dst, 0, width, height);
}
System.out.println("measure ...");
final long[] timings = new long[100];
for(int i = 0; i < timings.length; ++i) {
final long t0 = System.nanoTime();
blur.apply(src, 0, dst, 0, width, height);
final long t1 = System.nanoTime();
timings[i] = t1 - t0;
}
final long total = sum(timings);
System.out.println("avg[ms]: "+((double)total / (double)timings.length * 1e-6));
}
final JFrame frame = new JFrame("StackBlur");
frame.setSize(width, height);
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
frame.add(new StackBlurDemo(colors, width, height));
frame.setVisible(true);
}
private final byte[] src;
private final byte[] dst;
private final int[] packed;
private final int width, height;
private final BufferedImage image;
private final StackBlur blur;
private StackBlurDemo(final byte[] src, final int width, final int height) {
this.src = src;
this.dst = new byte[src.length];
this.width = width;
this.height = height;
this.packed = new int[width * height];
this.image = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
this.blur = new StackBlur(100);
applyFilter();
}
private void applyFilter() {
blur.apply(src, 0, dst, 0, width, height);
}
private void updateImage() {
for(int i = 0, j = 0; i < packed.length; ++i) {
packed[i] = ((dst[j++] & 0xff) << 0x18) | ((dst[j++] & 0xff) << 0x10) | ((dst[j++] & 0xff) << 0x08) | (dst[j++] & 0xff);
}
image.setRGB(0, 0, width, height, packed, 0, width);
}
@Override
protected void paintComponent(Graphics g) {
super.paintComponent(g);
updateImage();
g.drawImage(image, 0, 0, null);
}
private static long sum(final long[] values) {
long x = 0L;
for(final long value : values) {
x += value;
}
return x;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment