-
-
Save StelianMorariu/3f04e5e8bd5b71bc0cbc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.concurrent.ExecutionException; | |
import java.util.concurrent.ExecutorService; | |
import java.util.concurrent.Executors; | |
import java.util.concurrent.Future; | |
/** | |
* | |
*/ | |
public final class StackBlur { | |
private static final int PARALLEL_THRESHOLD = 2048 * 2048; | |
private static final int PARALLEL_PADDING = 16; | |
private static final int PARALLEL_AMOUNT = Runtime.getRuntime().availableProcessors(); | |
private static final ExecutorService PARALLEL_POOL = Executors.newFixedThreadPool(PARALLEL_AMOUNT); | |
private static final int[] TABLE_MUL = { | |
512, 512, 456, 512, 328, 456, 335, 512, 405, 328, 271, 456, 388, 335, 292, 512, | |
454, 405, 364, 328, 298, 271, 496, 456, 420, 388, 360, 335, 312, 292, 273, 512, | |
482, 454, 428, 405, 383, 364, 345, 328, 312, 298, 284, 271, 259, 496, 475, 456, | |
437, 420, 404, 388, 374, 360, 347, 335, 323, 312, 302, 292, 282, 273, 265, 512, | |
497, 482, 468, 454, 441, 428, 417, 405, 394, 383, 373, 364, 354, 345, 337, 328, | |
320, 312, 305, 298, 291, 284, 278, 271, 265, 259, 507, 496, 485, 475, 465, 456, | |
446, 437, 428, 420, 412, 404, 396, 388, 381, 374, 367, 360, 354, 347, 341, 335, | |
329, 323, 318, 312, 307, 302, 297, 292, 287, 282, 278, 273, 269, 265, 261, 512, | |
505, 497, 489, 482, 475, 468, 461, 454, 447, 441, 435, 428, 422, 417, 411, 405, | |
399, 394, 389, 383, 378, 373, 368, 364, 359, 354, 350, 345, 341, 337, 332, 328, | |
324, 320, 316, 312, 309, 305, 301, 298, 294, 291, 287, 284, 281, 278, 274, 271, | |
268, 265, 262, 259, 257, 507, 501, 496, 491, 485, 480, 475, 470, 465, 460, 456, | |
451, 446, 442, 437, 433, 428, 424, 420, 416, 412, 408, 404, 400, 396, 392, 388, | |
385, 381, 377, 374, 370, 367, 363, 360, 357, 354, 350, 347, 344, 341, 338, 335, | |
332, 329, 326, 323, 320, 318, 315, 312, 310, 307, 304, 302, 299, 297, 294, 292, | |
289, 287, 285, 282, 280, 278, 275, 273, 271, 269, 267, 265, 263, 261, 259}; | |
private static final int[] TABLE_SHR = { | |
9, 11, 12, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, | |
17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, | |
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, | |
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, | |
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, | |
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, | |
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, | |
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, | |
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, | |
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, | |
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, | |
23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | |
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | |
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | |
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, | |
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24}; | |
private final int[] stack; | |
private final int div; | |
private final int radius; | |
public StackBlur(final int radius) { | |
if(radius > 254 || radius < 0) { | |
throw new IllegalArgumentException("radius must be in [0, 254]"); | |
} | |
if (radius < 2) { | |
stack = null; | |
div = 0; | |
} else { | |
div = (radius << 1) + 1; | |
stack = new int[((div << 2) + PARALLEL_PADDING) * PARALLEL_AMOUNT]; | |
} | |
this.radius = radius; | |
} | |
public void apply(final byte[] src, final int srcOffset, final byte[] dst, final int dstOffset, final int width, final int height) { | |
if (src.length < (srcOffset + width * height * 4)) { | |
throw new IllegalArgumentException("src has insufficient size"); | |
} | |
if (dst.length < (dstOffset + width * height * 4)) { | |
throw new IllegalArgumentException("dst has insufficient size"); | |
} | |
if(radius < 2) { | |
System.arraycopy(src, srcOffset, dst, dstOffset, width * height * 4); | |
} else if (width * height * radius < PARALLEL_THRESHOLD) { | |
blur(src, srcOffset, dst, dstOffset, stack, 0, radius, div, width, height, 0, 1); | |
blur(dst, dstOffset, dst, dstOffset, stack, 0, radius, div, width, height, 0, 2); | |
} else { | |
final Future<?>[] futures = new Future<?>[PARALLEL_AMOUNT]; | |
final StackBlurTask[] tasks = new StackBlurTask[PARALLEL_AMOUNT]; | |
for(int i = 0; i < PARALLEL_AMOUNT; ++i) { | |
tasks[i] = | |
new StackBlurTask(src, srcOffset, dst, dstOffset, stack, ((div << 2) + PARALLEL_PADDING) * i, radius, div, width, height, i); | |
futures[i] = PARALLEL_POOL.submit(tasks[i]); | |
} | |
join(futures); | |
for(int i = 0; i < PARALLEL_AMOUNT; ++i) { | |
tasks[i].step = 2; | |
futures[i] = PARALLEL_POOL.submit(tasks[i]); | |
} | |
join(futures); | |
} | |
} | |
private static void blur( | |
final byte[] src, final int srcOffset, | |
final byte[] dst, final int dstOffset, | |
final int[] stack, | |
final int stackOffset, | |
final int radius, | |
final int div, | |
final int w, | |
final int h, | |
final int core, | |
final int step) { | |
int x, y, xp, yp, i; | |
int sp; | |
int stackStart; | |
int stackIndex; | |
int srcIndex; | |
int dstIndex; | |
int sumR, sumG, sumB, sumA; | |
int sumInR, sumInG, sumInB, sumInA; | |
int sumOutR, sumOutG, sumOutB, sumOutA; | |
int r, g, b, a; | |
final int wm = w - 1; | |
final int hm = h - 1; | |
final int w4 = w * 4; | |
final int mul_sum = TABLE_MUL[radius]; | |
final int shr_sum = TABLE_SHR[radius]; | |
if (step == 1) { | |
final int minY = core * h / PARALLEL_AMOUNT; | |
final int maxY = (core + 1) * h / PARALLEL_AMOUNT; | |
for (y = minY; y < maxY; y++) { | |
sumR = sumG = sumB = sumA = | |
sumInR = sumInG = sumInB = sumInA = | |
sumOutR = sumOutG = sumOutB = sumOutA = 0; | |
srcIndex = srcOffset + w4 * y; // start of line (0,y) | |
for(i = 0; i <= radius; i++) { | |
r = src[srcIndex] & 0xff; | |
g = src[srcIndex + 1] & 0xff; | |
b = src[srcIndex + 2] & 0xff; | |
a = src[srcIndex + 3] & 0xff; | |
stackIndex = stackOffset + 4 * i; | |
stack[stackIndex] = r; | |
stack[stackIndex + 1] = g; | |
stack[stackIndex + 2] = b; | |
stack[stackIndex + 3] = a; | |
sumR += r * (i + 1); | |
sumG += g * (i + 1); | |
sumB += b * (i + 1); | |
sumA += a * (i + 1); | |
sumOutR += r; | |
sumOutG += g; | |
sumOutB += b; | |
sumOutA += a; | |
} | |
for(i = 1; i <= radius; i++) { | |
if(i <= wm) srcIndex += 4; | |
r = src[srcIndex] & 0xff; | |
g = src[srcIndex + 1] & 0xff; | |
b = src[srcIndex + 2] & 0xff; | |
a = src[srcIndex + 3] & 0xff; | |
stackIndex = stackOffset + 4 * (i + radius); | |
stack[stackIndex] = r; | |
stack[stackIndex + 1] = g; | |
stack[stackIndex + 2] = b; | |
stack[stackIndex + 3] = a; | |
sumR += r * (radius + 1 - i); | |
sumG += g * (radius + 1 - i); | |
sumB += b * (radius + 1 - i); | |
sumA += a * (radius + 1 - i); | |
sumInR += r; | |
sumInG += g; | |
sumInB += b; | |
sumInA += a; | |
} | |
sp = radius; | |
xp = radius; | |
if (xp > wm) xp = wm; | |
srcIndex = srcOffset + 4 * (xp + y * w); | |
dstIndex = dstOffset + y * w4; | |
for (x = 0; x < w; x++) { | |
dst[dstIndex] = (byte)((sumR * mul_sum) >> shr_sum); | |
dst[dstIndex + 1] = (byte)((sumG * mul_sum) >> shr_sum); | |
dst[dstIndex + 2] = (byte)((sumB * mul_sum) >> shr_sum); | |
dst[dstIndex + 3] = (byte)((sumA * mul_sum) >> shr_sum); | |
dstIndex += 4; | |
sumR -= sumOutR; | |
sumG -= sumOutG; | |
sumB -= sumOutB; | |
sumA -= sumOutA; | |
stackStart = sp + div - radius; | |
if(stackStart >= div) stackStart -= div; | |
stackIndex = stackOffset + 4 * stackStart; | |
sumOutR -= stack[stackIndex]; | |
sumOutG -= stack[stackIndex + 1]; | |
sumOutB -= stack[stackIndex + 2]; | |
sumOutA -= stack[stackIndex + 3]; | |
if(xp < wm) { | |
srcIndex += 4; | |
++xp; | |
} | |
r = src[srcIndex] & 0xff; | |
g = src[srcIndex + 1] & 0xff; | |
b = src[srcIndex + 2] & 0xff; | |
a = src[srcIndex + 3] & 0xff; | |
stack[stackIndex] = r; | |
stack[stackIndex + 1] = g; | |
stack[stackIndex + 2] = b; | |
stack[stackIndex + 3] = a; | |
sumInR += r; | |
sumInG += g; | |
sumInB += b; | |
sumInA += a; | |
sumR += sumInR; | |
sumG += sumInG; | |
sumB += sumInB; | |
sumA += sumInA; | |
++sp; | |
if(sp >= div) sp = 0; | |
stackIndex = stackOffset + sp * 4; | |
r = stack[stackIndex]; | |
g = stack[stackIndex + 1]; | |
b = stack[stackIndex + 2]; | |
a = stack[stackIndex + 3]; | |
sumOutR += r; | |
sumOutG += g; | |
sumOutB += b; | |
sumOutA += a; | |
sumInR -= r; | |
sumInG -= g; | |
sumInB -= b; | |
sumInA -= a; | |
} | |
} | |
} | |
// step 2 | |
if(step == 2) { | |
final int minX = core * w / PARALLEL_AMOUNT; | |
final int maxX = (core + 1) * w / PARALLEL_AMOUNT; | |
for(x = minX; x < maxX; x++) { | |
sumR = sumG = sumB = sumA = | |
sumInR = sumInG = sumInB = sumInA = | |
sumOutR = sumOutG = sumOutB = sumOutA = 0; | |
srcIndex = srcOffset + 4 * x; // x,0 | |
for(i = 0; i <= radius; i++) { | |
r = src[srcIndex] & 0xff; | |
g = src[srcIndex + 1] & 0xff; | |
b = src[srcIndex + 2] & 0xff; | |
a = src[srcIndex + 3] & 0xff; | |
stackIndex = stackOffset + 4 * i; | |
stack[stackIndex] = r; | |
stack[stackIndex + 1] = g; | |
stack[stackIndex + 2] = b; | |
stack[stackIndex + 3] = a; | |
sumR += r * (i + 1); | |
sumG += g * (i + 1); | |
sumB += b * (i + 1); | |
sumA += a * (i + 1); | |
sumOutR += r; | |
sumOutG += g; | |
sumOutB += b; | |
sumOutA += a; | |
} | |
for(i = 1; i <= radius; i++) { | |
if(i <= hm) srcIndex += w4; // +stride | |
r = src[srcIndex] & 0xff; | |
g = src[srcIndex + 1] & 0xff; | |
b = src[srcIndex + 2] & 0xff; | |
a = src[srcIndex + 3] & 0xff; | |
stackIndex = stackOffset + 4 * (i + radius); | |
stack[stackIndex] = r; | |
stack[stackIndex + 1] = g; | |
stack[stackIndex + 2] = b; | |
stack[stackIndex + 3] = a; | |
sumR += r * (radius + 1 - i); | |
sumG += g * (radius + 1 - i); | |
sumB += b * (radius + 1 - i); | |
sumA += a * (radius + 1 - i); | |
sumInR += r; | |
sumInG += g; | |
sumInB += b; | |
sumInA += a; | |
} | |
sp = radius; | |
yp = radius; | |
if(yp > hm) yp = hm; | |
srcIndex = srcOffset + 4 * (x + yp * w); | |
dstIndex = dstOffset + 4 * x; | |
for(y = 0; y < h; y++) { | |
dst[dstIndex] = (byte)((sumR * mul_sum) >> shr_sum); | |
dst[dstIndex + 1] = (byte)((sumG * mul_sum) >> shr_sum); | |
dst[dstIndex + 2] = (byte)((sumB * mul_sum) >> shr_sum); | |
dst[dstIndex + 3] = (byte)((sumA * mul_sum) >> shr_sum); | |
dstIndex += w4; | |
sumR -= sumOutR; | |
sumG -= sumOutG; | |
sumB -= sumOutB; | |
sumA -= sumOutA; | |
stackStart = sp + div - radius; | |
if(stackStart >= div) stackStart -= div; | |
stackIndex = stackOffset + 4 * stackStart; | |
sumOutR -= stack[stackIndex]; | |
sumOutG -= stack[stackIndex + 1]; | |
sumOutB -= stack[stackIndex + 2]; | |
sumOutA -= stack[stackIndex + 3]; | |
if(yp < hm) { | |
srcIndex += w4; // stride | |
++yp; | |
} | |
r = src[srcIndex] & 0xff; | |
g = src[srcIndex + 1] & 0xff; | |
b = src[srcIndex + 2] & 0xff; | |
a = src[srcIndex + 3] & 0xff; | |
stack[stackIndex] = r; | |
stack[stackIndex + 1] = g; | |
stack[stackIndex + 2] = b; | |
stack[stackIndex + 3] = a; | |
sumInR += r; | |
sumInG += g; | |
sumInB += b; | |
sumInA += a; | |
sumR += sumInR; | |
sumG += sumInG; | |
sumB += sumInB; | |
sumA += sumInA; | |
++sp; | |
if(sp >= div) sp = 0; | |
stackIndex = stackOffset + sp * 4; | |
r = stack[stackIndex]; | |
g = stack[stackIndex + 1]; | |
b = stack[stackIndex + 2]; | |
a = stack[stackIndex + 3]; | |
sumOutR += r; | |
sumOutG += g; | |
sumOutB += b; | |
sumOutA += a; | |
sumInR -= r; | |
sumInG -= g; | |
sumInB -= b; | |
sumInA -= a; | |
} | |
} | |
} | |
} | |
private static void sneakyThrow(Throwable t) { | |
StackBlur.<RuntimeException>sneakyThrow2(t); | |
} | |
@SuppressWarnings("unchecked") | |
private static <T extends Throwable> T sneakyThrow2(final Throwable t) throws T { | |
throw (T)t; | |
} | |
private static void join(Future<?>[] futures) { | |
for(int i = 0; i < PARALLEL_AMOUNT; ++i) { | |
try { | |
futures[i].get(); | |
} catch (InterruptedException interrupt) { | |
Thread.currentThread().interrupt(); | |
} catch (ExecutionException executionException) { | |
sneakyThrow(executionException.getCause()); | |
} | |
} | |
} | |
private static class StackBlurTask implements Runnable { | |
private final byte[] src; | |
private final int srcOffset; | |
private final byte[] dst; | |
private final int dstOffset; | |
private final int[] stack; | |
private final int stackOffset; | |
private final int radius; | |
private final int div; | |
private final int width; | |
private final int height; | |
private final int core; | |
private volatile int step; | |
public StackBlurTask(byte[] src, int srcOffset, byte[] dst, int dstOffset, int[] stack, int stackOffset, int radius, int div, int width, int height, int core) { | |
this.src = src; | |
this.srcOffset = srcOffset; | |
this.dst = dst; | |
this.dstOffset = dstOffset; | |
this.stack = stack; | |
this.stackOffset = stackOffset; | |
this.radius = radius; | |
this.div = div; | |
this.width = width; | |
this.height = height; | |
this.core = core; | |
this.step = 1; | |
} | |
@Override | |
public void run() { | |
if(step == 1) { | |
blur(src, srcOffset, dst, dstOffset, stack, stackOffset, radius, div, width, height, core, 1); | |
} else { | |
blur(dst, dstOffset, dst, dstOffset, stack, stackOffset, radius, div, width, height, core, 2); | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import javax.imageio.ImageIO; | |
import javax.swing.*; | |
import java.awt.*; | |
import java.awt.image.BufferedImage; | |
import java.io.InputStream; | |
import java.nio.file.*; | |
/** | |
* | |
*/ | |
public final class StackBlurDemo extends JPanel { | |
private static final boolean BENCHMARK = true; | |
public static void main(final String[] args) throws Throwable { | |
if(args.length < 1) { | |
System.out.println("Usage: path"); | |
return; | |
} | |
final Path p = Paths.get(args[0]); | |
if(!Files.isReadable(p)) { | |
System.err.println("Cannot read "+p); | |
return; | |
} | |
final byte[] colors; | |
final int width; | |
final int height; | |
try(final InputStream in = Files.newInputStream(p)) { | |
final BufferedImage image = ImageIO.read(in); | |
width = image.getWidth(); | |
height = image.getHeight(); | |
final int size = width * height; | |
image.coerceData(false); | |
final int[] packed = image.getRGB(0, 0, width, height, null, 0, width); | |
colors = new byte[size << 2]; | |
for(int i = 0, j = 0; i < size; i++) { | |
final int color = packed[i]; | |
colors[j++] = (byte)((color >> 0x18) & 0xff); | |
colors[j++] = (byte)((color >> 0x10) & 0xff); | |
colors[j++] = (byte)((color >> 0x08) & 0xff); | |
colors[j++] = (byte)(color & 0xff); | |
} | |
} | |
if(BENCHMARK) { | |
final StackBlur blur = new StackBlur(100); | |
final byte[] src = colors; | |
final byte[] dst = new byte[colors.length]; | |
System.out.println("warmup ..."); | |
for(int i = 0; i < 100; ++i) { | |
blur.apply(src, 0, dst, 0, width, height); | |
} | |
System.out.println("measure ..."); | |
final long[] timings = new long[100]; | |
for(int i = 0; i < timings.length; ++i) { | |
final long t0 = System.nanoTime(); | |
blur.apply(src, 0, dst, 0, width, height); | |
final long t1 = System.nanoTime(); | |
timings[i] = t1 - t0; | |
} | |
final long total = sum(timings); | |
System.out.println("avg[ms]: "+((double)total / (double)timings.length * 1e-6)); | |
} | |
final JFrame frame = new JFrame("StackBlur"); | |
frame.setSize(width, height); | |
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); | |
frame.add(new StackBlurDemo(colors, width, height)); | |
frame.setVisible(true); | |
} | |
private final byte[] src; | |
private final byte[] dst; | |
private final int[] packed; | |
private final int width, height; | |
private final BufferedImage image; | |
private final StackBlur blur; | |
private StackBlurDemo(final byte[] src, final int width, final int height) { | |
this.src = src; | |
this.dst = new byte[src.length]; | |
this.width = width; | |
this.height = height; | |
this.packed = new int[width * height]; | |
this.image = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); | |
this.blur = new StackBlur(100); | |
applyFilter(); | |
} | |
private void applyFilter() { | |
blur.apply(src, 0, dst, 0, width, height); | |
} | |
private void updateImage() { | |
for(int i = 0, j = 0; i < packed.length; ++i) { | |
packed[i] = ((dst[j++] & 0xff) << 0x18) | ((dst[j++] & 0xff) << 0x10) | ((dst[j++] & 0xff) << 0x08) | (dst[j++] & 0xff); | |
} | |
image.setRGB(0, 0, width, height, packed, 0, width); | |
} | |
@Override | |
protected void paintComponent(Graphics g) { | |
super.paintComponent(g); | |
updateImage(); | |
g.drawImage(image, 0, 0, null); | |
} | |
private static long sum(final long[] values) { | |
long x = 0L; | |
for(final long value : values) { | |
x += value; | |
} | |
return x; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment