Last active
March 22, 2017 20:36
-
-
Save larsiusprime/72431cb19f923976ec85630fe9f854cd to your computer and use it in GitHub Desktop.
Proposed speedup for ImageDataUtil.copyPixels()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void ImageDataUtil::CopyPixels (Image* image, Image* sourceImage, Rectangle* sourceRect, Vector2* destPoint, Image* alphaImage, Vector2* alphaPoint, bool mergeAlpha) { | |
uint8_t* sourceData = (uint8_t*)sourceImage->buffer->data->Data (); | |
uint8_t* destData = (uint8_t*)image->buffer->data->Data (); | |
ImageDataView sourceView = ImageDataView (sourceImage, sourceRect); | |
Rectangle destRect = Rectangle (destPoint->x, destPoint->y, sourceView.width, sourceView.height); | |
ImageDataView destView = ImageDataView (image, &destRect); | |
PixelFormat sourceFormat = sourceImage->buffer->format; | |
PixelFormat destFormat = image->buffer->format; | |
bool sourcePremultiplied = sourceImage->buffer->premultiplied; | |
bool destPremultiplied = image->buffer->premultiplied; | |
int sourcePosition, destPosition; | |
RGBA sourcePixel; | |
if (!mergeAlpha || !sourceImage->buffer->transparent) { | |
if(sourceFormat == destFormat && sourcePremultiplied == destPremultiplied) { | |
//I need the syntax for an ImageBuffer.blit() call or something like that to draw the whole rectangle | |
//in one whack (as a series of single-row memcpy()'s rather than read/write each pixel by hand | |
//trying this for now: | |
for (int y = 0; y < destView.height; y++) { | |
sourcePosition = sourceView.Row (y); | |
destPosition = destView.Row (y); | |
image->buffer->BlitRow(sourceData, sourcePosition, destPosition, sourceView.width, destView.x, destView.y+y); | |
} | |
} | |
else { | |
for (int y = 0; y < destView.height; y++) { | |
sourcePosition = sourceView.Row (y); | |
destPosition = destView.Row (y); | |
for (int x = 0; x < destView.width; x++) { | |
sourcePixel.ReadUInt8 (sourceData, sourcePosition, sourceFormat, sourcePremultiplied); | |
sourcePixel.WriteUInt8 (destData, destPosition, destFormat, destPremultiplied); | |
sourcePosition += 4; | |
destPosition += 4; | |
} | |
} | |
} | |
} else { | |
float sourceAlpha, destAlpha, oneMinusSourceAlpha, blendAlpha; | |
RGBA destPixel; | |
if (alphaImage == 0) { | |
for (int y = 0; y < destView.height; y++) { | |
sourcePosition = sourceView.Row (y); | |
destPosition = destView.Row (y); | |
for (int x = 0; x < destView.width; x++) { | |
sourcePixel.ReadUInt8 (sourceData, sourcePosition, sourceFormat, sourcePremultiplied); | |
destPixel.ReadUInt8 (destData, destPosition, destFormat, destPremultiplied); | |
sourceAlpha = sourcePixel.a / 255.0; | |
destAlpha = destPixel.a / 255.0; | |
oneMinusSourceAlpha = 1 - sourceAlpha; | |
blendAlpha = sourceAlpha + (destAlpha * oneMinusSourceAlpha); | |
if (blendAlpha == 0) { | |
destPixel.Set (0, 0, 0, 0); | |
} else { | |
destPixel.r = __clamp[int (0.5 + (sourcePixel.r * sourceAlpha + destPixel.r * destAlpha * oneMinusSourceAlpha) / blendAlpha)]; | |
destPixel.g = __clamp[int (0.5 + (sourcePixel.g * sourceAlpha + destPixel.g * destAlpha * oneMinusSourceAlpha) / blendAlpha)]; | |
destPixel.b = __clamp[int (0.5 + (sourcePixel.b * sourceAlpha + destPixel.b * destAlpha * oneMinusSourceAlpha) / blendAlpha)]; | |
destPixel.a = __clamp[int (0.5 + blendAlpha * 255.0)]; | |
} | |
destPixel.WriteUInt8 (destData, destPosition, destFormat, destPremultiplied); | |
sourcePosition += 4; | |
destPosition += 4; | |
} | |
} | |
} else { | |
uint8_t* alphaData = (uint8_t*)alphaImage->buffer->data->Data (); | |
PixelFormat alphaFormat = alphaImage->buffer->format; | |
bool alphaPremultiplied = alphaImage->buffer->premultiplied; | |
Rectangle alphaRect = Rectangle (alphaPoint->x, alphaPoint->y, destView.width, destView.height); | |
ImageDataView alphaView = ImageDataView (alphaImage, &alphaRect); | |
int alphaPosition; | |
RGBA alphaPixel; | |
for (int y = 0; y < alphaView.height; y++) { | |
sourcePosition = sourceView.Row (y); | |
destPosition = destView.Row (y); | |
alphaPosition = alphaView.Row (y); | |
for (int x = 0; x < alphaView.width; x++) { | |
sourcePixel.ReadUInt8 (sourceData, sourcePosition, sourceFormat, sourcePremultiplied); | |
destPixel.ReadUInt8 (destData, destPosition, destFormat, destPremultiplied); | |
alphaPixel.ReadUInt8 (alphaData, alphaPosition, alphaFormat, alphaPremultiplied); | |
sourceAlpha = alphaPixel.a / 0xFF; | |
destAlpha = destPixel.a / 0xFF; | |
oneMinusSourceAlpha = 1 - sourceAlpha; | |
blendAlpha = sourceAlpha + (destAlpha * oneMinusSourceAlpha); | |
if (blendAlpha == 0) { | |
destPixel.Set (0, 0, 0, 0); | |
} else { | |
destPixel.r = __clamp[int (0.5 + (sourcePixel.r * sourceAlpha + destPixel.r * destAlpha * oneMinusSourceAlpha) / blendAlpha)]; | |
destPixel.g = __clamp[int (0.5 + (sourcePixel.g * sourceAlpha + destPixel.g * destAlpha * oneMinusSourceAlpha) / blendAlpha)]; | |
destPixel.b = __clamp[int (0.5 + (sourcePixel.b * sourceAlpha + destPixel.b * destAlpha * oneMinusSourceAlpha) / blendAlpha)]; | |
destPixel.a = __clamp[int (0.5 + blendAlpha * 255.0)]; | |
} | |
destPixel.WriteUInt8 (destData, destPosition, destFormat, destPremultiplied); | |
sourcePosition += 4; | |
destPosition += 4; | |
} | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void ImageBuffer::BlitRow (const unsigned char *data, int sourcePosition, int destPosition, int sourceW, int destX, int destY) { | |
if (destX < 0 || destX + sourceW > this->width || destY < 0 || destY + 1 > this->height) { | |
return; | |
} | |
int stride = (sourceW * (((bitsPerPixel + 3) & ~0x3) >> 3)); | |
unsigned char *bytes = this->data->Data (); | |
memcpy (&bytes[destPosition], &data[sourcePosition], stride); | |
} |
I think I finally got it!
The trick was to give up on doing the offset math by hand and to blit no more than one row at a time. The blit() function uses an internal for loop over rows anyway; now I just pass in the precalculated positions, compute the proper stride, and blit each row. This fast path only activates when alpha is not being merged and the buffers appear to use compatible pixel formats and premultiplication settings.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Getting mixed results with the current implementation:
It's clearly doing something, but there is image corruption. I'm not sure whether my blit math is wrong, or whether my conditional is not strict enough and is allowing blits between incompatible buffers.