Skip to content

Instantly share code, notes, and snippets.

@partybusiness
Last active March 20, 2025 16:20
Show Gist options
  • Save partybusiness/da90e9d05d24f4f5c81e5c241ac6bbb7 to your computer and use it in GitHub Desktop.
Save partybusiness/da90e9d05d24f4f5c81e5c241ac6bbb7 to your computer and use it in GitHub Desktop.
A Godot CompositorEffect that applies a deliberately low-quality, JPEG-inspired compression look.
@tool
extends CompositorEffect
class_name CompressEffect
## Compositor effect that imitates heavy JPEG-style compression with two compute passes.
##
## The shrink pass writes a reduced luma/chroma image into a small texture and the
## block pass reads that texture back and overwrites the colour buffer.

#@tool
## Resource path of the compute shader that fills the small texture.
const shrink_shader_path: String = "res://compression/compress_shrink_effect.glsl"
## Resource path of the compute shader that writes the final image.
const block_shader_path: String = "res://compression/compress_final_effect.glsl"

## Rendering device used for every shader, pipeline and texture below.
var rd: RenderingDevice

# NOTE(review): RenderingDevice.texture_is_format_supported_for_usage() can be used
# to verify the small texture's format on the target device.
var shrink_shader: RID
var shrink_pipeline: RID
var block_shader: RID
## Pipeline created from block_shader (the name is historical).
var blur_pipeline: RID

## Small texture that shrink_shader writes to and block_shader reads from.
var small_texture: RID

## Multiplier to convert small texture coordinates to full screen coordinates.
var text_mult: Vector2 = Vector2.ONE

## Size of the screen the small texture was last built for.
var screen_size: Vector2i = Vector2i.ZERO

## Block edge length in pixels; make sure this matches block size in compute shaders.
var block_size: float = 16.0

## Size of the small texture; will get overridden with a value derived from screen size.
var small_size: Vector2i = Vector2i(550, 320)

## Number of 8x8 workgroups dispatched for the shrink pass, rounded up per axis.
var small_groups: Vector2i:
	get:
		return Vector2i(ceili(small_size.x / 8.0), ceili(small_size.y / 8.0))
# Fetches the rendering device, queues shader compilation on the render thread
# and prints the device's compute workgroup limits for reference.
func _init() -> void:
	print("init")
	#effect_callback_type = EFFECT_CALLBACK_TYPE_POST_TRANSPARENT
	rd = RenderingServer.get_rendering_device()
	RenderingServer.call_on_render_thread(_initialize_compute)

	# get_rendering_device() returns null when no RenderingDevice backend is in
	# use, so there are no limits to query in that case.
	if not rd:
		return

	# check limits
	# seems to be [count_x = 2147483647, size_x = 1536,count_inv = 1536]
	var count_x: int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X)
	var count_inv: int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS)
	var size_x: int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X)
	print("limits =[%d,%d,%d]"%[count_x, size_x, count_inv])
# Frees the shader and texture RIDs held by this effect when the object is
# about to be deleted; every other notification is ignored.
func _notification(what: int) -> void:
	if what != NOTIFICATION_PREDELETE:
		return

	# Same order as the members are declared: both shaders, then the texture.
	for owned_rid: RID in [shrink_shader, block_shader, small_texture]:
		if owned_rid.is_valid():
			rd.free_rid(owned_rid)
#region Code in this region runs on the rendering thread.
# Compile our shader at initialization.
# Compiles both compute shaders and creates their pipelines.
# If a shader cannot be loaded or compiled its pipeline RID stays invalid.
func _initialize_compute() -> void:
	rd = RenderingServer.get_rendering_device()
	print("init compute ",(rd))
	if not rd:
		return

	shrink_shader = _compile_compute_shader(shrink_shader_path)
	if shrink_shader.is_valid():
		shrink_pipeline = rd.compute_pipeline_create(shrink_shader)

	block_shader = _compile_compute_shader(block_shader_path)
	if block_shader.is_valid():
		blur_pipeline = rd.compute_pipeline_create(block_shader)


# Loads the shader file at `path` and returns the shader RID created from its
# SPIR-V, or an empty RID (with an error logged) when the file cannot be loaded.
func _compile_compute_shader(path: String) -> RID:
	var shader_file := load(path) as RDShaderFile
	if not shader_file:
		push_error("CompressEffect: could not load compute shader at ", path)
		return RID()
	var shader_spirv: RDShaderSPIRV = shader_file.get_spirv()
	return rd.shader_create_from_spirv(shader_spirv)
# Rebuilds small_texture and the derived sizes for a new render resolution.
#
# size: the full render resolution in pixels.
func set_up_screen_size(size:Vector2i) -> void:
	print ("new size ",size)
	screen_size = size

	# Release the texture built for the previous resolution; otherwise every
	# resize would leave an orphaned texture on the rendering device.
	if small_texture.is_valid() and rd.texture_is_valid(small_texture):
		rd.free_rid(small_texture)
	small_texture = RID()

	# 2x2 texels in the small texture for every block in the source.
	var texel_span: float = block_size / 2.0
	small_size = Vector2i(ceili(screen_size.x / texel_span), ceili(screen_size.y / texel_span))
	print("made size ", small_size, " ", small_groups)

	var tf:RDTextureFormat = RDTextureFormat.new()
	tf.texture_type = RenderingDevice.TEXTURE_TYPE_2D
	tf.width = small_size.x
	tf.height = small_size.y
	tf.depth = 1
	tf.array_layers = 1
	tf.mipmaps = 1
	# Both compute shaders declare this image as rgba16f, so the texture has to
	# be created with the matching 16-bit float format.
	tf.format = RenderingDevice.DATA_FORMAT_R16G16B16A16_SFLOAT
	tf.usage_bits = RenderingDevice.TEXTURE_USAGE_SAMPLING_BIT | RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT | RenderingDevice.TEXTURE_USAGE_STORAGE_BIT

	# The texture is only ever bound as a storage image, so it is created
	# directly on the rendering device.
	small_texture = rd.texture_create(tf, RDTextureView.new(), [])
	if not small_texture.is_valid():
		push_error("CompressEffect: could not create the small texture for size ", small_size)

	text_mult = Vector2(float(screen_size.x) / float(small_size.x), float(screen_size.y) / float(small_size.y))
# Called by the rendering thread every frame.
# Runs the shrink pass into small_texture and then the block pass over the
# colour buffer, once per view.
func _render_callback(p_effect_callback_type: EffectCallbackType, p_render_data: RenderData) -> void:
	if not rd or p_effect_callback_type != EFFECT_CALLBACK_TYPE_POST_TRANSPARENT:
		return
	# Both passes are bound below, so both pipelines have to exist.
	if not shrink_pipeline.is_valid() or not blur_pipeline.is_valid():
		return

	# If you need to compare the original to the processed version, enable these and it will turn off every other second
	#var second:int = floori(Time.get_ticks_msec() / 1000.0)
	#if second%2 == 0:
	#	return

	# Get our render scene buffers object, this gives us access to our render buffers.
	# Note that implementation differs per renderer hence the need for the cast.
	var render_scene_buffers: RenderSceneBuffersRD = p_render_data.get_render_scene_buffers()
	if not render_scene_buffers:
		return

	# Get our render size, this is the 3D render resolution!
	var size: Vector2i = render_scene_buffers.get_internal_size()
	# A zero in either dimension leaves nothing to process and would make the
	# small texture impossible to create.
	if size.x == 0 or size.y == 0:
		return

	if size != screen_size:
		set_up_screen_size(size)
	if not small_texture.is_valid():
		return

	# The small texture binding and the group counts do not depend on the view.
	var small_uniform:RDUniform = RDUniform.new()
	small_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_IMAGE
	small_uniform.binding = 0
	small_uniform.add_id(small_texture)

	var shrink_groups: Vector2i = small_groups
	@warning_ignore("integer_division")
	var x_groups := (screen_size.x - 1) / 8 + 1
	@warning_ignore("integer_division")
	var y_groups := (screen_size.y - 1) / 8 + 1

	# Loop through views just in case we're doing stereo rendering. No extra cost if this is mono.
	var view_count: int = render_scene_buffers.get_view_count()
	for view in view_count:
		# Get the RID for our color image, we will be reading from and writing to it.
		var colour_image: RID = render_scene_buffers.get_color_layer(view)

		var colour_uniform:RDUniform = RDUniform.new()
		colour_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_IMAGE
		colour_uniform.binding = 1
		colour_uniform.add_id(colour_image)

		# Create a uniform set, this will be cached, the cache will be cleared if our viewports configuration is changed.
		var shrink_uniform_set := UniformSetCacheRD.get_cache(shrink_shader, 0, [small_uniform, colour_uniform])
		var blur_uniform_set := UniformSetCacheRD.get_cache(block_shader, 0, [small_uniform, colour_uniform])

		# run one compute shader that saves to a small_texture and blurs chroma values
		var compute_list := rd.compute_list_begin()
		rd.compute_list_bind_compute_pipeline(compute_list, shrink_pipeline)
		rd.compute_list_bind_uniform_set(compute_list, shrink_uniform_set, 0)
		rd.compute_list_dispatch(compute_list, shrink_groups.x, shrink_groups.y, 1)

		# The second pass reads what the first pass wrote, so the writes to
		# small_texture have to be finished before it starts.
		rd.compute_list_add_barrier(compute_list)

		# run another compute shader that displays small_texture to full screen
		rd.compute_list_bind_compute_pipeline(compute_list, blur_pipeline)
		rd.compute_list_bind_uniform_set(compute_list, blur_uniform_set, 0)
		rd.compute_list_dispatch(compute_list, x_groups, y_groups, 1)
		rd.compute_list_end()
#endregion
#[compute]
#version 450
// Compute shader that rebuilds the full-size image from the small texture.
// NOTE(review): presumably this is the file loaded as block_shader_path by the
// GDScript effect - confirm against the project layout.

// Invocations in the (x, y, z) dimension
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

// Reduced image; this shader only loads from it. The texture bound here has to
// use a format compatible with the rgba16f qualifier.
layout(rgba16f, set = 0, binding = 0) uniform image2D small_image;
// Full-size image that main() stores the reconstructed colour into.
layout(rgba16f, set = 0, binding = 1) uniform image2D color_image;
// Converts a colour from (y, u, v) back to (r, g, b).
// yuv.x is luma, yuv.y and yuv.z are the two chroma components.
vec3 yuv2rgb(vec3 yuv) {
	float red = yuv.x + 1.0 / 0.877 * yuv.z;
	float green = yuv.x - 0.39393 * yuv.y - 0.58081 * yuv.z;
	float blue = yuv.x + 1.0 / 0.493 * yuv.y;
	return vec3(red, green, blue);
}
// The code we want to execute in each invocation.
// Each invocation reconstructs one pixel of color_image: luma is interpolated
// between the four texels stored for the pixel's block, chroma is taken from
// the block's first texel.
void main() {
	const float block_size = 16.0;
	ivec2 uv = ivec2(gl_GlobalInvocationID.xy);

	// The dispatch is rounded up to whole 8x8 groups, so invocations past the
	// edge of the image must not touch it.
	ivec2 full_size = imageSize(color_image);
	if (uv.x >= full_size.x || uv.y >= full_size.y) {
		return;
	}

	// Position measured in blocks: the integer part picks the block, the
	// fractional part is the position inside it.
	vec2 block_pos = vec2(gl_GlobalInvocationID.xy) / block_size;
	vec2 uv_fract = fract(block_pos);

	// get offsets for four adjacent pixels, clamped so the +1 neighbours stay
	// inside small_image when its size is odd
	ivec2 small_max = imageSize(small_image) - ivec2(1);
	ivec2 uv_s = min(ivec2(block_pos) * 2, small_max);
	ivec2 uv_sr = min(uv_s + ivec2(1, 0), small_max);
	ivec2 uv_sd = min(uv_s + ivec2(0, 1), small_max);
	ivec2 uv_sdr = min(uv_s + ivec2(1, 1), small_max);

	vec3 colour_tl = imageLoad(small_image, uv_s).rgb;
	vec3 colour_tr = imageLoad(small_image, uv_sr).rgb;
	vec3 colour_bl = imageLoad(small_image, uv_sd).rgb;
	vec3 colour_br = imageLoad(small_image, uv_sdr).rgb;

	// linearly interpolate between four adjacent pixels
	float top_lum = mix(colour_tl.r, colour_tr.r, uv_fract.x);
	float bottom_lum = mix(colour_bl.r, colour_br.r, uv_fract.x);
	float mixed_lum = mix(top_lum, bottom_lum, uv_fract.y);

	imageStore(color_image, uv, vec4(yuv2rgb(vec3(mixed_lum, colour_tl.gb)), 1.0));
}
#[compute]
#version 450
// Compute shader that reduces the source image to 2x2 texels per block.
// NOTE(review): presumably this is the file loaded as shrink_shader_path by the
// GDScript effect - confirm against the project layout.

// Invocations in the (x, y, z) dimension
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

// Reduced image that main() stores into. The texture bound here has to use a
// format compatible with the rgba16f qualifier.
layout(rgba16f, set = 0, binding = 0) uniform image2D small_image;
// Full-size image; this shader only loads from it.
layout(rgba16f, set = 0, binding = 1) uniform image2D source_image;
// Converts a colour from (r, g, b) to (y, u, v): luma first, then the two
// chroma components derived from blue and red.
// based on https://www.shadertoy.com/view/3lycWz
vec3 rgb2yuv(vec3 rgb) {
	float luma = 0.299 * rgb.r + 0.587 * rgb.g + 0.114 * rgb.b;
	float chroma_u = 0.493 * (rgb.b - luma);
	float chroma_v = 0.877 * (rgb.r - luma);
	return vec3(luma, chroma_u, chroma_v);
}
// Stores value at pos only when pos lies inside small_image, whose size is
// passed in as size.
void store_small(ivec2 pos, ivec2 size, vec4 value) {
	if (pos.x < size.x && pos.y < size.y) {
		imageStore(small_image, pos, value);
	}
}

// The code we want to execute in each invocation.
// Each invocation handles one block_size x block_size square of source_image:
// it samples the four corners, keeps each corner's luma, averages and
// posterizes their chroma, and stores the result as 2x2 texels of small_image.
void main() {
	const float posterize_levels = 256.0;
	const int block_size = 16;
	const int corner_offset = block_size - 1;

	ivec2 block_id = ivec2(gl_GlobalInvocationID.xy);
	ivec2 out_uv = block_id * 2; // stores 2x2 values for corners

	// The dispatch can cover more invocations than there are blocks; the
	// surplus ones have nothing to write.
	ivec2 small_size = imageSize(small_image);
	if (out_uv.x >= small_size.x || out_uv.y >= small_size.y) {
		return;
	}

	// round this to an block_size x block_size square, keeping the far corners
	// inside the image when its size is not a multiple of block_size
	ivec2 source_max = imageSize(source_image) - ivec2(1);
	ivec2 c_tl = min(block_id * block_size, source_max);
	ivec2 c_tr = min(c_tl + ivec2(corner_offset, 0), source_max);
	ivec2 c_bl = min(c_tl + ivec2(0, corner_offset), source_max);
	ivec2 c_br = min(c_tl + ivec2(corner_offset, corner_offset), source_max);

	// sample four corners of block_size x block_size square
	vec3 colour_tl = rgb2yuv(imageLoad(source_image, c_tl).rgb);
	vec3 colour_tr = rgb2yuv(imageLoad(source_image, c_tr).rgb);
	vec3 colour_bl = rgb2yuv(imageLoad(source_image, c_bl).rgb);
	vec3 colour_br = rgb2yuv(imageLoad(source_image, c_br).rgb);

	// average uv of each but leave y unique
	// ?? I think it's better averaging the uv instead of averaging the rgb and then getting the uv?
	vec2 average_uv = (colour_tl.gb + colour_tr.gb + colour_bl.gb + colour_br.gb) / 4.0;
	//average_uv = clamp(average_uv, 0.0, 1.0);
	average_uv = round(average_uv * posterize_levels) / posterize_levels;

	// write four corners into output; the +1 texels fall outside small_image
	// when its size is odd, so every store is bounds-checked
	store_small(out_uv, small_size, vec4(colour_tl.r, average_uv.rg, 1.0));
	store_small(out_uv + ivec2(1, 0), small_size, vec4(colour_tr.r, average_uv.rg, 1.0));
	store_small(out_uv + ivec2(0, 1), small_size, vec4(colour_bl.r, average_uv.rg, 1.0));
	store_small(out_uv + ivec2(1, 1), small_size, vec4(colour_br.r, average_uv.rg, 1.0));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment