partybusiness · March 20, 2025 16:20
diff --git a/compress_effect.gd b/compress_effect.gd
 @tool
 extends CompositorEffect
 class_name CompressEffect
 #
 #@tool

 # Compute shader loaded into shrink_shader by _initialize_compute.
 const shrink_shader_path:String = "res://compression/compress_shrink_effect.glsl"

 # Compute shader loaded into block_shader by _initialize_compute.
 const block_shader_path:String = "res://compression/compress_final_effect.glsl"

 # Rendering device every RID below is created on.
 var rd: RenderingDevice
 # texture_is_format_supported_for_usage
 var shrink_shader: RID
 var shrink_pipeline: RID

 var block_shader: RID
 # Compute pipeline created from block_shader.
 var blur_pipeline: RID

 # small texture shrink_shader writes to and block_shader reads from
 var small_texture: RID

 # ratio of screen_size to small_size, recomputed by set_up_screen_size
 var text_mult:Vector2 = Vector2.ONE

 # size of screen; initialised with a Vector2i so the value matches the declared type
 var screen_size:Vector2i = Vector2i.ZERO

 var block_size:float = 16.0 # make sure this matches block size in compute shaders

 var small_size:Vector2i = Vector2i(550, 320) # will get overridden with screen size

 # Number of workgroups to dispatch for the shrink pass.
 # Every shrink invocation writes a 2x2 cell of small_texture and a workgroup
 # is 8x8 invocations, so one group covers 16 texels per axis. Dividing by 8
 # (as before) launched four times more groups than the pass can use.
 var small_groups:Vector2i:
 	get:
 		return Vector2i(ceili(small_size.x / 16.0), ceili(small_size.y / 16.0))

 # Fetches the rendering device, queues shader compilation on the render
 # thread and prints the device's compute limits.
 func _init() -> void:
 	print("init")
 	#effect_callback_type = EFFECT_CALLBACK_TYPE_POST_TRANSPARENT
 	rd = RenderingServer.get_rendering_device()
 	if not rd:
 		# Without a RenderingDevice there is nothing to compile or query;
 		# calling limit_get on a null device would raise an error.
 		push_warning("CompressEffect: no RenderingDevice available, effect disabled")
 		return
 	RenderingServer.call_on_render_thread(_initialize_compute)
 	# check limits
 	# seems to be [count_x = 2147483647, size_x = 1536,count_inv = 1536]
 	var count_x:int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X)
 	var count_inv:int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS)
 	var size_x:int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X)
 	print("limits =[%d,%d,%d]"%[count_x, size_x, count_inv])

 # Frees the shader and texture RIDs this effect created once the object is
 # about to be deleted.
 func _notification(what: int) -> void:
 	if what != NOTIFICATION_PREDELETE:
 		return
 	# Same order as before: shrink shader, block shader, then the small texture.
 	for owned_rid: RID in [shrink_shader, block_shader, small_texture]:
 		if owned_rid.is_valid():
 			rd.free_rid(owned_rid)


 #region Code in this region runs on the rendering thread.
 # Compile our shader at initialization.
 # Creates both compute shaders and their pipelines. A pipeline is only
 # created when its shader RID is valid, so a failed shader leaves the
 # matching pipeline RID invalid.
 func _initialize_compute() -> void:
 	rd = RenderingServer.get_rendering_device()
 	print("init compute ",(rd))
 	if not rd:
 		return

 	shrink_shader = _create_shader(shrink_shader_path)
 	if shrink_shader.is_valid():
 		shrink_pipeline = rd.compute_pipeline_create(shrink_shader)

 	block_shader = _create_shader(block_shader_path)
 	if block_shader.is_valid():
 		blur_pipeline = rd.compute_pipeline_create(block_shader)


 # Loads the shader file at `path` and creates a shader from its SPIR-V.
 # Returns an invalid RID (after reporting an error) when the file cannot be
 # loaded or its compute stage did not compile.
 func _create_shader(path: String) -> RID:
 	var shader_file := load(path) as RDShaderFile
 	if shader_file == null:
 		push_error("CompressEffect: could not load compute shader '%s'" % path)
 		return RID()
 	var shader_spirv: RDShaderSPIRV = shader_file.get_spirv()
 	if shader_spirv == null or shader_spirv.compile_error_compute != "":
 		push_error("CompressEffect: compute shader '%s' failed to compile" % path)
 		return RID()
 	return rd.shader_create_from_spirv(shader_spirv)
 		
 # Rebuilds small_texture for a new render resolution.
 # size: the new internal render size; stored in screen_size.
 # Also updates small_size and text_mult.
 func set_up_screen_size(size:Vector2i) -> void:
 	print ("new size ",size)
 	screen_size = size

 	# Release the texture made for the previous resolution; it was leaked on
 	# every resize before.
 	if small_texture.is_valid():
 		rd.free_rid(small_texture)
 		small_texture = RID()

 	# 2x2 texels for every block in source, counting partial blocks at the
 	# right/bottom edge as whole ones. Rounding screen_size / 8 up instead
 	# gives an odd size for partial blocks (1080 -> 135), which puts the last
 	# row/column of cells outside the texture.
 	var blocks := Vector2i(ceili(screen_size.x / block_size), ceili(screen_size.y / block_size))
 	small_size = blocks * 2
 	print("made size ", small_size, " ", small_groups)

 	var tf:RDTextureFormat = RDTextureFormat.new()
 	tf.texture_type = RenderingDevice.TEXTURE_TYPE_2D
 	tf.width = small_size.x
 	tf.height = small_size.y
 	tf.depth = 1
 	tf.array_layers = 1
 	tf.mipmaps = 1
 	# Both compute shaders bind this image as rgba16f, so the texture has to
 	# be created with the matching 16-bit float format.
 	tf.format = RenderingDevice.DATA_FORMAT_R16G16B16A16_SFLOAT
 	tf.usage_bits = RenderingDevice.TEXTURE_USAGE_SAMPLING_BIT | RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT | RenderingDevice.TEXTURE_USAGE_STORAGE_BIT

 	# Only the RenderingDevice RID is used; no Texture2DRD wrapper is needed.
 	small_texture = rd.texture_create(tf, RDTextureView.new(), [])
 	if not small_texture.is_valid():
 		push_error("CompressEffect: could not create %dx%d intermediate texture" % [small_size.x, small_size.y])

 	text_mult = Vector2(float(screen_size.x) / float(small_size.x), float(screen_size.y) / float(small_size.y))
 	

 # Called by the rendering thread every frame.
 # Runs the two compute passes on every view of the current frame: the shrink
 # pass fills small_texture from the colour buffer, then the block pass
 # writes the reconstructed image back over the colour buffer.
 func _render_callback(p_effect_callback_type: EffectCallbackType, p_render_data: RenderData) -> void:
 	if not rd or p_effect_callback_type != EFFECT_CALLBACK_TYPE_POST_TRANSPARENT:
 		return
 	# Both passes are dispatched below, so both pipelines have to exist.
 	if not shrink_pipeline.is_valid() or not blur_pipeline.is_valid():
 		return

 	# If you need to compare the original to the processed version, enable these and it will turn off every other second
 	#var second:int = floori(Time.get_ticks_msec() / 1000.0)
 	#if second%2 == 0:
 	#	return

 	# Get our render scene buffers object, this gives us access to our render buffers.
 	# The cast yields null on renderers that do not provide RenderSceneBuffersRD.
 	var render_scene_buffers := p_render_data.get_render_scene_buffers() as RenderSceneBuffersRD
 	if not render_scene_buffers:
 		return

 	# Get our render size, this is the 3D render resolution!
 	var size: Vector2i = render_scene_buffers.get_internal_size()
 	# A zero in either dimension leaves nothing to process and would make
 	# the intermediate texture impossible to create.
 	if size.x == 0 or size.y == 0:
 		return
 	if size != screen_size:
 		set_up_screen_size(size)
 	if not small_texture.is_valid():
 		return

 	# One 8x8 workgroup per 8x8 pixels of the full frame, rounded up.
 	@warning_ignore("integer_division")
 	var x_groups: int = (screen_size.x - 1) / 8 + 1
 	@warning_ignore("integer_division")
 	var y_groups: int = (screen_size.y - 1) / 8 + 1
 	var shrink_groups: Vector2i = small_groups

 	var small_uniform:RDUniform = RDUniform.new()
 	small_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_IMAGE
 	small_uniform.binding = 0
 	small_uniform.add_id(small_texture)

 	# Loop through views just in case we're doing stereo rendering. No extra cost if this is mono.
 	var view_count: int = render_scene_buffers.get_view_count()
 	for view in view_count:
 		# Get the RID for our color image, we will be reading from and writing to it.
 		var colour_image: RID = render_scene_buffers.get_color_layer(view)

 		var colour_uniform:RDUniform = RDUniform.new()
 		colour_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_IMAGE
 		colour_uniform.binding = 1
 		colour_uniform.add_id(colour_image)

 		# Uniform sets come from the cache, one per shader.
 		var shrink_uniform_set := UniformSetCacheRD.get_cache(shrink_shader, 0, [small_uniform, colour_uniform])
 		var blur_uniform_set := UniformSetCacheRD.get_cache(block_shader, 0, [small_uniform, colour_uniform])

 		# run one compute shader that saves to a small_texture and blurs chroma values
 		var compute_list := rd.compute_list_begin()
 		rd.compute_list_bind_compute_pipeline(compute_list, shrink_pipeline)
 		rd.compute_list_bind_uniform_set(compute_list, shrink_uniform_set, 0)
 		rd.compute_list_dispatch(compute_list, shrink_groups.x, shrink_groups.y, 1)

 		# The second pass reads what the first pass wrote to small_texture and
 		# overwrites the colour image the first pass read, so the first pass
 		# has to finish before the second one starts.
 		rd.compute_list_add_barrier(compute_list)

 		# run another compute shader that displays small_texture to full screen
 		rd.compute_list_bind_compute_pipeline(compute_list, blur_pipeline)
 		rd.compute_list_bind_uniform_set(compute_list, blur_uniform_set, 0)
 		rd.compute_list_dispatch(compute_list, x_groups, y_groups, 1)
 		rd.compute_list_end()
 #endregion
diff --git a/compress_final_effect.glsl b/compress_final_effect.glsl
 #[compute]
 #version 450

 // Invocations in the (x, y, z) dimension
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

 layout(rgba16f, set = 0, binding = 0) uniform image2D small_image;

 layout(rgba16f, set = 0, binding = 1) uniform image2D color_image;


 // Converts a (y, u, v) triple to RGB.
 vec3 yuv2rgb(vec3 yuv) {
 	float luma = yuv.x;
 	float red = luma + 1.0 / 0.877 * yuv.z;
 	float green = luma - 0.39393 * yuv.y - 0.58081 * yuv.z;
 	float blue = luma + 1.0 / 0.493 * yuv.y;
 	return vec3(red, green, blue);
 }


 // The code we want to execute in each invocation
 // Writes one output pixel: luma is blended between the four texels of the
 // pixel's 2x2 cell in small_image, chroma comes from the cell's first texel.
 void main() {
 	const float block_size = 16.0;

 	ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

 	// position measured in blocks: integer part picks the cell,
 	// fractional part is the blend weight inside it
 	vec2 block_pos = gl_GlobalInvocationID.xy / block_size;
 	ivec2 cell = ivec2(block_pos) * 2;
 	vec2 weight = fract(block_pos);

 	vec3 top_left = imageLoad(small_image, cell).rgb;
 	vec3 top_right = imageLoad(small_image, cell + ivec2(1, 0)).rgb;
 	vec3 bottom_left = imageLoad(small_image, cell + ivec2(0, 1)).rgb;
 	vec3 bottom_right = imageLoad(small_image, cell + ivec2(1, 1)).rgb;

 	// linearly interpolate between four adjacent pixels
 	float top_lum = mix(top_left.r, top_right.r, weight.x);
 	float bottom_lum = mix(bottom_left.r, bottom_right.r, weight.x);
 	float mixed_lum = mix(top_lum, bottom_lum, weight.y);

 	imageStore(color_image, pixel, vec4(yuv2rgb(vec3(mixed_lum, top_left.gb)), 1.0));
 }
diff --git a/compress_shrink_effect.glsl b/compress_shrink_effect.glsl
 #[compute]
 #version 450

 // Invocations in the (x, y, z) dimension
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

 layout(rgba16f, set = 0, binding = 0) uniform image2D small_image;

 layout(rgba16f, set = 0, binding = 1) uniform image2D source_image;

 //based on https://www.shadertoy.com/view/3lycWz

 // Converts RGB to (y, u, v): y is the weighted sum of the channels,
 // u and v are scaled blue-minus-y and red-minus-y differences.
 vec3 rgb2yuv(vec3 rgb) {
 	float luma = 0.299*rgb.r + 0.587*rgb.g + 0.114*rgb.b;
 	float u = 0.493*(rgb.b - luma);
 	float v = 0.877*(rgb.r - luma);
 	return vec3(luma, u, v);
 }


 // The code we want to execute in each invocation
 // Handles one block_size x block_size block of source_image: samples its four
 // corners, keeps each corner's luma, averages and quantises their chroma, and
 // stores the result as a 2x2 cell in small_image.
 void main() {
 	const float posterize_levels = 256.0;
 	const int block_size = 16;
 	const int corner_offset = block_size - 1;

 	// stores 2x2 values for corners
 	ivec2 out_uv = ivec2(gl_GlobalInvocationID.xy) * 2;

 	// invocations whose cell lies entirely outside small_image have nothing to do
 	if (any(greaterThanEqual(out_uv, imageSize(small_image)))) {
 		return;
 	}

 	// Clamp the corner coordinates to the source image. For a partial block at
 	// the right/bottom edge the far corners otherwise fall outside the image,
 	// and those out-of-range loads fed zeros into the luma and chroma.
 	ivec2 last_pixel = imageSize(source_image) - ivec2(1);
 	ivec2 origin = ivec2(gl_GlobalInvocationID.xy) * block_size;
 	ivec2 c_tl = min(origin, last_pixel);
 	ivec2 c_tr = min(origin + ivec2(corner_offset, 0), last_pixel);
 	ivec2 c_bl = min(origin + ivec2(0, corner_offset), last_pixel);
 	ivec2 c_br = min(origin + ivec2(corner_offset, corner_offset), last_pixel);

 	// sample four corners of block_size x block_size square
 	vec3 colour_tl = rgb2yuv(imageLoad(source_image, c_tl).rgb);
 	vec3 colour_tr = rgb2yuv(imageLoad(source_image, c_tr).rgb);
 	vec3 colour_bl = rgb2yuv(imageLoad(source_image, c_bl).rgb);
 	vec3 colour_br = rgb2yuv(imageLoad(source_image, c_br).rgb);

 	// average uv of each but leave y unique, then snap to posterize_levels steps
 	vec2 average_uv = (colour_tl.gb + colour_tr.gb + colour_bl.gb + colour_br.gb) / 4.0;
 	average_uv = round(average_uv * posterize_levels) / posterize_levels;

 	// write four corners into output
 	imageStore(small_image, out_uv, vec4(colour_tl.r, average_uv.rg, 1.0));
 	imageStore(small_image, out_uv + ivec2(1,0), vec4(colour_tr.r, average_uv.rg, 1.0));
 	imageStore(small_image, out_uv + ivec2(0,1), vec4(colour_bl.r, average_uv.rg, 1.0));
 	imageStore(small_image, out_uv + ivec2(1,1), vec4(colour_br.r, average_uv.rg, 1.0));
 }
	@tool
	extends CompositorEffect
	class_name CompressEffect
	#
	#@tool

	# Compute shader loaded into shrink_shader by _initialize_compute.
	const shrink_shader_path:String = "res://compression/compress_shrink_effect.glsl"

	# Compute shader loaded into block_shader by _initialize_compute.
	const block_shader_path:String = "res://compression/compress_final_effect.glsl"

	# Rendering device every RID below is created on.
	var rd: RenderingDevice
	# texture_is_format_supported_for_usage
	var shrink_shader: RID
	var shrink_pipeline: RID

	var block_shader: RID
	# Compute pipeline created from block_shader.
	var blur_pipeline: RID

	# small texture shrink_shader writes to and block_shader reads from
	var small_texture: RID

	# ratio of screen_size to small_size, recomputed by set_up_screen_size
	var text_mult:Vector2 = Vector2.ONE

	# size of screen; initialised with a Vector2i so the value matches the declared type
	var screen_size:Vector2i = Vector2i.ZERO

	var block_size:float = 16.0 # make sure this matches block size in compute shaders

	var small_size:Vector2i = Vector2i(550, 320) # will get overridden with screen size

	# Number of workgroups to dispatch for the shrink pass.
	# Every shrink invocation writes a 2x2 cell of small_texture and a workgroup
	# is 8x8 invocations, so one group covers 16 texels per axis. Dividing by 8
	# (as before) launched four times more groups than the pass can use.
	# The nesting of the getter, lost in this copy, is restored.
	var small_groups:Vector2i:
		get:
			return Vector2i(ceili(small_size.x / 16.0), ceili(small_size.y / 16.0))

	# Fetches the rendering device, queues shader compilation on the render
	# thread and prints the device's compute limits.
	func _init() -> void:
		print("init")
		#effect_callback_type = EFFECT_CALLBACK_TYPE_POST_TRANSPARENT
		rd = RenderingServer.get_rendering_device()
		if not rd:
			# Without a RenderingDevice there is nothing to compile or query;
			# calling limit_get on a null device would raise an error.
			push_warning("CompressEffect: no RenderingDevice available, effect disabled")
			return
		RenderingServer.call_on_render_thread(_initialize_compute)
		# check limits
		# seems to be [count_x = 2147483647, size_x = 1536,count_inv = 1536]
		var count_x:int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X)
		var count_inv:int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS)
		var size_x:int = rd.limit_get(RenderingDevice.Limit.LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X)
		print("limits =[%d,%d,%d]"%[count_x, size_x, count_inv])

	# Frees the shader and texture RIDs this effect created once the object is
	# about to be deleted. The nesting lost in this copy is restored; without
	# it the function does not parse.
	func _notification(what: int) -> void:
		if what != NOTIFICATION_PREDELETE:
			return
		# Same order as before: shrink shader, block shader, then the small texture.
		for owned_rid: RID in [shrink_shader, block_shader, small_texture]:
			if owned_rid.is_valid():
				rd.free_rid(owned_rid)


	#region Code in this region runs on the rendering thread.
	# Compile our shader at initialization.
	# Creates both compute shaders and their pipelines. A pipeline is only
	# created when its shader RID is valid, so a failed shader leaves the
	# matching pipeline RID invalid.
	func _initialize_compute() -> void:
		rd = RenderingServer.get_rendering_device()
		print("init compute ",(rd))
		if not rd:
			return

		shrink_shader = _create_shader(shrink_shader_path)
		if shrink_shader.is_valid():
			shrink_pipeline = rd.compute_pipeline_create(shrink_shader)

		block_shader = _create_shader(block_shader_path)
		if block_shader.is_valid():
			blur_pipeline = rd.compute_pipeline_create(block_shader)


	# Loads the shader file at `path` and creates a shader from its SPIR-V.
	# Returns an invalid RID (after reporting an error) when the file cannot be
	# loaded or its compute stage did not compile.
	func _create_shader(path: String) -> RID:
		var shader_file := load(path) as RDShaderFile
		if shader_file == null:
			push_error("CompressEffect: could not load compute shader '%s'" % path)
			return RID()
		var shader_spirv: RDShaderSPIRV = shader_file.get_spirv()
		if shader_spirv == null or shader_spirv.compile_error_compute != "":
			push_error("CompressEffect: compute shader '%s' failed to compile" % path)
			return RID()
		return rd.shader_create_from_spirv(shader_spirv)

	# Rebuilds small_texture for a new render resolution.
	# size: the new internal render size; stored in screen_size.
	# Also updates small_size and text_mult.
	func set_up_screen_size(size:Vector2i) -> void:
		print ("new size ",size)
		screen_size = size

		# Release the texture made for the previous resolution; it was leaked on
		# every resize before.
		if small_texture.is_valid():
			rd.free_rid(small_texture)
			small_texture = RID()

		# 2x2 texels for every block in source, counting partial blocks at the
		# right/bottom edge as whole ones. Rounding screen_size / 8 up instead
		# gives an odd size for partial blocks (1080 -> 135), which puts the last
		# row/column of cells outside the texture.
		var blocks := Vector2i(ceili(screen_size.x / block_size), ceili(screen_size.y / block_size))
		small_size = blocks * 2
		print("made size ", small_size, " ", small_groups)

		var tf:RDTextureFormat = RDTextureFormat.new()
		tf.texture_type = RenderingDevice.TEXTURE_TYPE_2D
		tf.width = small_size.x
		tf.height = small_size.y
		tf.depth = 1
		tf.array_layers = 1
		tf.mipmaps = 1
		# Both compute shaders bind this image as rgba16f, so the texture has to
		# be created with the matching 16-bit float format.
		tf.format = RenderingDevice.DATA_FORMAT_R16G16B16A16_SFLOAT
		tf.usage_bits = RenderingDevice.TEXTURE_USAGE_SAMPLING_BIT | RenderingDevice.TEXTURE_USAGE_CAN_UPDATE_BIT | RenderingDevice.TEXTURE_USAGE_STORAGE_BIT

		# Only the RenderingDevice RID is used; no Texture2DRD wrapper is needed.
		small_texture = rd.texture_create(tf, RDTextureView.new(), [])
		if not small_texture.is_valid():
			push_error("CompressEffect: could not create %dx%d intermediate texture" % [small_size.x, small_size.y])

		text_mult = Vector2(float(screen_size.x) / float(small_size.x), float(screen_size.y) / float(small_size.y))


	# Called by the rendering thread every frame.
	# Runs the two compute passes on every view of the current frame: the shrink
	# pass fills small_texture from the colour buffer, then the block pass
	# writes the reconstructed image back over the colour buffer.
	func _render_callback(p_effect_callback_type: EffectCallbackType, p_render_data: RenderData) -> void:
		if not rd or p_effect_callback_type != EFFECT_CALLBACK_TYPE_POST_TRANSPARENT:
			return
		# Both passes are dispatched below, so both pipelines have to exist.
		if not shrink_pipeline.is_valid() or not blur_pipeline.is_valid():
			return

		# If you need to compare the original to the processed version, enable these and it will turn off every other second
		#var second:int = floori(Time.get_ticks_msec() / 1000.0)
		#if second%2 == 0:
		#	return

		# Get our render scene buffers object, this gives us access to our render buffers.
		# The cast yields null on renderers that do not provide RenderSceneBuffersRD.
		var render_scene_buffers := p_render_data.get_render_scene_buffers() as RenderSceneBuffersRD
		if not render_scene_buffers:
			return

		# Get our render size, this is the 3D render resolution!
		var size: Vector2i = render_scene_buffers.get_internal_size()
		# A zero in either dimension leaves nothing to process and would make
		# the intermediate texture impossible to create.
		if size.x == 0 or size.y == 0:
			return
		if size != screen_size:
			set_up_screen_size(size)
		if not small_texture.is_valid():
			return

		# One 8x8 workgroup per 8x8 pixels of the full frame, rounded up.
		@warning_ignore("integer_division")
		var x_groups: int = (screen_size.x - 1) / 8 + 1
		@warning_ignore("integer_division")
		var y_groups: int = (screen_size.y - 1) / 8 + 1
		var shrink_groups: Vector2i = small_groups

		var small_uniform:RDUniform = RDUniform.new()
		small_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_IMAGE
		small_uniform.binding = 0
		small_uniform.add_id(small_texture)

		# Loop through views just in case we're doing stereo rendering. No extra cost if this is mono.
		var view_count: int = render_scene_buffers.get_view_count()
		for view in view_count:
			# Get the RID for our color image, we will be reading from and writing to it.
			var colour_image: RID = render_scene_buffers.get_color_layer(view)

			var colour_uniform:RDUniform = RDUniform.new()
			colour_uniform.uniform_type = RenderingDevice.UNIFORM_TYPE_IMAGE
			colour_uniform.binding = 1
			colour_uniform.add_id(colour_image)

			# Uniform sets come from the cache, one per shader.
			var shrink_uniform_set := UniformSetCacheRD.get_cache(shrink_shader, 0, [small_uniform, colour_uniform])
			var blur_uniform_set := UniformSetCacheRD.get_cache(block_shader, 0, [small_uniform, colour_uniform])

			# run one compute shader that saves to a small_texture and blurs chroma values
			var compute_list := rd.compute_list_begin()
			rd.compute_list_bind_compute_pipeline(compute_list, shrink_pipeline)
			rd.compute_list_bind_uniform_set(compute_list, shrink_uniform_set, 0)
			rd.compute_list_dispatch(compute_list, shrink_groups.x, shrink_groups.y, 1)

			# The second pass reads what the first pass wrote to small_texture and
			# overwrites the colour image the first pass read, so the first pass
			# has to finish before the second one starts.
			rd.compute_list_add_barrier(compute_list)

			# run another compute shader that displays small_texture to full screen
			rd.compute_list_bind_compute_pipeline(compute_list, blur_pipeline)
			rd.compute_list_bind_uniform_set(compute_list, blur_uniform_set, 0)
			rd.compute_list_dispatch(compute_list, x_groups, y_groups, 1)
			rd.compute_list_end()
	#endregion
	#[compute]
	#version 450

	// Invocations in the (x, y, z) dimension
	layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

	layout(rgba16f, set = 0, binding = 0) uniform image2D small_image;

	layout(rgba16f, set = 0, binding = 1) uniform image2D color_image;


	// Converts a (y, u, v) triple to RGB.
	// The multiplication operators in the green channel had been lost
	// ("0.39393u - 0.58081v"), which is not valid GLSL; they are restored here.
	vec3 yuv2rgb(vec3 yuv) {
		float y = yuv.x;
		float u = yuv.y;
		float v = yuv.z;
		vec3 rgb = vec3(
			y + 1.0 / 0.877*v,
			y - 0.39393*u - 0.58081*v,
			y + 1.0 / 0.493*u
		);
		return rgb;
	}


	// The code we want to execute in each invocation
	// Writes one output pixel: luma is blended between the four texels of the
	// pixel's 2x2 cell in small_image, chroma comes from the cell's first texel.
	void main() {
		const float block_size = 16.0;

		ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

		// position measured in blocks: integer part picks the cell,
		// fractional part is the blend weight inside it
		vec2 block_pos = gl_GlobalInvocationID.xy / block_size;
		ivec2 cell = ivec2(block_pos) * 2;
		vec2 weight = fract(block_pos);

		vec3 top_left = imageLoad(small_image, cell).rgb;
		vec3 top_right = imageLoad(small_image, cell + ivec2(1, 0)).rgb;
		vec3 bottom_left = imageLoad(small_image, cell + ivec2(0, 1)).rgb;
		vec3 bottom_right = imageLoad(small_image, cell + ivec2(1, 1)).rgb;

		// linearly interpolate between four adjacent pixels
		float top_lum = mix(top_left.r, top_right.r, weight.x);
		float bottom_lum = mix(bottom_left.r, bottom_right.r, weight.x);
		float mixed_lum = mix(top_lum, bottom_lum, weight.y);

		imageStore(color_image, pixel, vec4(yuv2rgb(vec3(mixed_lum, top_left.gb)), 1.0));
	}