Created January 10, 2020 18:20
D4686_master.diff
diff -Naur a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
--- a/intern/cycles/blender/addon/engine.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/engine.py 2020-01-10 21:01:30.634277071 +0300
@@ -258,6 +258,7 @@
     if crl.pass_debug_bvh_traversed_instances: yield ("Debug BVH Traversed Instances", "X", 'VALUE')
     if crl.pass_debug_bvh_intersections: yield ("Debug BVH Intersections", "X", 'VALUE')
     if crl.pass_debug_ray_bounces: yield ("Debug Ray Bounces", "X", 'VALUE')
+    if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
 
     if crl.use_pass_volume_direct: yield ("VolumeDir", "RGB", 'COLOR')
     if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR')
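For orientation: each tuple yielded above is (pass name, channel ids, channel type), and Cycles hands these to Blender's RenderEngine.register_pass() when view layers are synced. A minimal sketch of that consuming side, assuming the patched generator is named list_render_passes and takes the view layer (both assumptions from context, not shown in the hunk):

def register_passes(engine, scene, view_layer):
    # register_pass() is the real bpy RenderEngine API; the channel count is
    # just the length of the channel-id string ("X" -> 1, "RGB" -> 3).
    for name, channel_ids, channel_type in list_render_passes(view_layer):
        engine.register_pass(scene, view_layer, name,
                             len(channel_ids), channel_ids, channel_type)

With this hunk applied, enabling pass_debug_sample_count on a view layer makes the new "Debug Sample Count" pass appear in that enumeration as a single-channel 'VALUE' pass.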
diff -Naur a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
--- a/intern/cycles/blender/addon/properties.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/properties.py 2020-01-10 21:06:42.827616043 +0300
@@ -112,6 +112,7 @@
 enum_sampling_pattern = (
     ('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
     ('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
+    ('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
 )
 
 enum_integrator = (
@@ -357,6 +358,26 @@
         default=0,
     )
 
+    adaptive_threshold: FloatProperty(
+        name="Adaptive Sampling Threshold",
+        description="Zero for automatic setting based on AA samples",
+        min=0.0, max=1.0,
+        default=0.0,
+    )
+
+    adaptive_min_samples: IntProperty(
+        name="Adaptive Min Samples",
+        description="Minimum AA samples for adaptive sampling. Zero for automatic setting based on AA samples",
+        min=0, max=4096,
+        default=0,
+    )
+
+    use_adaptive_sampling: BoolProperty(
+        name="Use adaptive sampling",
+        description="Automatically determine the number of samples per pixel based on a variance estimation",
+        default=False,
+    )
+
     caustics_reflective: BoolProperty(
         name="Reflective Caustics",
         description="Use reflective caustics, resulting in a brighter image (more noise but added realism)",
@@ -1285,6 +1306,12 @@
         default=False,
         update=update_render_passes,
     )
+    pass_debug_sample_count: BoolProperty(
+        name="Debug Sample Count",
+        description="Number of samples/camera rays per pixel",
+        default=False,
+        update=update_render_passes,
+    )
 
     use_pass_volume_direct: BoolProperty(
         name="Volume Direct",
diff -Naur a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
--- a/intern/cycles/blender/addon/ui.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/ui.py 2020-01-10 20:42:43.454256722 +0300
@@ -188,6 +188,8 @@
         col.prop(cscene, "aa_samples", text="Render")
         col.prop(cscene, "preview_aa_samples", text="Viewport")
 
+        col.prop(cscene, "use_adaptive_sampling", text="Adaptive Sampling")
+
 
 class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
     bl_label = "Sub Samples"
@@ -239,7 +241,13 @@
         row.prop(cscene, "seed")
         row.prop(cscene, "use_animated_seed", text="", icon='TIME')
 
-        layout.prop(cscene, "sampling_pattern", text="Pattern")
+        col = layout.column(align=True)
+        col.active = not(cscene.use_adaptive_sampling)
+        col.prop(cscene, "sampling_pattern", text="Pattern")
+        col = layout.column(align=True)
+        col.active = cscene.use_adaptive_sampling
+        col.prop(cscene, "adaptive_min_samples", text="Adaptive Min Samples")
+        col.prop(cscene, "adaptive_threshold", text="Adaptive Threshold")
 
         layout.prop(cscene, "use_square_samples")
 
@@ -803,6 +811,8 @@
         col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
         col = flow.column()
         col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
+        col = flow.column()
+        col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
 
         layout.separator()
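The UI hunks rely on UILayout.active to grey out whichever group of controls is currently irrelevant: the sampling pattern while adaptive sampling is on, and the two adaptive fields while it is off (active=False dims a sub-layout; unlike enabled=False it does not block editing). A self-contained sketch of the same pattern; the panel class is illustrative, not part of the patch:

import bpy

class EXAMPLE_PT_adaptive_demo(bpy.types.Panel):
    """Illustrative panel, not from the patch."""
    bl_label = "Adaptive Sampling Demo"
    bl_space_type = 'PROPERTIES'
    bl_region_type = 'WINDOW'
    bl_context = "render"

    def draw(self, context):
        cscene = context.scene.cycles
        col = self.layout.column(align=True)
        col.active = not cscene.use_adaptive_sampling  # dimmed when overridden
        col.prop(cscene, "sampling_pattern", text="Pattern")
        col = self.layout.column(align=True)
        col.active = cscene.use_adaptive_sampling
        col.prop(cscene, "adaptive_min_samples", text="Adaptive Min Samples")
        col.prop(cscene, "adaptive_threshold", text="Adaptive Threshold")

bpy.utils.register_class(EXAMPLE_PT_adaptive_demo)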
+ # cmat = mat.cycles | |
+ | |
+ panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Volume') | |
+ | |
+ | |
+class CYCLES_MATERIAL_PT_displacement(CyclesButtonsPanel, Panel): | |
+ bl_label = "Displacement" | |
+ bl_context = "material" | |
+ | |
+ @classmethod | |
+ def poll(cls, context): | |
+ mat = context.material | |
+ return mat and (not mat.grease_pencil) and mat.node_tree and CyclesButtonsPanel.poll(context) | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ | |
+ mat = context.material | |
+ panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Displacement') | |
+ | |
+ | |
+class CYCLES_MATERIAL_PT_settings(CyclesButtonsPanel, Panel): | |
+ bl_label = "Settings" | |
+ bl_context = "material" | |
+ bl_options = {'DEFAULT_CLOSED'} | |
+ | |
+ @classmethod | |
+ def poll(cls, context): | |
+ mat = context.material | |
+ return mat and (not mat.grease_pencil) and CyclesButtonsPanel.poll(context) | |
+ | |
+ @staticmethod | |
+ def draw_shared(self, mat): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False | |
+ | |
+ layout.prop(mat, "pass_index") | |
+ | |
+ def draw(self, context): | |
+ self.draw_shared(self, context.material) | |
+ | |
+ | |
+class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel): | |
+ bl_label = "Surface" | |
+ bl_parent_id = "CYCLES_MATERIAL_PT_settings" | |
+ bl_context = "material" | |
+ | |
+ @staticmethod | |
+ def draw_shared(self, mat): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False | |
+ | |
+ cmat = mat.cycles | |
+ | |
+ col = layout.column() | |
+ col.prop(cmat, "sample_as_light", text="Multiple Importance") | |
+ col.prop(cmat, "use_transparent_shadow") | |
+ col.prop(cmat, "displacement_method", text="Displacement") | |
+ | |
+ def draw(self, context): | |
+ self.draw_shared(self, context.material) | |
+ | |
+ | |
+class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel): | |
+ bl_label = "Volume" | |
+ bl_parent_id = "CYCLES_MATERIAL_PT_settings" | |
+ bl_context = "material" | |
+ | |
+ @staticmethod | |
+ def draw_shared(self, context, mat): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False | |
+ | |
+ cmat = mat.cycles | |
+ | |
+ col = layout.column() | |
+ sub = col.column() | |
+ sub.active = use_cpu(context) | |
+ sub.prop(cmat, "volume_sampling", text="Sampling") | |
+ col.prop(cmat, "volume_interpolation", text="Interpolation") | |
+ col.prop(cmat, "homogeneous_volume", text="Homogeneous") | |
+ | |
+ def draw(self, context): | |
+ self.draw_shared(self, context, context.material) | |
+ | |
+ | |
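The draw_shared staticmethods above exist so that one block of layout code can back two registered panels: the Properties-editor panel and the node-editor copy produced by node_panel() in the classes tuple further down. A minimal Python sketch of that pattern, with illustrative names only (the real node_panel() helper is defined elsewhere in this file):

# Sketch only: one draw_shared staticmethod backing two panel classes.
class EXAMPLE_PT_settings:
    @staticmethod
    def draw_shared(self, mat):
        # All layout code lives here, written once.
        self.layout.prop(mat, "pass_index")

    def draw(self, context):
        self.draw_shared(self, context.material)


def node_panel(cls):
    # Hypothetical stand-in for the helper used below: clone the panel
    # class for the node editor while reusing the same draw code.
    return type("NODE_" + cls.__name__, (cls,), {"bl_space_type": 'NODE_EDITOR'})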
+class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel): | |
+ bl_label = "Bake" | |
+ bl_context = "render" | |
+ bl_options = {'DEFAULT_CLOSED'} | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ @classmethod | |
+ def poll(cls, context): | |
+ return CyclesButtonsPanel.poll(context) and not use_optix(context) | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False # No animation. | |
+ | |
+ scene = context.scene | |
+ cscene = scene.cycles | |
+ cbk = scene.render.bake | |
+ rd = scene.render | |
+ | |
+ if rd.use_bake_multires: | |
+ layout.operator("object.bake_image", icon='RENDER_STILL') | |
+ layout.prop(rd, "use_bake_multires") | |
+ layout.prop(rd, "bake_type") | |
+ | |
+ else: | |
+ layout.operator("object.bake", icon='RENDER_STILL').type = cscene.bake_type | |
+ layout.prop(rd, "use_bake_multires") | |
+ layout.prop(cscene, "bake_type") | |
+ | |
+ | |
+class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel): | |
+ bl_label = "Influence" | |
+ bl_context = "render" | |
+ bl_parent_id = "CYCLES_RENDER_PT_bake" | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+    @classmethod | |
+ def poll(cls, context): | |
+ scene = context.scene | |
+ cscene = scene.cycles | |
+ rd = scene.render | |
+ if rd.use_bake_multires == False and cscene.bake_type in { | |
+ 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}: | |
+ return True | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False # No animation. | |
+ | |
+ scene = context.scene | |
+ cscene = scene.cycles | |
+ cbk = scene.render.bake | |
+ rd = scene.render | |
+ | |
+ col = layout.column() | |
+ | |
+ if cscene.bake_type == 'NORMAL': | |
+ col.prop(cbk, "normal_space", text="Space") | |
+ | |
+ sub = col.column(align=True) | |
+ sub.prop(cbk, "normal_r", text="Swizzle R") | |
+ sub.prop(cbk, "normal_g", text="G") | |
+ sub.prop(cbk, "normal_b", text="B") | |
+ | |
+ elif cscene.bake_type == 'COMBINED': | |
+ row = col.row(align=True) | |
+ row.use_property_split = False | |
+ row.prop(cbk, "use_pass_direct", toggle=True) | |
+ row.prop(cbk, "use_pass_indirect", toggle=True) | |
+ | |
+ flow = col.grid_flow(row_major=False, columns=0, even_columns=False, even_rows=False, align=True) | |
+ | |
+ flow.active = cbk.use_pass_direct or cbk.use_pass_indirect | |
+ flow.prop(cbk, "use_pass_diffuse") | |
+ flow.prop(cbk, "use_pass_glossy") | |
+ flow.prop(cbk, "use_pass_transmission") | |
+ flow.prop(cbk, "use_pass_subsurface") | |
+ flow.prop(cbk, "use_pass_ambient_occlusion") | |
+ flow.prop(cbk, "use_pass_emit") | |
+ | |
+ elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}: | |
+ row = col.row(align=True) | |
+ row.use_property_split = False | |
+ row.prop(cbk, "use_pass_direct", toggle=True) | |
+ row.prop(cbk, "use_pass_indirect", toggle=True) | |
+ row.prop(cbk, "use_pass_color", toggle=True) | |
+ | |
+ | |
+class CYCLES_RENDER_PT_bake_selected_to_active(CyclesButtonsPanel, Panel): | |
+ bl_label = "Selected to Active" | |
+ bl_context = "render" | |
+ bl_parent_id = "CYCLES_RENDER_PT_bake" | |
+ bl_options = {'DEFAULT_CLOSED'} | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ @classmethod | |
+ def poll(cls, context): | |
+ scene = context.scene | |
+ rd = scene.render | |
+ return rd.use_bake_multires == False | |
+ | |
+ def draw_header(self, context): | |
+ scene = context.scene | |
+ cbk = scene.render.bake | |
+ self.layout.prop(cbk, "use_selected_to_active", text="") | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False # No animation. | |
+ | |
+ scene = context.scene | |
+ cscene = scene.cycles | |
+ cbk = scene.render.bake | |
+ rd = scene.render | |
+ | |
+ layout.active = cbk.use_selected_to_active | |
+ col = layout.column() | |
+ | |
+ col.prop(cbk, "use_cage", text="Cage") | |
+ if cbk.use_cage: | |
+ col.prop(cbk, "cage_extrusion", text="Extrusion") | |
+ col.prop(cbk, "cage_object", text="Cage Object") | |
+ else: | |
+ col.prop(cbk, "cage_extrusion", text="Ray Distance") | |
+ | |
+ | |
+class CYCLES_RENDER_PT_bake_output(CyclesButtonsPanel, Panel): | |
+ bl_label = "Output" | |
+ bl_context = "render" | |
+ bl_parent_id = "CYCLES_RENDER_PT_bake" | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False # No animation. | |
+ | |
+ scene = context.scene | |
+ cscene = scene.cycles | |
+ cbk = scene.render.bake | |
+ rd = scene.render | |
+ | |
+ if rd.use_bake_multires: | |
+ layout.prop(rd, "bake_margin") | |
+ layout.prop(rd, "use_bake_clear", text="Clear Image") | |
+ | |
+ if rd.bake_type == 'DISPLACEMENT': | |
+ layout.prop(rd, "use_bake_lores_mesh") | |
+ else: | |
+ layout.prop(cbk, "margin") | |
+ layout.prop(cbk, "use_clear", text="Clear Image") | |
+ | |
+ | |
+class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel): | |
+ bl_label = "Debug" | |
+ bl_context = "render" | |
+ bl_options = {'DEFAULT_CLOSED'} | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ @classmethod | |
+ def poll(cls, context): | |
+ return CyclesButtonsPanel.poll(context) and bpy.app.debug_value == 256 | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ | |
+ scene = context.scene | |
+ cscene = scene.cycles | |
+ | |
+ col = layout.column() | |
+ | |
+ col.label(text="CPU Flags:") | |
+ row = col.row(align=True) | |
+ row.prop(cscene, "debug_use_cpu_sse2", toggle=True) | |
+ row.prop(cscene, "debug_use_cpu_sse3", toggle=True) | |
+ row.prop(cscene, "debug_use_cpu_sse41", toggle=True) | |
+ row.prop(cscene, "debug_use_cpu_avx", toggle=True) | |
+ row.prop(cscene, "debug_use_cpu_avx2", toggle=True) | |
+ col.prop(cscene, "debug_bvh_layout") | |
+ col.prop(cscene, "debug_use_cpu_split_kernel") | |
+ | |
+ col.separator() | |
+ | |
+ col = layout.column() | |
+ col.label(text="CUDA Flags:") | |
+ col.prop(cscene, "debug_use_cuda_adaptive_compile") | |
+ col.prop(cscene, "debug_use_cuda_split_kernel") | |
+ | |
+ col.separator() | |
+ | |
+ col = layout.column() | |
+ col.label(text="OptiX Flags:") | |
+ col.prop(cscene, "debug_optix_cuda_streams") | |
+ | |
+ col.separator() | |
+ | |
+ col = layout.column() | |
+ col.label(text="OpenCL Flags:") | |
+ col.prop(cscene, "debug_opencl_device_type", text="Device") | |
+ col.prop(cscene, "debug_use_opencl_debug", text="Debug") | |
+ col.prop(cscene, "debug_opencl_mem_limit") | |
+ | |
+ col.separator() | |
+ | |
+ col = layout.column() | |
+ col.prop(cscene, "debug_bvh_type") | |
+ | |
+ | |
+class CYCLES_RENDER_PT_simplify(CyclesButtonsPanel, Panel): | |
+ bl_label = "Simplify" | |
+ bl_context = "render" | |
+ bl_options = {'DEFAULT_CLOSED'} | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ def draw_header(self, context): | |
+ rd = context.scene.render | |
+ self.layout.prop(rd, "use_simplify", text="") | |
+ | |
+ def draw(self, context): | |
+ pass | |
+ | |
+ | |
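CYCLES_RENDER_PT_simplify above shows the parent/sub-panel pattern used throughout this file: the parent draws only a header checkbox and an empty body, and each child attaches itself via bl_parent_id and grays out against rd.use_simplify. A reduced, self-contained sketch (panel names are illustrative; bpy.utils.register_class would still be needed to actually show them):

import bpy
from bpy.types import Panel

class EXAMPLE_RENDER_PT_parent(Panel):
    bl_space_type = 'PROPERTIES'
    bl_region_type = 'WINDOW'
    bl_context = "render"
    bl_label = "Parent"

    def draw_header(self, context):
        # The toggle lives in the header; the body stays empty.
        self.layout.prop(context.scene.render, "use_simplify", text="")

    def draw(self, context):
        pass

class EXAMPLE_RENDER_PT_child(Panel):
    bl_space_type = 'PROPERTIES'
    bl_region_type = 'WINDOW'
    bl_context = "render"
    bl_label = "Child"
    bl_parent_id = "EXAMPLE_RENDER_PT_parent"

    def draw(self, context):
        layout = self.layout
        # Grayed out, but still visible, while the parent toggle is off.
        layout.active = context.scene.render.use_simplify
        layout.prop(context.scene.render, "simplify_subdivision")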
+class CYCLES_RENDER_PT_simplify_viewport(CyclesButtonsPanel, Panel): | |
+ bl_label = "Viewport" | |
+ bl_context = "render" | |
+ bl_parent_id = "CYCLES_RENDER_PT_simplify" | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False | |
+ | |
+ scene = context.scene | |
+ rd = scene.render | |
+ cscene = scene.cycles | |
+ | |
+ layout.active = rd.use_simplify | |
+ | |
+ col = layout.column() | |
+ col.prop(rd, "simplify_subdivision", text="Max Subdivision") | |
+ col.prop(rd, "simplify_child_particles", text="Child Particles") | |
+ col.prop(cscene, "texture_limit", text="Texture Limit") | |
+ col.prop(cscene, "ao_bounces", text="AO Bounces") | |
+ col.prop(rd, "use_simplify_smoke_highres") | |
+ | |
+ | |
+class CYCLES_RENDER_PT_simplify_render(CyclesButtonsPanel, Panel): | |
+ bl_label = "Render" | |
+ bl_context = "render" | |
+ bl_parent_id = "CYCLES_RENDER_PT_simplify" | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False | |
+ | |
+ scene = context.scene | |
+ rd = scene.render | |
+ cscene = scene.cycles | |
+ | |
+ layout.active = rd.use_simplify | |
+ | |
+ col = layout.column() | |
+ | |
+ col.prop(rd, "simplify_subdivision_render", text="Max Subdivision") | |
+ col.prop(rd, "simplify_child_particles_render", text="Child Particles") | |
+ col.prop(cscene, "texture_limit_render", text="Texture Limit") | |
+ col.prop(cscene, "ao_bounces_render", text="AO Bounces") | |
+ | |
+ | |
+class CYCLES_RENDER_PT_simplify_culling(CyclesButtonsPanel, Panel): | |
+ bl_label = "Culling" | |
+ bl_context = "render" | |
+ bl_parent_id = "CYCLES_RENDER_PT_simplify" | |
+ bl_options = {'DEFAULT_CLOSED'} | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False | |
+ | |
+ scene = context.scene | |
+ rd = scene.render | |
+ cscene = scene.cycles | |
+ | |
+ layout.active = rd.use_simplify | |
+ | |
+ col = layout.column() | |
+ col.prop(cscene, "use_camera_cull") | |
+ sub = col.column() | |
+ sub.active = cscene.use_camera_cull | |
+ sub.prop(cscene, "camera_cull_margin") | |
+ | |
+ col = layout.column() | |
+ col.prop(cscene, "use_distance_cull") | |
+ sub = col.column() | |
+ sub.active = cscene.use_distance_cull | |
+ sub.prop(cscene, "distance_cull_margin", text="Distance") | |
+ | |
+ | |
+class CYCLES_VIEW3D_PT_shading_render_pass(Panel): | |
+ bl_space_type = 'VIEW_3D' | |
+ bl_region_type = 'HEADER' | |
+ bl_label = "Render Pass" | |
+ bl_parent_id = 'VIEW3D_PT_shading' | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ @classmethod | |
+ def poll(cls, context): | |
+ return (context.engine in cls.COMPAT_ENGINES | |
+ and context.space_data.shading.type == 'RENDERED') | |
+ | |
+ def draw(self, context): | |
+ shading = context.space_data.shading | |
+ | |
+ layout = self.layout | |
+ layout.prop(shading.cycles, "render_pass", text="") | |
+ | |
+ | |
+class CYCLES_VIEW3D_PT_shading_lighting(Panel): | |
+ bl_space_type = 'VIEW_3D' | |
+ bl_region_type = 'HEADER' | |
+ bl_label = "Lighting" | |
+ bl_parent_id = 'VIEW3D_PT_shading' | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ | |
+ @classmethod | |
+ def poll(cls, context): | |
+ return (context.engine in cls.COMPAT_ENGINES | |
+ and context.space_data.shading.type == 'RENDERED') | |
+ | |
+ def draw(self, context): | |
+ layout = self.layout | |
+ col = layout.column() | |
+ split = col.split(factor=0.9) | |
+ | |
+ shading = context.space_data.shading | |
+ col.prop(shading, "use_scene_lights_render") | |
+ col.prop(shading, "use_scene_world_render") | |
+ | |
+ if not shading.use_scene_world_render: | |
+ col = layout.column() | |
+ split = col.split(factor=0.9) | |
+ | |
+ col = split.column() | |
+ sub = col.row() | |
+ sub.scale_y = 0.6 | |
+ sub.template_icon_view(shading, "studio_light", scale_popup=3) | |
+ | |
+ col = split.column() | |
+ col.operator("preferences.studiolight_show", emboss=False, text="", icon='PREFERENCES') | |
+ | |
+ split = layout.split(factor=0.9) | |
+ col = split.column() | |
+ col.prop(shading, "studiolight_rotate_z", text="Rotation") | |
+ col.prop(shading, "studiolight_intensity") | |
+ col.prop(shading, "studiolight_background_alpha") | |
+ | |
+ | |
+class CYCLES_VIEW3D_PT_simplify_greasepencil(CyclesButtonsPanel, Panel, GreasePencilSimplifyPanel): | |
+ bl_label = "Grease Pencil" | |
+ bl_parent_id = "CYCLES_RENDER_PT_simplify" | |
+ COMPAT_ENGINES = {'CYCLES'} | |
+ bl_options = {'DEFAULT_CLOSED'} | |
+ | |
+ | |
+def draw_device(self, context): | |
+ scene = context.scene | |
+ layout = self.layout | |
+ layout.use_property_split = True | |
+ layout.use_property_decorate = False | |
+ | |
+ if context.engine == 'CYCLES': | |
+ from . import engine | |
+ cscene = scene.cycles | |
+ | |
+ col = layout.column() | |
+ col.prop(cscene, "feature_set") | |
+ | |
+ scene = context.scene | |
+ | |
+ col = layout.column() | |
+ col.active = show_device_active(context) | |
+ col.prop(cscene, "device") | |
+ | |
+ from . import engine | |
+ if engine.with_osl() and use_cpu(context): | |
+ col.prop(cscene, "shading_system") | |
+ | |
+ | |
+def draw_pause(self, context): | |
+ layout = self.layout | |
+ scene = context.scene | |
+ | |
+ if context.engine == "CYCLES": | |
+ view = context.space_data | |
+ | |
+ if view.shading.type == 'RENDERED': | |
+ cscene = scene.cycles | |
+ layout.prop(cscene, "preview_pause", icon='PLAY' if cscene.preview_pause else 'PAUSE', text="") | |
+ | |
+ | |
+def get_panels(): | |
+ exclude_panels = { | |
+ 'DATA_PT_area', | |
+ 'DATA_PT_camera_dof', | |
+ 'DATA_PT_falloff_curve', | |
+ 'DATA_PT_light', | |
+ 'DATA_PT_preview', | |
+ 'DATA_PT_spot', | |
+ 'MATERIAL_PT_context_material', | |
+ 'MATERIAL_PT_preview', | |
+ 'NODE_DATA_PT_light', | |
+ 'NODE_DATA_PT_spot', | |
+ 'OBJECT_PT_visibility', | |
+ 'VIEWLAYER_PT_filter', | |
+ 'VIEWLAYER_PT_layer_passes', | |
+ 'RENDER_PT_post_processing', | |
+ 'RENDER_PT_simplify', | |
+ } | |
+ | |
+ panels = [] | |
+ for panel in bpy.types.Panel.__subclasses__(): | |
+ if hasattr(panel, 'COMPAT_ENGINES') and 'BLENDER_RENDER' in panel.COMPAT_ENGINES: | |
+ if panel.__name__ not in exclude_panels: | |
+ panels.append(panel) | |
+ | |
+ return panels | |
+ | |
+ | |
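get_panels() collects every built-in panel that declares BLENDER_RENDER compatibility, minus the exclusion list of panels that Cycles replaces with its own versions above. A quick way to inspect the same set from Blender's Python console (illustrative, not part of the patch):

import bpy

compatible = [
    panel.__name__
    for panel in bpy.types.Panel.__subclasses__()
    if 'BLENDER_RENDER' in getattr(panel, 'COMPAT_ENGINES', ())
]
print(len(compatible), "built-in panels would be tagged as Cycles-compatible")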
+classes = ( | |
+ CYCLES_PT_sampling_presets, | |
+ CYCLES_PT_integrator_presets, | |
+ CYCLES_RENDER_PT_sampling, | |
+ CYCLES_RENDER_PT_sampling_sub_samples, | |
+ CYCLES_RENDER_PT_sampling_advanced, | |
+ CYCLES_RENDER_PT_light_paths, | |
+ CYCLES_RENDER_PT_light_paths_max_bounces, | |
+ CYCLES_RENDER_PT_light_paths_clamping, | |
+ CYCLES_RENDER_PT_light_paths_caustics, | |
+ CYCLES_RENDER_PT_volumes, | |
+ CYCLES_RENDER_PT_subdivision, | |
+ CYCLES_RENDER_PT_hair, | |
+ CYCLES_RENDER_PT_simplify, | |
+ CYCLES_RENDER_PT_simplify_viewport, | |
+ CYCLES_RENDER_PT_simplify_render, | |
+ CYCLES_RENDER_PT_simplify_culling, | |
+ CYCLES_VIEW3D_PT_simplify_greasepencil, | |
+ CYCLES_VIEW3D_PT_shading_lighting, | |
+ CYCLES_VIEW3D_PT_shading_render_pass, | |
+ CYCLES_RENDER_PT_motion_blur, | |
+ CYCLES_RENDER_PT_motion_blur_curve, | |
+ CYCLES_RENDER_PT_film, | |
+ CYCLES_RENDER_PT_film_pixel_filter, | |
+ CYCLES_RENDER_PT_film_transparency, | |
+ CYCLES_RENDER_PT_performance, | |
+ CYCLES_RENDER_PT_performance_threads, | |
+ CYCLES_RENDER_PT_performance_tiles, | |
+ CYCLES_RENDER_PT_performance_acceleration_structure, | |
+ CYCLES_RENDER_PT_performance_final_render, | |
+ CYCLES_RENDER_PT_performance_viewport, | |
+ CYCLES_RENDER_PT_passes, | |
+ CYCLES_RENDER_PT_passes_data, | |
+ CYCLES_RENDER_PT_passes_light, | |
+ CYCLES_RENDER_PT_passes_crypto, | |
+ CYCLES_RENDER_PT_passes_debug, | |
+ CYCLES_RENDER_UL_aov, | |
+ CYCLES_RENDER_PT_passes_aov, | |
+ CYCLES_RENDER_PT_filter, | |
+ CYCLES_RENDER_PT_override, | |
+ CYCLES_RENDER_PT_denoising, | |
+ CYCLES_PT_post_processing, | |
+ CYCLES_CAMERA_PT_dof, | |
+ CYCLES_CAMERA_PT_dof_aperture, | |
+ CYCLES_PT_context_material, | |
+ CYCLES_OBJECT_PT_motion_blur, | |
+ CYCLES_OBJECT_PT_visibility, | |
+ CYCLES_OBJECT_PT_visibility_ray_visibility, | |
+ CYCLES_OBJECT_PT_visibility_culling, | |
+ CYCLES_LIGHT_PT_preview, | |
+ CYCLES_LIGHT_PT_light, | |
+ CYCLES_LIGHT_PT_nodes, | |
+ CYCLES_LIGHT_PT_spot, | |
+ CYCLES_WORLD_PT_preview, | |
+ CYCLES_WORLD_PT_surface, | |
+ CYCLES_WORLD_PT_volume, | |
+ CYCLES_WORLD_PT_ambient_occlusion, | |
+ CYCLES_WORLD_PT_mist, | |
+ CYCLES_WORLD_PT_ray_visibility, | |
+ CYCLES_WORLD_PT_settings, | |
+ CYCLES_WORLD_PT_settings_surface, | |
+ CYCLES_WORLD_PT_settings_volume, | |
+ CYCLES_MATERIAL_PT_preview, | |
+ CYCLES_MATERIAL_PT_surface, | |
+ CYCLES_MATERIAL_PT_volume, | |
+ CYCLES_MATERIAL_PT_displacement, | |
+ CYCLES_MATERIAL_PT_settings, | |
+ CYCLES_MATERIAL_PT_settings_surface, | |
+ CYCLES_MATERIAL_PT_settings_volume, | |
+ CYCLES_RENDER_PT_bake, | |
+ CYCLES_RENDER_PT_bake_influence, | |
+ CYCLES_RENDER_PT_bake_selected_to_active, | |
+ CYCLES_RENDER_PT_bake_output, | |
+ CYCLES_RENDER_PT_debug, | |
+ node_panel(CYCLES_MATERIAL_PT_settings), | |
+ node_panel(CYCLES_MATERIAL_PT_settings_surface), | |
+ node_panel(CYCLES_MATERIAL_PT_settings_volume), | |
+ node_panel(CYCLES_WORLD_PT_ray_visibility), | |
+ node_panel(CYCLES_WORLD_PT_settings), | |
+ node_panel(CYCLES_WORLD_PT_settings_surface), | |
+ node_panel(CYCLES_WORLD_PT_settings_volume), | |
+ node_panel(CYCLES_LIGHT_PT_light), | |
+ node_panel(CYCLES_LIGHT_PT_spot), | |
+) | |
+ | |
+ | |
+def register(): | |
+ from bpy.utils import register_class | |
+ | |
+ bpy.types.RENDER_PT_context.append(draw_device) | |
+ bpy.types.VIEW3D_HT_header.append(draw_pause) | |
+ | |
+ for panel in get_panels(): | |
+ panel.COMPAT_ENGINES.add('CYCLES') | |
+ | |
+ for cls in classes: | |
+ register_class(cls) | |
+ | |
+ | |
+def unregister(): | |
+ from bpy.utils import unregister_class | |
+ | |
+ bpy.types.RENDER_PT_context.remove(draw_device) | |
+ bpy.types.VIEW3D_HT_header.remove(draw_pause) | |
+ | |
+ for panel in get_panels(): | |
+ if 'CYCLES' in panel.COMPAT_ENGINES: | |
+ panel.COMPAT_ENGINES.remove('CYCLES') | |
+ | |
+ for cls in classes: | |
+ unregister_class(cls) | |
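register() and unregister() are symmetric: the draw callbacks are appended to existing UI types, 'CYCLES' is added to (and later removed from) the COMPAT_ENGINES set of each shared panel, and the module's own classes are registered. The same append/remove idiom works for any existing header or panel; a minimal illustrative example (names are hypothetical):

import bpy

def draw_extra(self, context):
    # Extra element appended to an existing header.
    self.layout.label(text="Cycles active")

def register():
    bpy.types.VIEW3D_HT_header.append(draw_extra)

def unregister():
    bpy.types.VIEW3D_HT_header.remove(draw_extra)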
diff -Naur a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp | |
--- a/intern/cycles/blender/blender_session.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/blender/blender_session.cpp 2020-01-10 20:42:43.457590054 +0300 | |
@@ -474,7 +474,8 @@ | |
b_rlay_name = b_view_layer.name(); | |
/* add passes */ | |
- vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer); | |
+ vector<Pass> passes = sync->sync_render_passes( | |
+ b_rlay, b_view_layer, session_params.adaptive_sampling); | |
buffer_params.passes = passes; | |
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles"); | |
diff -Naur a/intern/cycles/blender/blender_session.cpp.orig b/intern/cycles/blender/blender_session.cpp.orig | |
--- a/intern/cycles/blender/blender_session.cpp.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/blender/blender_session.cpp.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,1513 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include <stdlib.h> | |
+ | |
+#include "device/device.h" | |
+#include "render/background.h" | |
+#include "render/buffers.h" | |
+#include "render/camera.h" | |
+#include "render/colorspace.h" | |
+#include "render/film.h" | |
+#include "render/integrator.h" | |
+#include "render/light.h" | |
+#include "render/mesh.h" | |
+#include "render/object.h" | |
+#include "render/scene.h" | |
+#include "render/session.h" | |
+#include "render/shader.h" | |
+#include "render/stats.h" | |
+ | |
+#include "util/util_algorithm.h" | |
+#include "util/util_color.h" | |
+#include "util/util_foreach.h" | |
+#include "util/util_function.h" | |
+#include "util/util_hash.h" | |
+#include "util/util_logging.h" | |
+#include "util/util_murmurhash.h" | |
+#include "util/util_progress.h" | |
+#include "util/util_time.h" | |
+ | |
+#include "blender/blender_sync.h" | |
+#include "blender/blender_session.h" | |
+#include "blender/blender_util.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+bool BlenderSession::headless = false; | |
+int BlenderSession::num_resumable_chunks = 0; | |
+int BlenderSession::current_resumable_chunk = 0; | |
+int BlenderSession::start_resumable_chunk = 0; | |
+int BlenderSession::end_resumable_chunk = 0; | |
+bool BlenderSession::print_render_stats = false; | |
+ | |
+BlenderSession::BlenderSession(BL::RenderEngine &b_engine, | |
+ BL::Preferences &b_userpref, | |
+ BL::BlendData &b_data, | |
+ bool preview_osl) | |
+ : session(NULL), | |
+ sync(NULL), | |
+ b_engine(b_engine), | |
+ b_userpref(b_userpref), | |
+ b_data(b_data), | |
+ b_render(b_engine.render()), | |
+ b_depsgraph(PointerRNA_NULL), | |
+ b_scene(PointerRNA_NULL), | |
+ b_v3d(PointerRNA_NULL), | |
+ b_rv3d(PointerRNA_NULL), | |
+ width(0), | |
+ height(0), | |
+ preview_osl(preview_osl), | |
+ python_thread_state(NULL) | |
+{ | |
+ /* offline render */ | |
+ background = true; | |
+ last_redraw_time = 0.0; | |
+ start_resize_time = 0.0; | |
+ last_status_time = 0.0; | |
+} | |
+ | |
+BlenderSession::BlenderSession(BL::RenderEngine &b_engine, | |
+ BL::Preferences &b_userpref, | |
+ BL::BlendData &b_data, | |
+ BL::SpaceView3D &b_v3d, | |
+ BL::RegionView3D &b_rv3d, | |
+ int width, | |
+ int height) | |
+ : session(NULL), | |
+ sync(NULL), | |
+ b_engine(b_engine), | |
+ b_userpref(b_userpref), | |
+ b_data(b_data), | |
+ b_render(b_engine.render()), | |
+ b_depsgraph(PointerRNA_NULL), | |
+ b_scene(PointerRNA_NULL), | |
+ b_v3d(b_v3d), | |
+ b_rv3d(b_rv3d), | |
+ width(width), | |
+ height(height), | |
+ preview_osl(false), | |
+ python_thread_state(NULL) | |
+{ | |
+ /* 3d view render */ | |
+ background = false; | |
+ last_redraw_time = 0.0; | |
+ start_resize_time = 0.0; | |
+ last_status_time = 0.0; | |
+} | |
+ | |
+BlenderSession::~BlenderSession() | |
+{ | |
+ free_session(); | |
+} | |
+ | |
+void BlenderSession::create_session() | |
+{ | |
+ SessionParams session_params = BlenderSync::get_session_params( | |
+ b_engine, b_userpref, b_scene, background); | |
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); | |
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background); | |
+ | |
+ /* reset status/progress */ | |
+ last_status = ""; | |
+ last_error = ""; | |
+ last_progress = -1.0f; | |
+ start_resize_time = 0.0; | |
+ | |
+ /* create session */ | |
+ session = new Session(session_params); | |
+ session->scene = scene; | |
+ session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this)); | |
+ session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this)); | |
+ session->set_pause(session_pause); | |
+ | |
+ /* create scene */ | |
+ scene = new Scene(scene_params, session->device); | |
+ scene->name = b_scene.name(); | |
+ | |
+ /* setup callbacks for builtin image support */ | |
+ scene->image_manager->builtin_image_info_cb = function_bind( | |
+ &BlenderSession::builtin_image_info, this, _1, _2, _3); | |
+ scene->image_manager->builtin_image_pixels_cb = function_bind( | |
+ &BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5, _6, _7); | |
+ scene->image_manager->builtin_image_float_pixels_cb = function_bind( | |
+ &BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5, _6, _7); | |
+ | |
+ session->scene = scene; | |
+ | |
+ /* There is no single depsgraph to use for the entire render. | |
+ * So we need to handle this differently. | |
+ * | |
+ * We could loop over the final render result render layers in pipeline and keep Cycles unaware | |
+ * of multiple layers, or perhaps move syncing further down in the pipeline. | |
+ */ | |
+ /* create sync */ | |
+ sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress); | |
+ BL::Object b_camera_override(b_engine.camera_override()); | |
+ if (b_v3d) { | |
+ sync->sync_view(b_v3d, b_rv3d, width, height); | |
+ } | |
+ else { | |
+ sync->sync_camera(b_render, b_camera_override, width, height, ""); | |
+ } | |
+ | |
+ /* set buffer parameters */ | |
+ BufferParams buffer_params = BlenderSync::get_buffer_params( | |
+ b_render, b_v3d, b_rv3d, scene->camera, width, height); | |
+ session->reset(buffer_params, session_params.samples); | |
+ | |
+ b_engine.use_highlight_tiles(session_params.progressive_refine == false); | |
+ | |
+ update_resumable_tile_manager(session_params.samples); | |
+} | |
+ | |
+void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph) | |
+{ | |
+ this->b_data = b_data; | |
+ this->b_depsgraph = b_depsgraph; | |
+ this->b_scene = b_depsgraph.scene_eval(); | |
+ | |
+ if (preview_osl) { | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ RNA_boolean_set(&cscene, "shading_system", preview_osl); | |
+ } | |
+ | |
+ if (b_v3d) { | |
+ this->b_render = b_scene.render(); | |
+ } | |
+ else { | |
+ this->b_render = b_engine.render(); | |
+ width = render_resolution_x(b_render); | |
+ height = render_resolution_y(b_render); | |
+ } | |
+ | |
+ bool is_new_session = (session == NULL); | |
+ if (is_new_session) { | |
+    /* Initialize the session and remember that it was just created, so it | |
+     * is not re-created below. | |
+     */ | |
+ create_session(); | |
+ } | |
+ | |
+ if (b_v3d) { | |
+    /* NOTE: We need to create the session, but the code below would make | |
+     * the viewport render get stuck during initialization. | |
+     */ | |
+ return; | |
+ } | |
+ | |
+ SessionParams session_params = BlenderSync::get_session_params( | |
+ b_engine, b_userpref, b_scene, background); | |
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); | |
+ | |
+ if (scene->params.modified(scene_params) || session->params.modified(session_params) || | |
+ !scene_params.persistent_data) { | |
+ /* if scene or session parameters changed, it's easier to simply re-create | |
+ * them rather than trying to distinguish which settings need to be updated | |
+ */ | |
+ if (!is_new_session) { | |
+ free_session(); | |
+ create_session(); | |
+ } | |
+ return; | |
+ } | |
+ | |
+ session->progress.reset(); | |
+ scene->reset(); | |
+ | |
+ session->tile_manager.set_tile_order(session_params.tile_order); | |
+ | |
+ /* peak memory usage should show current render peak, not peak for all renders | |
+ * made by this render session | |
+ */ | |
+ session->stats.mem_peak = session->stats.mem_used; | |
+ | |
+ /* There is no single depsgraph to use for the entire render. | |
+ * See note on create_session(). | |
+ */ | |
+ /* sync object should be re-created */ | |
+ sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress); | |
+ | |
+ BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL); | |
+ BL::RegionView3D b_null_region_view3d(PointerRNA_NULL); | |
+ BufferParams buffer_params = BlenderSync::get_buffer_params( | |
+ b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height); | |
+ session->reset(buffer_params, session_params.samples); | |
+ | |
+ b_engine.use_highlight_tiles(session_params.progressive_refine == false); | |
+ | |
+ /* reset time */ | |
+ start_resize_time = 0.0; | |
+} | |
+ | |
+void BlenderSession::free_session() | |
+{ | |
+ if (sync) | |
+ delete sync; | |
+ | |
+ delete session; | |
+} | |
+ | |
+static ShaderEvalType get_shader_type(const string &pass_type) | |
+{ | |
+ const char *shader_type = pass_type.c_str(); | |
+ | |
+ /* data passes */ | |
+ if (strcmp(shader_type, "NORMAL") == 0) | |
+ return SHADER_EVAL_NORMAL; | |
+ else if (strcmp(shader_type, "UV") == 0) | |
+ return SHADER_EVAL_UV; | |
+ else if (strcmp(shader_type, "ROUGHNESS") == 0) | |
+ return SHADER_EVAL_ROUGHNESS; | |
+ else if (strcmp(shader_type, "DIFFUSE_COLOR") == 0) | |
+ return SHADER_EVAL_DIFFUSE_COLOR; | |
+ else if (strcmp(shader_type, "GLOSSY_COLOR") == 0) | |
+ return SHADER_EVAL_GLOSSY_COLOR; | |
+ else if (strcmp(shader_type, "TRANSMISSION_COLOR") == 0) | |
+ return SHADER_EVAL_TRANSMISSION_COLOR; | |
+ else if (strcmp(shader_type, "SUBSURFACE_COLOR") == 0) | |
+ return SHADER_EVAL_SUBSURFACE_COLOR; | |
+ else if (strcmp(shader_type, "EMIT") == 0) | |
+ return SHADER_EVAL_EMISSION; | |
+ | |
+ /* light passes */ | |
+ else if (strcmp(shader_type, "AO") == 0) | |
+ return SHADER_EVAL_AO; | |
+ else if (strcmp(shader_type, "COMBINED") == 0) | |
+ return SHADER_EVAL_COMBINED; | |
+ else if (strcmp(shader_type, "SHADOW") == 0) | |
+ return SHADER_EVAL_SHADOW; | |
+ else if (strcmp(shader_type, "DIFFUSE") == 0) | |
+ return SHADER_EVAL_DIFFUSE; | |
+ else if (strcmp(shader_type, "GLOSSY") == 0) | |
+ return SHADER_EVAL_GLOSSY; | |
+ else if (strcmp(shader_type, "TRANSMISSION") == 0) | |
+ return SHADER_EVAL_TRANSMISSION; | |
+ else if (strcmp(shader_type, "SUBSURFACE") == 0) | |
+ return SHADER_EVAL_SUBSURFACE; | |
+ | |
+ /* extra */ | |
+ else if (strcmp(shader_type, "ENVIRONMENT") == 0) | |
+ return SHADER_EVAL_ENVIRONMENT; | |
+ | |
+ else | |
+ return SHADER_EVAL_BAKE; | |
+} | |
+ | |
+static BL::RenderResult begin_render_result(BL::RenderEngine &b_engine, | |
+ int x, | |
+ int y, | |
+ int w, | |
+ int h, | |
+ const char *layername, | |
+ const char *viewname) | |
+{ | |
+ return b_engine.begin_result(x, y, w, h, layername, viewname); | |
+} | |
+ | |
+static void end_render_result(BL::RenderEngine &b_engine, | |
+ BL::RenderResult &b_rr, | |
+ bool cancel, | |
+ bool highlight, | |
+ bool do_merge_results) | |
+{ | |
+ b_engine.end_result(b_rr, (int)cancel, (int)highlight, (int)do_merge_results); | |
+} | |
+ | |
+void BlenderSession::do_write_update_render_tile(RenderTile &rtile, | |
+ bool do_update_only, | |
+ bool highlight) | |
+{ | |
+ int x = rtile.x - session->tile_manager.params.full_x; | |
+ int y = rtile.y - session->tile_manager.params.full_y; | |
+ int w = rtile.w; | |
+ int h = rtile.h; | |
+ | |
+ /* get render result */ | |
+ BL::RenderResult b_rr = begin_render_result( | |
+ b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str()); | |
+ | |
+ /* can happen if the intersected rectangle gives 0 width or height */ | |
+ if (b_rr.ptr.data == NULL) { | |
+ return; | |
+ } | |
+ | |
+ BL::RenderResult::layers_iterator b_single_rlay; | |
+ b_rr.layers.begin(b_single_rlay); | |
+ | |
+ /* layer will be missing if it was disabled in the UI */ | |
+ if (b_single_rlay == b_rr.layers.end()) | |
+ return; | |
+ | |
+ BL::RenderLayer b_rlay = *b_single_rlay; | |
+ | |
+ if (do_update_only) { | |
+    /* Sample is zero at the initial tile update, which is only needed to | |
+     * tag the tile on the Blender side as IN PROGRESS for proper highlight; | |
+     * no buffers should be sent to Blender yet. For denoising we also | |
+     * keep showing the noisy buffers until denoising is done. */ | |
+ bool merge = (rtile.sample != 0) && (rtile.task != RenderTile::DENOISE); | |
+ | |
+ if (merge) { | |
+ update_render_result(b_rlay, rtile); | |
+ } | |
+ | |
+ end_render_result(b_engine, b_rr, true, highlight, merge); | |
+ } | |
+ else { | |
+ /* Write final render result. */ | |
+ write_render_result(b_rlay, rtile); | |
+ end_render_result(b_engine, b_rr, false, false, true); | |
+ } | |
+} | |
+ | |
+void BlenderSession::write_render_tile(RenderTile &rtile) | |
+{ | |
+ do_write_update_render_tile(rtile, false, false); | |
+} | |
+ | |
+void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight) | |
+{ | |
+  /* Use final write for preview renders, otherwise the render result would | |
+   * not be updated on the Blender side. This would need to be investigated | |
+   * a bit further, but for now it shall be fine. | |
+   */ | |
+ if (!b_engine.is_preview()) | |
+ do_write_update_render_tile(rtile, true, highlight); | |
+ else | |
+ do_write_update_render_tile(rtile, false, false); | |
+} | |
+ | |
+static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest) | |
+{ | |
+ string identifier = string_printf("%08x", util_murmur_hash3(name.c_str(), name.length(), 0)); | |
+ string prefix = "cryptomatte/" + identifier.substr(0, 7) + "/"; | |
+ | |
+ render_add_metadata(b_rr, prefix + "name", name); | |
+ render_add_metadata(b_rr, prefix + "hash", "MurmurHash3_32"); | |
+ render_add_metadata(b_rr, prefix + "conversion", "uint32_to_float32"); | |
+ render_add_metadata(b_rr, prefix + "manifest", manifest); | |
+} | |
+ | |
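add_cryptomatte_layer() hashes the layer name with 32-bit MurmurHash3, keeps the first seven hex digits as the layer identifier, and writes four metadata fields under a cryptomatte/<id>/ prefix, as the Cryptomatte specification expects. A Python sketch of the resulting keys (mmh3 is a third-party stand-in for util_murmur_hash3, used here only for illustration):

import mmh3  # third-party package, stands in for util_murmur_hash3

def cryptomatte_metadata(name, manifest):
    # Mirrors the C++ above: %08x-formatted hash, first 7 hex digits kept.
    identifier = format(mmh3.hash(name, 0, signed=False), "08x")
    prefix = "cryptomatte/" + identifier[:7] + "/"
    return {
        prefix + "name": name,
        prefix + "hash": "MurmurHash3_32",
        prefix + "conversion": "uint32_to_float32",
        prefix + "manifest": manifest,
    }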
+void BlenderSession::stamp_view_layer_metadata(Scene *scene, const string &view_layer_name) | |
+{ | |
+ BL::RenderResult b_rr = b_engine.get_result(); | |
+ string prefix = "cycles." + view_layer_name + "."; | |
+ | |
+ /* Configured number of samples for the view layer. */ | |
+ b_rr.stamp_data_add_field((prefix + "samples").c_str(), | |
+ to_string(session->params.samples).c_str()); | |
+ | |
+ /* Store ranged samples information. */ | |
+ if (session->tile_manager.range_num_samples != -1) { | |
+ b_rr.stamp_data_add_field((prefix + "range_start_sample").c_str(), | |
+ to_string(session->tile_manager.range_start_sample).c_str()); | |
+ b_rr.stamp_data_add_field((prefix + "range_num_samples").c_str(), | |
+ to_string(session->tile_manager.range_num_samples).c_str()); | |
+ } | |
+ | |
+ /* Write cryptomatte metadata. */ | |
+ if (scene->film->cryptomatte_passes & CRYPT_OBJECT) { | |
+ add_cryptomatte_layer(b_rr, | |
+ view_layer_name + ".CryptoObject", | |
+ scene->object_manager->get_cryptomatte_objects(scene)); | |
+ } | |
+ if (scene->film->cryptomatte_passes & CRYPT_MATERIAL) { | |
+ add_cryptomatte_layer(b_rr, | |
+ view_layer_name + ".CryptoMaterial", | |
+ scene->shader_manager->get_cryptomatte_materials(scene)); | |
+ } | |
+ if (scene->film->cryptomatte_passes & CRYPT_ASSET) { | |
+ add_cryptomatte_layer(b_rr, | |
+ view_layer_name + ".CryptoAsset", | |
+ scene->object_manager->get_cryptomatte_assets(scene)); | |
+ } | |
+ | |
+ /* Store synchronization and bare-render times. */ | |
+ double total_time, render_time; | |
+ session->progress.get_time(total_time, render_time); | |
+ b_rr.stamp_data_add_field((prefix + "total_time").c_str(), | |
+ time_human_readable_from_seconds(total_time).c_str()); | |
+ b_rr.stamp_data_add_field((prefix + "render_time").c_str(), | |
+ time_human_readable_from_seconds(render_time).c_str()); | |
+ b_rr.stamp_data_add_field((prefix + "synchronization_time").c_str(), | |
+ time_human_readable_from_seconds(total_time - render_time).c_str()); | |
+} | |
+ | |
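stamp_view_layer_metadata() leaves a small audit trail in the render result: the configured sample count, the sample range when resumable rendering is active, any cryptomatte manifests, and three timing fields. Illustrative output for a view layer named "ViewLayer" (the values below are made up; times go through time_human_readable_from_seconds()):

# Illustrative stamp fields; keys follow the "cycles.<layer>." prefix above.
stamp = {
    "cycles.ViewLayer.samples": "128",
    "cycles.ViewLayer.total_time": "00:42.13",
    "cycles.ViewLayer.render_time": "00:39.70",
    "cycles.ViewLayer.synchronization_time": "00:02.43",
}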
+void BlenderSession::render(BL::Depsgraph &b_depsgraph_) | |
+{ | |
+ b_depsgraph = b_depsgraph_; | |
+ | |
+ /* set callback to write out render results */ | |
+ session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1); | |
+ session->update_render_tile_cb = function_bind( | |
+ &BlenderSession::update_render_tile, this, _1, _2); | |
+ | |
+ /* get buffer parameters */ | |
+ SessionParams session_params = BlenderSync::get_session_params( | |
+ b_engine, b_userpref, b_scene, background); | |
+ BufferParams buffer_params = BlenderSync::get_buffer_params( | |
+ b_render, b_v3d, b_rv3d, scene->camera, width, height); | |
+ | |
+ /* render each layer */ | |
+ BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); | |
+ | |
+ /* temporary render result to find needed passes and views */ | |
+ BL::RenderResult b_rr = begin_render_result( | |
+ b_engine, 0, 0, 1, 1, b_view_layer.name().c_str(), NULL); | |
+ BL::RenderResult::layers_iterator b_single_rlay; | |
+ b_rr.layers.begin(b_single_rlay); | |
+ BL::RenderLayer b_rlay = *b_single_rlay; | |
+ b_rlay_name = b_view_layer.name(); | |
+ | |
+ /* add passes */ | |
+ vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer); | |
+ buffer_params.passes = passes; | |
+ | |
+ PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles"); | |
+ bool use_denoising = get_boolean(crl, "use_denoising"); | |
+ bool use_optix_denoising = get_boolean(crl, "use_optix_denoising"); | |
+ bool write_denoising_passes = get_boolean(crl, "denoising_store_passes"); | |
+ | |
+ buffer_params.denoising_data_pass = use_denoising || write_denoising_passes; | |
+ buffer_params.denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES); | |
+ buffer_params.denoising_prefiltered_pass = write_denoising_passes && !use_optix_denoising; | |
+ | |
+ session->params.run_denoising = use_denoising || write_denoising_passes; | |
+ session->params.full_denoising = use_denoising && !use_optix_denoising; | |
+ session->params.optix_denoising = use_denoising && use_optix_denoising; | |
+ session->params.write_denoising_passes = write_denoising_passes && !use_optix_denoising; | |
+ session->params.denoising.radius = get_int(crl, "denoising_radius"); | |
+ session->params.denoising.strength = get_float(crl, "denoising_strength"); | |
+ session->params.denoising.feature_strength = get_float(crl, "denoising_feature_strength"); | |
+ session->params.denoising.relative_pca = get_boolean(crl, "denoising_relative_pca"); | |
+ session->params.denoising.optix_input_passes = get_enum(crl, "denoising_optix_input_passes"); | |
+ session->tile_manager.schedule_denoising = session->params.run_denoising; | |
+ | |
+ scene->film->denoising_data_pass = buffer_params.denoising_data_pass; | |
+ scene->film->denoising_clean_pass = buffer_params.denoising_clean_pass; | |
+ scene->film->denoising_prefiltered_pass = buffer_params.denoising_prefiltered_pass; | |
+ | |
+ scene->film->pass_alpha_threshold = b_view_layer.pass_alpha_threshold(); | |
+ scene->film->tag_passes_update(scene, passes); | |
+ scene->film->tag_update(scene); | |
+ scene->integrator->tag_update(scene); | |
+ | |
+ BL::RenderResult::views_iterator b_view_iter; | |
+ | |
+ int num_views = 0; | |
+ for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); ++b_view_iter) { | |
+ num_views++; | |
+ } | |
+ | |
+ int view_index = 0; | |
+ for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); | |
+ ++b_view_iter, ++view_index) { | |
+ b_rview_name = b_view_iter->name(); | |
+ | |
+ /* set the current view */ | |
+ b_engine.active_view_set(b_rview_name.c_str()); | |
+ | |
+ /* update scene */ | |
+ BL::Object b_camera_override(b_engine.camera_override()); | |
+ sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str()); | |
+ sync->sync_data( | |
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); | |
+ builtin_images_load(); | |
+ | |
+    /* Attempt to free all data held on the Blender side, since at this | |
+     * point we know we have everything needed to render the current view | |
+     * layer. | |
+     */ | |
+ /* At the moment we only free if we are not doing multi-view | |
+ * (or if we are rendering the last view). See T58142/D4239 for discussion. | |
+ */ | |
+ if (view_index == num_views - 1) { | |
+ free_blender_memory_if_possible(); | |
+ } | |
+ | |
+    /* Make sure all views have different noise patterns (the hard-coded | |
+     * value below only serves to randomize the seed). */ | |
+ if (view_index != 0) { | |
+ scene->integrator->seed += hash_uint2(scene->integrator->seed, | |
+ hash_uint2(view_index * 0xdeadbeef, 0)); | |
+ scene->integrator->tag_update(scene); | |
+ } | |
+ | |
+ /* Update number of samples per layer. */ | |
+ int samples = sync->get_layer_samples(); | |
+ bool bound_samples = sync->get_layer_bound_samples(); | |
+ int effective_layer_samples; | |
+ | |
+ if (samples != 0 && (!bound_samples || (samples < session_params.samples))) | |
+ effective_layer_samples = samples; | |
+ else | |
+ effective_layer_samples = session_params.samples; | |
+ | |
+ /* Update tile manager if we're doing resumable render. */ | |
+ update_resumable_tile_manager(effective_layer_samples); | |
+ | |
+ /* Update session itself. */ | |
+ session->reset(buffer_params, effective_layer_samples); | |
+ | |
+ /* render */ | |
+ session->start(); | |
+ session->wait(); | |
+ | |
+ if (!b_engine.is_preview() && background && print_render_stats) { | |
+ RenderStats stats; | |
+ session->collect_statistics(&stats); | |
+ printf("Render statistics:\n%s\n", stats.full_report().c_str()); | |
+ } | |
+ | |
+ if (session->progress.get_cancel()) | |
+ break; | |
+ } | |
+ | |
+ /* add metadata */ | |
+ stamp_view_layer_metadata(scene, b_rlay_name); | |
+ | |
+ /* free result without merging */ | |
+ end_render_result(b_engine, b_rr, true, true, false); | |
+ | |
+ double total_time, render_time; | |
+ session->progress.get_time(total_time, render_time); | |
+ VLOG(1) << "Total render time: " << total_time; | |
+ VLOG(1) << "Render time (without synchronization): " << render_time; | |
+ | |
+ /* clear callback */ | |
+ session->write_render_tile_cb = function_null; | |
+ session->update_render_tile_cb = function_null; | |
+ | |
+ /* TODO: find a way to clear this data for persistent data render */ | |
+#if 0 | |
+  /* Free all memory used (host and device), so we don't leave the render | |
+   * engine with extra memory allocated. | |
+   */ | |
+ | |
+ session->device_free(); | |
+ | |
+ delete sync; | |
+ sync = NULL; | |
+#endif | |
+} | |
+ | |
+static void populate_bake_data(BakeData *data, | |
+ const int object_id, | |
+ BL::BakePixel &pixel_array, | |
+ const int num_pixels) | |
+{ | |
+ BL::BakePixel bp = pixel_array; | |
+ | |
+ int i; | |
+ for (i = 0; i < num_pixels; i++) { | |
+ if (bp.object_id() == object_id) { | |
+ data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy()); | |
+ } | |
+ else { | |
+ data->set_null(i); | |
+ } | |
+ bp = bp.next(); | |
+ } | |
+} | |
+ | |
+static int bake_pass_filter_get(const int pass_filter) | |
+{ | |
+ int flag = BAKE_FILTER_NONE; | |
+ | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_DIRECT) != 0) | |
+ flag |= BAKE_FILTER_DIRECT; | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_INDIRECT) != 0) | |
+ flag |= BAKE_FILTER_INDIRECT; | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_COLOR) != 0) | |
+ flag |= BAKE_FILTER_COLOR; | |
+ | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0) | |
+ flag |= BAKE_FILTER_DIFFUSE; | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0) | |
+ flag |= BAKE_FILTER_GLOSSY; | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0) | |
+ flag |= BAKE_FILTER_TRANSMISSION; | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_SUBSURFACE) != 0) | |
+ flag |= BAKE_FILTER_SUBSURFACE; | |
+ | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_EMIT) != 0) | |
+ flag |= BAKE_FILTER_EMISSION; | |
+ if ((pass_filter & BL::BakeSettings::pass_filter_AO) != 0) | |
+ flag |= BAKE_FILTER_AO; | |
+ | |
+ return flag; | |
+} | |
+ | |
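bake_pass_filter_get() translates the Blender-side pass-filter bits one by one into the kernel's BAKE_FILTER_* flags, so a DIFFUSE bake with Direct, Indirect and Color enabled resolves to the OR of four flags. The same mapping in Python (the flag values here are made up for illustration):

# Python sketch of bake_pass_filter_get; flag values are illustrative.
FLAGS = {
    "DIRECT": 1 << 0,
    "INDIRECT": 1 << 1,
    "COLOR": 1 << 2,
    "DIFFUSE": 1 << 3,
}

def bake_pass_filter_get(selected):
    flag = 0
    for name, bit in FLAGS.items():
        if name in selected:
            flag |= bit
    return flag

assert bake_pass_filter_get({"DIRECT", "INDIRECT", "COLOR", "DIFFUSE"}) == 0b1111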
+void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, | |
+ BL::Object &b_object, | |
+ const string &pass_type, | |
+ const int pass_filter, | |
+ const int object_id, | |
+ BL::BakePixel &pixel_array, | |
+ const size_t num_pixels, | |
+ const int /*depth*/, | |
+ float result[]) | |
+{ | |
+ b_depsgraph = b_depsgraph_; | |
+ | |
+ ShaderEvalType shader_type = get_shader_type(pass_type); | |
+ | |
+ /* Set baking flag in advance, so kernel loading can check if we need | |
+ * any baking capabilities. | |
+ */ | |
+ scene->bake_manager->set_baking(true); | |
+ | |
+ /* ensure kernels are loaded before we do any scene updates */ | |
+ session->load_kernels(); | |
+ | |
+ if (shader_type == SHADER_EVAL_UV) { | |
+ /* force UV to be available */ | |
+ Pass::add(PASS_UV, scene->film->passes); | |
+ } | |
+ | |
+ int bake_pass_filter = bake_pass_filter_get(pass_filter); | |
+ bake_pass_filter = BakeManager::shader_type_to_pass_filter(shader_type, bake_pass_filter); | |
+ | |
+ /* force use_light_pass to be true if we bake more than just colors */ | |
+ if (bake_pass_filter & ~BAKE_FILTER_COLOR) { | |
+ Pass::add(PASS_LIGHT, scene->film->passes); | |
+ } | |
+ | |
+ /* create device and update scene */ | |
+ scene->film->tag_update(scene); | |
+ scene->integrator->tag_update(scene); | |
+ | |
+ if (!session->progress.get_cancel()) { | |
+ /* update scene */ | |
+ BL::Object b_camera_override(b_engine.camera_override()); | |
+ sync->sync_camera(b_render, b_camera_override, width, height, ""); | |
+ sync->sync_data( | |
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); | |
+ builtin_images_load(); | |
+ } | |
+ | |
+ BakeData *bake_data = NULL; | |
+ | |
+ if (!session->progress.get_cancel()) { | |
+ /* get buffer parameters */ | |
+ SessionParams session_params = BlenderSync::get_session_params( | |
+ b_engine, b_userpref, b_scene, background); | |
+ BufferParams buffer_params = BlenderSync::get_buffer_params( | |
+ b_render, b_v3d, b_rv3d, scene->camera, width, height); | |
+ | |
+ scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y()); | |
+ | |
+ /* set number of samples */ | |
+ session->tile_manager.set_samples(session_params.samples); | |
+ session->reset(buffer_params, session_params.samples); | |
+ session->update_scene(); | |
+ | |
+ /* find object index. todo: is arbitrary - copied from mesh_displace.cpp */ | |
+ size_t object_index = OBJECT_NONE; | |
+ int tri_offset = 0; | |
+ | |
+ for (size_t i = 0; i < scene->objects.size(); i++) { | |
+ if (strcmp(scene->objects[i]->name.c_str(), b_object.name().c_str()) == 0) { | |
+ object_index = i; | |
+ tri_offset = scene->objects[i]->mesh->tri_offset; | |
+ break; | |
+ } | |
+ } | |
+ | |
+ /* Object might have been disabled for rendering or excluded in some | |
+ * other way, in that case Blender will report a warning afterwards. */ | |
+ if (object_index != OBJECT_NONE) { | |
+ int object = object_index; | |
+ | |
+ bake_data = scene->bake_manager->init(object, tri_offset, num_pixels); | |
+ populate_bake_data(bake_data, object_id, pixel_array, num_pixels); | |
+ } | |
+ | |
+ /* set number of samples */ | |
+ session->tile_manager.set_samples(session_params.samples); | |
+ session->reset(buffer_params, session_params.samples); | |
+ session->update_scene(); | |
+ | |
+ session->progress.set_update_callback( | |
+ function_bind(&BlenderSession::update_bake_progress, this)); | |
+ } | |
+ | |
+ /* Perform bake. Check cancel to avoid crash with incomplete scene data. */ | |
+ if (!session->progress.get_cancel() && bake_data) { | |
+ scene->bake_manager->bake(scene->device, | |
+ &scene->dscene, | |
+ scene, | |
+ session->progress, | |
+ shader_type, | |
+ bake_pass_filter, | |
+ bake_data, | |
+ result); | |
+ } | |
+ | |
+  /* Free all memory used (host and device), so we don't leave the render | |
+   * engine with extra memory allocated. | |
+   */ | |
+ | |
+ session->device_free(); | |
+ | |
+ delete sync; | |
+ sync = NULL; | |
+} | |
+ | |
+void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay, | |
+ RenderTile &rtile, | |
+ bool do_update_only) | |
+{ | |
+ RenderBuffers *buffers = rtile.buffers; | |
+ | |
+ /* copy data from device */ | |
+ if (!buffers->copy_from_device()) | |
+ return; | |
+ | |
+ float exposure = scene->film->exposure; | |
+ | |
+ vector<float> pixels(rtile.w * rtile.h * 4); | |
+ | |
+ /* Adjust absolute sample number to the range. */ | |
+ int sample = rtile.sample; | |
+ const int range_start_sample = session->tile_manager.range_start_sample; | |
+ if (range_start_sample != -1) { | |
+ sample -= range_start_sample; | |
+ } | |
+ | |
+ if (!do_update_only) { | |
+ /* copy each pass */ | |
+ BL::RenderLayer::passes_iterator b_iter; | |
+ | |
+ for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) { | |
+ BL::RenderPass b_pass(*b_iter); | |
+ int components = b_pass.channels(); | |
+ | |
+ /* Copy pixels from regular render passes. */ | |
+ bool read = buffers->get_pass_rect(b_pass.name(), exposure, sample, components, &pixels[0]); | |
+ | |
+      /* If not a regular pass, try to read from the denoising passes. */ | |
+ if (!read) { | |
+ int denoising_offset = BlenderSync::get_denoising_pass(b_pass); | |
+ if (denoising_offset >= 0) { | |
+ read = buffers->get_denoising_pass_rect( | |
+ denoising_offset, exposure, sample, components, &pixels[0]); | |
+ } | |
+ } | |
+ | |
+ if (!read) { | |
+ memset(&pixels[0], 0, pixels.size() * sizeof(float)); | |
+ } | |
+ | |
+ b_pass.rect(&pixels[0]); | |
+ } | |
+ } | |
+ else { | |
+ /* copy combined pass */ | |
+ BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str())); | |
+ if (buffers->get_pass_rect("Combined", exposure, sample, 4, &pixels[0])) | |
+ b_combined_pass.rect(&pixels[0]); | |
+ } | |
+} | |
+ | |
+void BlenderSession::write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile) | |
+{ | |
+ do_write_update_render_result(b_rlay, rtile, false); | |
+} | |
+ | |
+void BlenderSession::update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile) | |
+{ | |
+ do_write_update_render_result(b_rlay, rtile, true); | |
+} | |
+ | |
+void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_) | |
+{ | |
+ /* only used for viewport render */ | |
+ if (!b_v3d) | |
+ return; | |
+ | |
+ /* on session/scene parameter changes, we recreate session entirely */ | |
+ SessionParams session_params = BlenderSync::get_session_params( | |
+ b_engine, b_userpref, b_scene, background); | |
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); | |
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background); | |
+ | |
+ if (session->params.modified(session_params) || scene->params.modified(scene_params)) { | |
+ free_session(); | |
+ create_session(); | |
+ return; | |
+ } | |
+ | |
+ /* increase samples, but never decrease */ | |
+ session->set_samples(session_params.samples); | |
+ session->set_pause(session_pause); | |
+ | |
+ /* copy recalc flags, outside of mutex so we can decide to do the real | |
+ * synchronization at a later time to not block on running updates */ | |
+ sync->sync_recalc(b_depsgraph_, b_v3d); | |
+ | |
+ /* don't do synchronization if on pause */ | |
+ if (session_pause) { | |
+ tag_update(); | |
+ return; | |
+ } | |
+ | |
+ /* try to acquire mutex. if we don't want to or can't, come back later */ | |
+ if (!session->ready_to_reset() || !session->scene->mutex.try_lock()) { | |
+ tag_update(); | |
+ return; | |
+ } | |
+ | |
+ /* data and camera synchronize */ | |
+ b_depsgraph = b_depsgraph_; | |
+ | |
+ BL::Object b_camera_override(b_engine.camera_override()); | |
+ sync->sync_data( | |
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); | |
+ | |
+ if (b_rv3d) | |
+ sync->sync_view(b_v3d, b_rv3d, width, height); | |
+ else | |
+ sync->sync_camera(b_render, b_camera_override, width, height, ""); | |
+ | |
+ /* reset if needed */ | |
+ if (scene->need_reset()) { | |
+ BufferParams buffer_params = BlenderSync::get_buffer_params( | |
+ b_render, b_v3d, b_rv3d, scene->camera, width, height); | |
+ session->reset(buffer_params, session_params.samples); | |
+ | |
+ /* After session reset, so device is not accessing image data anymore. */ | |
+ builtin_images_load(); | |
+ | |
+ /* reset time */ | |
+ start_resize_time = 0.0; | |
+ } | |
+ | |
+ /* unlock */ | |
+ session->scene->mutex.unlock(); | |
+ | |
+  /* Start the rendering thread if it's not already running. Do this | |
+   * after all scene data has been synced at least once. */ | |
+ session->start(); | |
+} | |
+ | |
+bool BlenderSession::draw(int w, int h) | |
+{ | |
+  /* Pause the redraw in case update is not being called due to a final render. */ | |
+ session->set_pause(BlenderSync::get_session_pause(b_scene, background)); | |
+ | |
+  /* Before drawing, we check for camera and viewport size changes; since | |
+   * we do not get update callbacks for those, we must detect them here. */ | |
+ if (session->ready_to_reset()) { | |
+ bool reset = false; | |
+ | |
+ /* if dimensions changed, reset */ | |
+ if (width != w || height != h) { | |
+ if (start_resize_time == 0.0) { | |
+        /* don't react immediately to resizes, to avoid flickery resizing | |
+         * of the viewport and because some window managers change the | |
+         * window size temporarily on unminimize */ | |
+ start_resize_time = time_dt(); | |
+ tag_redraw(); | |
+ } | |
+ else if (time_dt() - start_resize_time < 0.2) { | |
+ tag_redraw(); | |
+ } | |
+ else { | |
+ width = w; | |
+ height = h; | |
+ reset = true; | |
+ } | |
+ } | |
+ | |
+ /* try to acquire mutex. if we can't, come back later */ | |
+ if (!session->scene->mutex.try_lock()) { | |
+ tag_update(); | |
+ } | |
+ else { | |
+ /* update camera from 3d view */ | |
+ | |
+ sync->sync_view(b_v3d, b_rv3d, width, height); | |
+ | |
+ if (scene->camera->need_update) | |
+ reset = true; | |
+ | |
+ session->scene->mutex.unlock(); | |
+ } | |
+ | |
+ /* reset if requested */ | |
+ if (reset) { | |
+ SessionParams session_params = BlenderSync::get_session_params( | |
+ b_engine, b_userpref, b_scene, background); | |
+ BufferParams buffer_params = BlenderSync::get_buffer_params( | |
+ b_render, b_v3d, b_rv3d, scene->camera, width, height); | |
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background); | |
+ | |
+ if (session_pause == false) { | |
+ session->reset(buffer_params, session_params.samples); | |
+ start_resize_time = 0.0; | |
+ } | |
+ } | |
+ } | |
+ else { | |
+ tag_update(); | |
+ } | |
+ | |
+ /* update status and progress for 3d view draw */ | |
+ update_status_progress(); | |
+ | |
+ /* draw */ | |
+ BufferParams buffer_params = BlenderSync::get_buffer_params( | |
+ b_render, b_v3d, b_rv3d, scene->camera, width, height); | |
+ DeviceDrawParams draw_params; | |
+ | |
+ if (session->params.display_buffer_linear) { | |
+ draw_params.bind_display_space_shader_cb = function_bind( | |
+ &BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene); | |
+ draw_params.unbind_display_space_shader_cb = function_bind( | |
+ &BL::RenderEngine::unbind_display_space_shader, &b_engine); | |
+ } | |
+ | |
+ return !session->draw(buffer_params, draw_params); | |
+} | |
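/* Sketch, not part of the patch: the resize handling in draw() above debounces
 * viewport resizes, waiting 0.2 seconds of stable dimensions before resetting
 * the session. The same idea in isolation, with hypothetical names
 * (ResizeDebouncer, should_apply): */

#include <chrono>

struct ResizeDebouncer {
  double start_time = 0.0; /* 0.0 means no resize is pending. */

  static double now()
  {
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
  }

  /* Returns true once the new size has been stable for longer than `delay`. */
  bool should_apply(bool size_changed, double delay = 0.2)
  {
    if (!size_changed) {
      return false;
    }
    if (start_time == 0.0) {
      start_time = now(); /* First change seen: start the timer. */
      return false;
    }
    if (now() - start_time < delay) {
      return false; /* Still settling, keep redrawing. */
    }
    start_time = 0.0; /* Stable long enough: apply and re-arm. */
    return true;
  }
};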
+ | |
+void BlenderSession::get_status(string &status, string &substatus) | |
+{ | |
+ session->progress.get_status(status, substatus); | |
+} | |
+ | |
+void BlenderSession::get_kernel_status(string &kernel_status) | |
+{ | |
+ session->progress.get_kernel_status(kernel_status); | |
+} | |
+ | |
+void BlenderSession::get_progress(float &progress, double &total_time, double &render_time) | |
+{ | |
+ session->progress.get_time(total_time, render_time); | |
+ progress = session->progress.get_progress(); | |
+} | |
+ | |
+void BlenderSession::update_bake_progress() | |
+{ | |
+ float progress = session->progress.get_progress(); | |
+ | |
+ if (progress != last_progress) { | |
+ b_engine.update_progress(progress); | |
+ last_progress = progress; | |
+ } | |
+} | |
+ | |
+void BlenderSession::update_status_progress() | |
+{ | |
+ string timestatus, status, substatus, kernel_status; | |
+ string scene_status = ""; | |
+ float progress; | |
+ double total_time, remaining_time = 0, render_time; | |
+ float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f; | |
+ float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f; | |
+ | |
+ get_status(status, substatus); | |
+ get_kernel_status(kernel_status); | |
+ get_progress(progress, total_time, render_time); | |
+ | |
+ if (progress > 0) | |
+ remaining_time = (1.0 - (double)progress) * (render_time / (double)progress); | |
+ | |
+ if (background) { | |
+ scene_status += " | " + scene->name; | |
+ if (b_rlay_name != "") | |
+ scene_status += ", " + b_rlay_name; | |
+ | |
+ if (b_rview_name != "") | |
+ scene_status += ", " + b_rview_name; | |
+ | |
+ if (remaining_time > 0) { | |
+ timestatus += "Remaining:" + time_human_readable_from_seconds(remaining_time) + " | "; | |
+ } | |
+ | |
+ timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", (double)mem_used, (double)mem_peak); | |
+ | |
+ if (status.size() > 0) | |
+ status = " | " + status; | |
+ if (substatus.size() > 0) | |
+ status += " | " + substatus; | |
+ if (kernel_status.size() > 0) | |
+ status += " | " + kernel_status; | |
+ } | |
+ | |
+ double current_time = time_dt(); | |
+ /* When rendering in a window, redraw the status at least once per second to keep the elapsed and | |
+ * remaining time up-to-date. For headless rendering, only report when something significant | |
+ * changes to keep the console output readable. */ | |
+ if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) { | |
+ b_engine.update_stats("", (timestatus + scene_status + status).c_str()); | |
+ b_engine.update_memory_stats(mem_used, mem_peak); | |
+ last_status = status; | |
+ last_status_time = current_time; | |
+ } | |
+ if (progress != last_progress) { | |
+ b_engine.update_progress(progress); | |
+ last_progress = progress; | |
+ } | |
+ | |
+ if (session->progress.get_error()) { | |
+ string error = session->progress.get_error_message(); | |
+ if (error != last_error) { | |
+      /* TODO(sergey): Currently the C++ RNA API doesn't let us | |
+       * use a mnemonic name for the variable. Would be nice to | |
+       * have this figured out. | |
+       * | |
+       * Until then, 1 << 5 means RPT_ERROR. | |
+       */ | |
+ b_engine.report(1 << 5, error.c_str()); | |
+ b_engine.error_set(error.c_str()); | |
+ last_error = error; | |
+ } | |
+ } | |
+} | |
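/* Sketch, not part of the patch: the remaining-time estimate above is a
 * linear extrapolation. If a fraction `progress` of the work took
 * `render_time` seconds, the rest is assumed to proceed at the same rate,
 * e.g. progress = 0.25 after 30 s gives (1 - 0.25) * (30 / 0.25) = 90 s. */
static double estimate_remaining_seconds(float progress, double render_time)
{
  if (progress <= 0.0f) {
    return 0.0; /* No data yet; mirrors the `progress > 0` guard above. */
  }
  return (1.0 - (double)progress) * (render_time / (double)progress);
}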
+ | |
+void BlenderSession::tag_update() | |
+{ | |
+ /* tell blender that we want to get another update callback */ | |
+ b_engine.tag_update(); | |
+} | |
+ | |
+void BlenderSession::tag_redraw() | |
+{ | |
+ if (background) { | |
+ /* update stats and progress, only for background here because | |
+ * in 3d view we do it in draw for thread safety reasons */ | |
+ update_status_progress(); | |
+ | |
+ /* offline render, redraw if timeout passed */ | |
+ if (time_dt() - last_redraw_time > 1.0) { | |
+ b_engine.tag_redraw(); | |
+ last_redraw_time = time_dt(); | |
+ } | |
+ } | |
+ else { | |
+ /* tell blender that we want to redraw */ | |
+ b_engine.tag_redraw(); | |
+ } | |
+} | |
+ | |
+void BlenderSession::test_cancel() | |
+{ | |
+ /* test if we need to cancel rendering */ | |
+ if (background) | |
+ if (b_engine.test_break()) | |
+ session->progress.set_cancel("Cancelled"); | |
+} | |
+ | |
+/* A builtin image file name is actually an image datablock name with the | |
+ * absolute sequence frame number appended via the '@' character. | |
+ * | |
+ * This function splits the frame number off the builtin name. | |
+ */ | |
+int BlenderSession::builtin_image_frame(const string &builtin_name) | |
+{ | |
+ int last = builtin_name.find_last_of('@'); | |
+ return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str()); | |
+} | |
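/* Sketch, not part of the patch: the "name@frame" convention above in action.
 * Note the edge case: for a name without '@', find_last_of() returns npos
 * (-1 after the int conversion on typical platforms), substr() then yields
 * the whole name and atoi() returns 0. */
#include <cassert>
#include <cstdlib>
#include <string>

static int builtin_image_frame_sketch(const std::string &builtin_name)
{
  const int last = (int)builtin_name.find_last_of('@');
  return atoi(builtin_name.substr(last + 1).c_str());
}

static void builtin_image_frame_example()
{
  assert(builtin_image_frame_sketch("Image.001@47") == 47);
  assert(builtin_image_frame_sketch("Smoke@0012") == 12);
  assert(builtin_image_frame_sketch("NoFrameSuffix") == 0);
}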
+ | |
+void BlenderSession::builtin_image_info(const string &builtin_name, | |
+ void *builtin_data, | |
+ ImageMetaData &metadata) | |
+{ | |
+ /* empty image */ | |
+ metadata.width = 1; | |
+ metadata.height = 1; | |
+ | |
+ if (!builtin_data) | |
+ return; | |
+ | |
+ /* recover ID pointer */ | |
+ PointerRNA ptr; | |
+ RNA_id_pointer_create((ID *)builtin_data, &ptr); | |
+ BL::ID b_id(ptr); | |
+ | |
+ if (b_id.is_a(&RNA_Image)) { | |
+ /* image data */ | |
+ BL::Image b_image(b_id); | |
+ | |
+ metadata.builtin_free_cache = !b_image.has_data(); | |
+ metadata.is_float = b_image.is_float(); | |
+ metadata.width = b_image.size()[0]; | |
+ metadata.height = b_image.size()[1]; | |
+ metadata.depth = 1; | |
+ metadata.channels = b_image.channels(); | |
+ | |
+ if (metadata.is_float) { | |
+ /* Float images are already converted on the Blender side, | |
+ * no need to do anything in Cycles. */ | |
+ metadata.colorspace = u_colorspace_raw; | |
+ } | |
+ } | |
+ else if (b_id.is_a(&RNA_Object)) { | |
+ /* smoke volume data */ | |
+ BL::Object b_ob(b_id); | |
+ BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob); | |
+ | |
+ metadata.is_float = true; | |
+ metadata.depth = 1; | |
+ metadata.channels = 1; | |
+ | |
+ if (!b_domain) | |
+ return; | |
+ | |
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY) || | |
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME) || | |
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT) || | |
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) | |
+ metadata.channels = 1; | |
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) | |
+ metadata.channels = 4; | |
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) | |
+ metadata.channels = 3; | |
+ else | |
+ return; | |
+ | |
+ int3 resolution = get_int3(b_domain.domain_resolution()); | |
+ int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1; | |
+ | |
+ /* Velocity and heat data is always low-resolution. */ | |
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) || | |
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { | |
+ amplify = 1; | |
+ } | |
+ | |
+ metadata.width = resolution.x * amplify; | |
+ metadata.height = resolution.y * amplify; | |
+ metadata.depth = resolution.z * amplify; | |
+ } | |
+ else { | |
+ /* TODO(sergey): Check we're indeed in shader node tree. */ | |
+ PointerRNA ptr; | |
+ RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr); | |
+ BL::Node b_node(ptr); | |
+ if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) { | |
+ BL::ShaderNodeTexPointDensity b_point_density_node(b_node); | |
+ metadata.channels = 4; | |
+ metadata.width = b_point_density_node.resolution(); | |
+ metadata.height = metadata.width; | |
+ metadata.depth = metadata.width; | |
+ metadata.is_float = true; | |
+ } | |
+ } | |
+} | |
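/* Sketch, not part of the patch: the effect of the noise amplification above.
 * For a 32x32x32 domain with use_noise enabled and noise_scale = 2, the
 * density/flame/color grids report 64x64x64, while velocity and heat stay at
 * the simulation resolution. `GridRes` is a stand-in for Cycles' int3. */
struct GridRes {
  int x, y, z;
};

static GridRes volume_grid_resolution_sketch(const GridRes base,
                                             const bool use_noise,
                                             const int noise_scale,
                                             const bool is_velocity_or_heat)
{
  /* Velocity and heat are only stored at simulation resolution. */
  const int amplify = (use_noise && !is_velocity_or_heat) ? noise_scale : 1;
  return {base.x * amplify, base.y * amplify, base.z * amplify};
}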
+ | |
+bool BlenderSession::builtin_image_pixels(const string &builtin_name, | |
+ void *builtin_data, | |
+ int tile, | |
+ unsigned char *pixels, | |
+ const size_t pixels_size, | |
+ const bool associate_alpha, | |
+ const bool free_cache) | |
+{ | |
+ if (!builtin_data) { | |
+ return false; | |
+ } | |
+ | |
+ const int frame = builtin_image_frame(builtin_name); | |
+ | |
+ PointerRNA ptr; | |
+ RNA_id_pointer_create((ID *)builtin_data, &ptr); | |
+ BL::Image b_image(ptr); | |
+ | |
+ const int width = b_image.size()[0]; | |
+ const int height = b_image.size()[1]; | |
+ const int channels = b_image.channels(); | |
+ | |
+ unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile); | |
+ const size_t num_pixels = ((size_t)width) * height; | |
+ | |
+ if (image_pixels && num_pixels * channels == pixels_size) { | |
+ memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char)); | |
+ } | |
+ else { | |
+ if (channels == 1) { | |
+ memset(pixels, 0, pixels_size * sizeof(unsigned char)); | |
+ } | |
+ else { | |
+ const size_t num_pixels_safe = pixels_size / channels; | |
+ unsigned char *cp = pixels; | |
+ for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) { | |
+ cp[0] = 255; | |
+ cp[1] = 0; | |
+ cp[2] = 255; | |
+ if (channels == 4) { | |
+ cp[3] = 255; | |
+ } | |
+ } | |
+ } | |
+ } | |
+ | |
+ if (image_pixels) { | |
+ MEM_freeN(image_pixels); | |
+ } | |
+ | |
+ /* Free image buffers to save memory during render. */ | |
+ if (free_cache) { | |
+ b_image.buffers_free(); | |
+ } | |
+ | |
+ if (associate_alpha) { | |
+ /* Premultiply, byte images are always straight for Blender. */ | |
+ unsigned char *cp = pixels; | |
+ for (size_t i = 0; i < num_pixels; i++, cp += channels) { | |
+ cp[0] = (cp[0] * cp[3]) >> 8; | |
+ cp[1] = (cp[1] * cp[3]) >> 8; | |
+ cp[2] = (cp[2] * cp[3]) >> 8; | |
+ } | |
+ } | |
+ return true; | |
+} | |
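/* Sketch, not part of the patch: the `(c * a) >> 8` premultiply above divides
 * by 256 instead of 255, a common speed/accuracy trade-off that can darken a
 * channel by up to one 8-bit step. An exact, correctly rounded alternative: */
static inline unsigned char mul_alpha_exact(unsigned char c, unsigned char a)
{
  /* Computes (c * a) / 255 with rounding via the +128 / fold-back trick. */
  const unsigned int t = (unsigned int)c * (unsigned int)a + 128u;
  return (unsigned char)((t + (t >> 8)) >> 8);
}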
+ | |
+bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, | |
+ void *builtin_data, | |
+ int tile, | |
+ float *pixels, | |
+ const size_t pixels_size, | |
+ const bool, | |
+ const bool free_cache) | |
+{ | |
+ if (!builtin_data) { | |
+ return false; | |
+ } | |
+ | |
+ PointerRNA ptr; | |
+ RNA_id_pointer_create((ID *)builtin_data, &ptr); | |
+ BL::ID b_id(ptr); | |
+ | |
+ if (b_id.is_a(&RNA_Image)) { | |
+ /* image data */ | |
+ BL::Image b_image(b_id); | |
+ int frame = builtin_image_frame(builtin_name); | |
+ | |
+ const int width = b_image.size()[0]; | |
+ const int height = b_image.size()[1]; | |
+ const int channels = b_image.channels(); | |
+ | |
+ float *image_pixels; | |
+ image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile); | |
+ const size_t num_pixels = ((size_t)width) * height; | |
+ | |
+ if (image_pixels && num_pixels * channels == pixels_size) { | |
+ memcpy(pixels, image_pixels, pixels_size * sizeof(float)); | |
+ } | |
+ else { | |
+ if (channels == 1) { | |
+        memset(pixels, 0, pixels_size * sizeof(float)); | |
+ } | |
+ else { | |
+ const size_t num_pixels_safe = pixels_size / channels; | |
+ float *fp = pixels; | |
+        for (size_t i = 0; i < num_pixels_safe; i++, fp += channels) { | |
+ fp[0] = 1.0f; | |
+ fp[1] = 0.0f; | |
+ fp[2] = 1.0f; | |
+ if (channels == 4) { | |
+ fp[3] = 1.0f; | |
+ } | |
+ } | |
+ } | |
+ } | |
+ | |
+ if (image_pixels) { | |
+ MEM_freeN(image_pixels); | |
+ } | |
+ | |
+ /* Free image buffers to save memory during render. */ | |
+ if (free_cache) { | |
+ b_image.buffers_free(); | |
+ } | |
+ | |
+ return true; | |
+ } | |
+ else if (b_id.is_a(&RNA_Object)) { | |
+ /* smoke volume data */ | |
+ BL::Object b_ob(b_id); | |
+ BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob); | |
+ | |
+ if (!b_domain) { | |
+ return false; | |
+ } | |
+ | |
+ int3 resolution = get_int3(b_domain.domain_resolution()); | |
+ int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1; | |
+ | |
+ /* Velocity and heat data is always low-resolution. */ | |
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) || | |
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { | |
+ amplify = 1; | |
+ } | |
+ | |
+ const int width = resolution.x * amplify; | |
+ const int height = resolution.y * amplify; | |
+ const int depth = resolution.z * amplify; | |
+ const size_t num_pixels = ((size_t)width) * height * depth; | |
+ | |
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) { | |
+ FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length); | |
+ if (length == num_pixels) { | |
+ FluidDomainSettings_density_grid_get(&b_domain.ptr, pixels); | |
+ return true; | |
+ } | |
+ } | |
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) { | |
+ /* this is in range 0..1, and interpreted by the OpenGL smoke viewer | |
+ * as 1500..3000 K with the first part faded to zero density */ | |
+ FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length); | |
+ if (length == num_pixels) { | |
+ FluidDomainSettings_flame_grid_get(&b_domain.ptr, pixels); | |
+ return true; | |
+ } | |
+ } | |
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) { | |
+ /* the RGB is "premultiplied" by density for better interpolation results */ | |
+ FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length); | |
+ if (length == num_pixels * 4) { | |
+ FluidDomainSettings_color_grid_get(&b_domain.ptr, pixels); | |
+ return true; | |
+ } | |
+ } | |
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) { | |
+ FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length); | |
+ if (length == num_pixels * 3) { | |
+ FluidDomainSettings_velocity_grid_get(&b_domain.ptr, pixels); | |
+ return true; | |
+ } | |
+ } | |
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { | |
+ FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length); | |
+ if (length == num_pixels) { | |
+ FluidDomainSettings_heat_grid_get(&b_domain.ptr, pixels); | |
+ return true; | |
+ } | |
+ } | |
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) { | |
+ FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length); | |
+ if (length == num_pixels) { | |
+ FluidDomainSettings_temperature_grid_get(&b_domain.ptr, pixels); | |
+ return true; | |
+ } | |
+ } | |
+ else { | |
+ fprintf( | |
+ stderr, "Cycles error: unknown volume attribute %s, skipping\n", builtin_name.c_str()); | |
+ pixels[0] = 0.0f; | |
+ return false; | |
+ } | |
+ | |
+ fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n"); | |
+ } | |
+ else { | |
+    /* We were originally passing view_layer here, but in reality we need | |
+     * a depsgraph to pass to the RE_point_density_minmax() function. | |
+     */ | |
+ /* TODO(sergey): Check we're indeed in shader node tree. */ | |
+ PointerRNA ptr; | |
+ RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr); | |
+ BL::Node b_node(ptr); | |
+ if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) { | |
+ BL::ShaderNodeTexPointDensity b_point_density_node(b_node); | |
+ int length; | |
+ b_point_density_node.calc_point_density(b_depsgraph, &length, &pixels); | |
+ } | |
+ } | |
+ | |
+ return false; | |
+} | |
+ | |
+void BlenderSession::builtin_images_load() | |
+{ | |
+ /* Force builtin images to be loaded along with Blender data sync. This | |
+ * is needed because we may be reading from depsgraph evaluated data which | |
+ * can be freed by Blender before Cycles reads it. | |
+ * | |
+ * TODO: the assumption that no further access to builtin image data will | |
+ * happen is really weak, and likely to break in the future. We should find | |
+ * a better solution to hand over the data directly to the image manager | |
+ * instead of through callbacks whose timing is difficult to control. */ | |
+ ImageManager *manager = session->scene->image_manager; | |
+ Device *device = session->device; | |
+ manager->device_load_builtin(device, session->scene, session->progress); | |
+} | |
+ | |
+void BlenderSession::update_resumable_tile_manager(int num_samples) | |
+{ | |
+ const int num_resumable_chunks = BlenderSession::num_resumable_chunks, | |
+ current_resumable_chunk = BlenderSession::current_resumable_chunk; | |
+ if (num_resumable_chunks == 0) { | |
+ return; | |
+ } | |
+ | |
+ if (num_resumable_chunks > num_samples) { | |
+ fprintf(stderr, | |
+ "Cycles warning: more sample chunks (%d) than samples (%d), " | |
+ "this will cause some samples to be included in multiple chunks.\n", | |
+ num_resumable_chunks, | |
+ num_samples); | |
+ } | |
+ | |
+ const float num_samples_per_chunk = (float)num_samples / num_resumable_chunks; | |
+ | |
+ float range_start_sample, range_num_samples; | |
+ if (current_resumable_chunk != 0) { | |
+ /* Single chunk rendering. */ | |
+ range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1); | |
+ range_num_samples = num_samples_per_chunk; | |
+ } | |
+ else { | |
+ /* Ranged-chunks. */ | |
+ const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1; | |
+ range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1); | |
+ range_num_samples = num_chunks * num_samples_per_chunk; | |
+ } | |
+ | |
+ /* Round after doing the multiplications with num_chunks and num_samples_per_chunk | |
+ * to allow for many small chunks. */ | |
+ int rounded_range_start_sample = (int)floorf(range_start_sample + 0.5f); | |
+ int rounded_range_num_samples = max((int)floorf(range_num_samples + 0.5f), 1); | |
+ | |
+ /* Make sure we don't overshoot. */ | |
+ if (rounded_range_start_sample + rounded_range_num_samples > num_samples) { | |
+    rounded_range_num_samples = num_samples - rounded_range_start_sample; | |
+ } | |
+ | |
+ VLOG(1) << "Samples range start is " << range_start_sample << ", " | |
+ << "number of samples to render is " << range_num_samples; | |
+ | |
+ scene->integrator->start_sample = rounded_range_start_sample; | |
+ scene->integrator->tag_update(scene); | |
+ | |
+ session->tile_manager.range_start_sample = rounded_range_start_sample; | |
+ session->tile_manager.range_num_samples = rounded_range_num_samples; | |
+} | |
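/* Worked example, not part of the patch: the chunk arithmetic above with
 * num_samples = 250 and 4 resumable chunks (62.5 samples per chunk). */
#include <algorithm>
#include <cmath>
#include <cstdio>

static void print_chunk_range_sketch(int num_samples, int num_chunks, int chunk /* 1-based */)
{
  const float per_chunk = (float)num_samples / num_chunks;
  const int start = (int)floorf(per_chunk * (chunk - 1) + 0.5f);
  const int count = std::max((int)floorf(per_chunk + 0.5f), 1);
  /* chunk 1 -> samples [0, 63), chunk 3 -> samples [125, 188):
   * rounding happens after the multiplication, so many small chunks stay
   * evenly distributed instead of accumulating rounding error. */
  printf("chunk %d: samples [%d, %d)\n", chunk, start, std::min(start + count, num_samples));
}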
+ | |
+void BlenderSession::free_blender_memory_if_possible() | |
+{ | |
+ if (!background) { | |
+    /* During interactive render we cannot free anything: attempts to save | |
+ * memory would cause things to be allocated and evaluated for every | |
+ * updated sample. | |
+ */ | |
+ return; | |
+ } | |
+ b_engine.free_blender_memory(); | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp | |
--- a/intern/cycles/blender/blender_sync.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/blender/blender_sync.cpp 2020-01-10 20:42:43.457590054 +0300 | |
@@ -291,6 +291,16 @@ | |
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect"); | |
integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold"); | |
+ if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) { | |
+ integrator->sampling_pattern = SAMPLING_PATTERN_PMJ; | |
+ integrator->adaptive_min_samples = get_int(cscene, "adaptive_min_samples"); | |
+ integrator->adaptive_threshold = get_float(cscene, "adaptive_threshold"); | |
+ } | |
+ else { | |
+ integrator->adaptive_min_samples = INT_MAX; | |
+ integrator->adaptive_threshold = 0.0f; | |
+ } | |
+ | |
int diffuse_samples = get_int(cscene, "diffuse_samples"); | |
int glossy_samples = get_int(cscene, "glossy_samples"); | |
int transmission_samples = get_int(cscene, "transmission_samples"); | |
@@ -307,6 +317,8 @@ | |
integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples; | |
integrator->subsurface_samples = subsurface_samples * subsurface_samples; | |
integrator->volume_samples = volume_samples * volume_samples; | |
+  /* Square as well, guarding against int overflow: 46341 * 46341 exceeds INT_MAX. */ | |
+  integrator->adaptive_min_samples = (integrator->adaptive_min_samples < 46341) ? | |
+      integrator->adaptive_min_samples * integrator->adaptive_min_samples : INT_MAX; | |
} | |
else { | |
integrator->diffuse_samples = diffuse_samples; | |
@@ -482,6 +494,8 @@ | |
MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES); | |
#endif | |
MAP_PASS("Debug Render Time", PASS_RENDER_TIME); | |
+ MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER); | |
+ MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT); | |
if (string_startswith(name, cryptomatte_prefix)) { | |
return PASS_CRYPTOMATTE; | |
} | |
@@ -517,7 +531,9 @@ | |
return -1; | |
} | |
-vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer) | |
+vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, | |
+ BL::ViewLayer &b_view_layer, | |
+ bool adaptive_sampling) | |
{ | |
vector<Pass> passes; | |
@@ -595,6 +611,10 @@ | |
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str()); | |
Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time"); | |
} | |
+ if (get_boolean(crp, "pass_debug_sample_count")) { | |
+ b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str()); | |
+    Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count"); | |
+ } | |
if (get_boolean(crp, "use_pass_volume_direct")) { | |
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str()); | |
Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir"); | |
@@ -656,6 +676,13 @@ | |
} | |
RNA_END; | |
+ if (adaptive_sampling) { | |
+ Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes); | |
+ if (!get_boolean(crp, "pass_debug_sample_count")) { | |
+ Pass::add(PASS_SAMPLE_COUNT, passes); | |
+ } | |
+ } | |
+ | |
return passes; | |
} | |
@@ -889,6 +916,8 @@ | |
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background && | |
BlenderSession::print_render_stats; | |
+ params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling"); | |
+ | |
return params; | |
} | |
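/* Sketch, not part of the patch: how the "zero means automatic" convention of
 * adaptive_threshold / adaptive_min_samples above could be resolved downstream.
 * The concrete heuristics here are illustrative assumptions only, not the
 * patch's actual defaults. */
#include <algorithm>
#include <cmath>

static void resolve_adaptive_defaults_sketch(const int aa_samples,
                                             float &adaptive_threshold,
                                             int &adaptive_min_samples)
{
  if (aa_samples <= 0) {
    return;
  }
  if (adaptive_threshold == 0.0f) {
    /* Assumed heuristic: tolerate more variance when fewer samples are budgeted. */
    adaptive_threshold = std::max(0.001f, 1.0f / (float)aa_samples);
  }
  if (adaptive_min_samples == 0) {
    /* Assumed heuristic: never stop a pixel before a handful of samples. */
    adaptive_min_samples = std::max(4, (int)std::sqrt((float)aa_samples));
  }
}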
diff -Naur a/intern/cycles/blender/blender_sync.cpp.orig b/intern/cycles/blender/blender_sync.cpp.orig | |
--- a/intern/cycles/blender/blender_sync.cpp.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/blender/blender_sync.cpp.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,895 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include "render/background.h" | |
+#include "render/camera.h" | |
+#include "render/film.h" | |
+#include "render/graph.h" | |
+#include "render/integrator.h" | |
+#include "render/light.h" | |
+#include "render/mesh.h" | |
+#include "render/nodes.h" | |
+#include "render/object.h" | |
+#include "render/scene.h" | |
+#include "render/shader.h" | |
+#include "render/curves.h" | |
+ | |
+#include "device/device.h" | |
+ | |
+#include "blender/blender_device.h" | |
+#include "blender/blender_sync.h" | |
+#include "blender/blender_session.h" | |
+#include "blender/blender_util.h" | |
+ | |
+#include "util/util_debug.h" | |
+#include "util/util_foreach.h" | |
+#include "util/util_opengl.h" | |
+#include "util/util_hash.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+static const char *cryptomatte_prefix = "Crypto"; | |
+ | |
+/* Constructor */ | |
+ | |
+BlenderSync::BlenderSync(BL::RenderEngine &b_engine, | |
+ BL::BlendData &b_data, | |
+ BL::Scene &b_scene, | |
+ Scene *scene, | |
+ bool preview, | |
+ Progress &progress) | |
+ : b_engine(b_engine), | |
+ b_data(b_data), | |
+ b_scene(b_scene), | |
+ shader_map(&scene->shaders), | |
+ object_map(&scene->objects), | |
+ mesh_map(&scene->meshes), | |
+ light_map(&scene->lights), | |
+ particle_system_map(&scene->particle_systems), | |
+ world_map(NULL), | |
+ world_recalc(false), | |
+ scene(scene), | |
+ preview(preview), | |
+ experimental(false), | |
+ dicing_rate(1.0f), | |
+ max_subdivisions(12), | |
+ progress(progress) | |
+{ | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") : | |
+ RNA_float_get(&cscene, "dicing_rate"); | |
+ max_subdivisions = RNA_int_get(&cscene, "max_subdivisions"); | |
+} | |
+ | |
+BlenderSync::~BlenderSync() | |
+{ | |
+} | |
+ | |
+/* Sync */ | |
+ | |
+void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d) | |
+{ | |
+  /* Sync recalc flags from blender to cycles. The actual update is done separately, | |
+   * so we can do it later on if doing it immediately is not suitable. */ | |
+ | |
+ bool has_updated_objects = b_depsgraph.id_type_updated(BL::DriverTarget::id_type_OBJECT); | |
+ | |
+ if (experimental) { | |
+ /* Mark all meshes as needing to be exported again if dicing changed. */ | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ bool dicing_prop_changed = false; | |
+ | |
+ float updated_dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") : | |
+ RNA_float_get(&cscene, "dicing_rate"); | |
+ | |
+ if (dicing_rate != updated_dicing_rate) { | |
+ dicing_rate = updated_dicing_rate; | |
+ dicing_prop_changed = true; | |
+ } | |
+ | |
+ int updated_max_subdivisions = RNA_int_get(&cscene, "max_subdivisions"); | |
+ | |
+ if (max_subdivisions != updated_max_subdivisions) { | |
+ max_subdivisions = updated_max_subdivisions; | |
+ dicing_prop_changed = true; | |
+ } | |
+ | |
+ if (dicing_prop_changed) { | |
+ for (const pair<void *, Mesh *> &iter : mesh_map.key_to_scene_data()) { | |
+ Mesh *mesh = iter.second; | |
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) { | |
+ mesh_map.set_recalc(iter.first); | |
+ } | |
+ } | |
+ } | |
+ } | |
+ | |
+ /* Iterate over all IDs in this depsgraph. */ | |
+ BL::Depsgraph::updates_iterator b_update; | |
+ for (b_depsgraph.updates.begin(b_update); b_update != b_depsgraph.updates.end(); ++b_update) { | |
+ BL::ID b_id(b_update->id()); | |
+ | |
+ /* Material */ | |
+ if (b_id.is_a(&RNA_Material)) { | |
+ BL::Material b_mat(b_id); | |
+ shader_map.set_recalc(b_mat); | |
+ } | |
+ /* Light */ | |
+ else if (b_id.is_a(&RNA_Light)) { | |
+ BL::Light b_light(b_id); | |
+ shader_map.set_recalc(b_light); | |
+ } | |
+ /* Object */ | |
+ else if (b_id.is_a(&RNA_Object)) { | |
+ BL::Object b_ob(b_id); | |
+ const bool updated_geometry = b_update->is_updated_geometry(); | |
+ | |
+ if (b_update->is_updated_transform()) { | |
+ object_map.set_recalc(b_ob); | |
+ light_map.set_recalc(b_ob); | |
+ } | |
+ | |
+ if (object_is_mesh(b_ob)) { | |
+ if (updated_geometry || | |
+ (object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) { | |
+ BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data(); | |
+ mesh_map.set_recalc(key); | |
+ } | |
+ } | |
+ else if (object_is_light(b_ob)) { | |
+ if (updated_geometry) { | |
+ light_map.set_recalc(b_ob); | |
+ } | |
+ } | |
+ | |
+ if (updated_geometry) { | |
+ BL::Object::particle_systems_iterator b_psys; | |
+ for (b_ob.particle_systems.begin(b_psys); b_psys != b_ob.particle_systems.end(); ++b_psys) | |
+ particle_system_map.set_recalc(b_ob); | |
+ } | |
+ } | |
+ /* Mesh */ | |
+ else if (b_id.is_a(&RNA_Mesh)) { | |
+ BL::Mesh b_mesh(b_id); | |
+ mesh_map.set_recalc(b_mesh); | |
+ } | |
+ /* World */ | |
+ else if (b_id.is_a(&RNA_World)) { | |
+ BL::World b_world(b_id); | |
+ if (world_map == b_world.ptr.data) { | |
+ world_recalc = true; | |
+ } | |
+ } | |
+ } | |
+ | |
+ BlenderViewportParameters new_viewport_parameters(b_v3d); | |
+ if (viewport_parameters.modified(new_viewport_parameters)) { | |
+ world_recalc = true; | |
+ } | |
+ | |
+ /* Updates shader with object dependency if objects changed. */ | |
+ if (has_updated_objects) { | |
+ if (scene->default_background->has_object_dependency) { | |
+ world_recalc = true; | |
+ } | |
+ | |
+ foreach (Shader *shader, scene->shaders) { | |
+ if (shader->has_object_dependency) { | |
+ shader->need_sync_object = true; | |
+ } | |
+ } | |
+ } | |
+} | |
+ | |
+void BlenderSync::sync_data(BL::RenderSettings &b_render, | |
+ BL::Depsgraph &b_depsgraph, | |
+ BL::SpaceView3D &b_v3d, | |
+ BL::Object &b_override, | |
+ int width, | |
+ int height, | |
+ void **python_thread_state) | |
+{ | |
+ BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); | |
+ | |
+ sync_view_layer(b_v3d, b_view_layer); | |
+ sync_integrator(); | |
+ sync_film(b_v3d); | |
+ sync_shaders(b_depsgraph, b_v3d); | |
+ sync_images(); | |
+ sync_curve_settings(); | |
+ | |
+  mesh_synced.clear(); /* used for objects and motion sync */ | |
+ | |
+ if (scene->need_motion() == Scene::MOTION_PASS || scene->need_motion() == Scene::MOTION_NONE || | |
+ scene->camera->motion_position == Camera::MOTION_POSITION_CENTER) { | |
+ sync_objects(b_depsgraph, b_v3d); | |
+ } | |
+ sync_motion(b_render, b_depsgraph, b_v3d, b_override, width, height, python_thread_state); | |
+ | |
+ mesh_synced.clear(); | |
+ | |
+ /* Shader sync done at the end, since object sync uses it. | |
+ * false = don't delete unused shaders, not supported. */ | |
+ shader_map.post_sync(false); | |
+ | |
+ free_data_after_sync(b_depsgraph); | |
+} | |
+ | |
+/* Integrator */ | |
+ | |
+void BlenderSync::sync_integrator() | |
+{ | |
+ BL::RenderSettings r = b_scene.render(); | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ | |
+ experimental = (get_enum(cscene, "feature_set") != 0); | |
+ | |
+ Integrator *integrator = scene->integrator; | |
+ Integrator previntegrator = *integrator; | |
+ | |
+ integrator->min_bounce = get_int(cscene, "min_light_bounces"); | |
+ integrator->max_bounce = get_int(cscene, "max_bounces"); | |
+ | |
+ integrator->max_diffuse_bounce = get_int(cscene, "diffuse_bounces"); | |
+ integrator->max_glossy_bounce = get_int(cscene, "glossy_bounces"); | |
+ integrator->max_transmission_bounce = get_int(cscene, "transmission_bounces"); | |
+ integrator->max_volume_bounce = get_int(cscene, "volume_bounces"); | |
+ | |
+ integrator->transparent_min_bounce = get_int(cscene, "min_transparent_bounces"); | |
+ integrator->transparent_max_bounce = get_int(cscene, "transparent_max_bounces"); | |
+ | |
+ integrator->volume_max_steps = get_int(cscene, "volume_max_steps"); | |
+ integrator->volume_step_size = get_float(cscene, "volume_step_size"); | |
+ | |
+ integrator->caustics_reflective = get_boolean(cscene, "caustics_reflective"); | |
+ integrator->caustics_refractive = get_boolean(cscene, "caustics_refractive"); | |
+ integrator->filter_glossy = get_float(cscene, "blur_glossy"); | |
+ | |
+ integrator->seed = get_int(cscene, "seed"); | |
+ if (get_boolean(cscene, "use_animated_seed")) { | |
+ integrator->seed = hash_uint2(b_scene.frame_current(), get_int(cscene, "seed")); | |
+ if (b_scene.frame_subframe() != 0.0f) { | |
+ /* TODO(sergey): Ideally should be some sort of hash_merge, | |
+ * but this is good enough for now. | |
+ */ | |
+ integrator->seed += hash_uint2((int)(b_scene.frame_subframe() * (float)INT_MAX), | |
+ get_int(cscene, "seed")); | |
+ } | |
+ } | |
+ | |
+ integrator->sampling_pattern = (SamplingPattern)get_enum( | |
+ cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_SOBOL); | |
+ | |
+ integrator->sample_clamp_direct = get_float(cscene, "sample_clamp_direct"); | |
+ integrator->sample_clamp_indirect = get_float(cscene, "sample_clamp_indirect"); | |
+ if (!preview) { | |
+ if (integrator->motion_blur != r.use_motion_blur()) { | |
+ scene->object_manager->tag_update(scene); | |
+ scene->camera->tag_update(); | |
+ } | |
+ | |
+ integrator->motion_blur = r.use_motion_blur(); | |
+ } | |
+ | |
+ integrator->method = (Integrator::Method)get_enum( | |
+ cscene, "progressive", Integrator::NUM_METHODS, Integrator::PATH); | |
+ | |
+ integrator->sample_all_lights_direct = get_boolean(cscene, "sample_all_lights_direct"); | |
+ integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect"); | |
+ integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold"); | |
+ | |
+ int diffuse_samples = get_int(cscene, "diffuse_samples"); | |
+ int glossy_samples = get_int(cscene, "glossy_samples"); | |
+ int transmission_samples = get_int(cscene, "transmission_samples"); | |
+ int ao_samples = get_int(cscene, "ao_samples"); | |
+ int mesh_light_samples = get_int(cscene, "mesh_light_samples"); | |
+ int subsurface_samples = get_int(cscene, "subsurface_samples"); | |
+ int volume_samples = get_int(cscene, "volume_samples"); | |
+ | |
+ if (get_boolean(cscene, "use_square_samples")) { | |
+ integrator->diffuse_samples = diffuse_samples * diffuse_samples; | |
+ integrator->glossy_samples = glossy_samples * glossy_samples; | |
+ integrator->transmission_samples = transmission_samples * transmission_samples; | |
+ integrator->ao_samples = ao_samples * ao_samples; | |
+ integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples; | |
+ integrator->subsurface_samples = subsurface_samples * subsurface_samples; | |
+ integrator->volume_samples = volume_samples * volume_samples; | |
+ } | |
+ else { | |
+ integrator->diffuse_samples = diffuse_samples; | |
+ integrator->glossy_samples = glossy_samples; | |
+ integrator->transmission_samples = transmission_samples; | |
+ integrator->ao_samples = ao_samples; | |
+ integrator->mesh_light_samples = mesh_light_samples; | |
+ integrator->subsurface_samples = subsurface_samples; | |
+ integrator->volume_samples = volume_samples; | |
+ } | |
+ | |
+ if (b_scene.render().use_simplify()) { | |
+ if (preview) { | |
+ integrator->ao_bounces = get_int(cscene, "ao_bounces"); | |
+ } | |
+ else { | |
+ integrator->ao_bounces = get_int(cscene, "ao_bounces_render"); | |
+ } | |
+ } | |
+ else { | |
+ integrator->ao_bounces = 0; | |
+ } | |
+ | |
+ if (integrator->modified(previntegrator)) | |
+ integrator->tag_update(scene); | |
+} | |
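/* Sketch, not part of the patch: the animated seed above derives a
 * deterministic per-frame seed, so renders are reproducible yet decorrelated
 * across frames. hash_uint2_sketch merely stands in for Cycles' hash_uint2;
 * the mixing constants below are illustrative, not Cycles' actual hash. */
static unsigned int hash_uint2_sketch(unsigned int kx, unsigned int ky)
{
  unsigned int h = kx * 73856093u ^ ky * 19349663u;
  h ^= h >> 16;
  h *= 0x7feb352du;
  h ^= h >> 15;
  return h;
}

/* E.g. frame 10 with user seed 42: every render of frame 10 gets the same
 * integrator seed, while frame 11 gets an unrelated one. */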
+ | |
+/* Film */ | |
+ | |
+void BlenderSync::sync_film(BL::SpaceView3D &b_v3d) | |
+{ | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ | |
+ Film *film = scene->film; | |
+ Film prevfilm = *film; | |
+ | |
+ if (b_v3d) { | |
+ film->display_pass = update_viewport_display_passes(b_v3d, film->passes); | |
+ } | |
+ | |
+ film->exposure = get_float(cscene, "film_exposure"); | |
+ film->filter_type = (FilterType)get_enum( | |
+ cscene, "pixel_filter_type", FILTER_NUM_TYPES, FILTER_BLACKMAN_HARRIS); | |
+ film->filter_width = (film->filter_type == FILTER_BOX) ? 1.0f : | |
+ get_float(cscene, "filter_width"); | |
+ | |
+ if (b_scene.world()) { | |
+ BL::WorldMistSettings b_mist = b_scene.world().mist_settings(); | |
+ | |
+ film->mist_start = b_mist.start(); | |
+ film->mist_depth = b_mist.depth(); | |
+ | |
+ switch (b_mist.falloff()) { | |
+ case BL::WorldMistSettings::falloff_QUADRATIC: | |
+ film->mist_falloff = 2.0f; | |
+ break; | |
+ case BL::WorldMistSettings::falloff_LINEAR: | |
+ film->mist_falloff = 1.0f; | |
+ break; | |
+ case BL::WorldMistSettings::falloff_INVERSE_QUADRATIC: | |
+ film->mist_falloff = 0.5f; | |
+ break; | |
+ } | |
+ } | |
+ | |
+ if (film->modified(prevfilm)) { | |
+ film->tag_update(scene); | |
+ film->tag_passes_update(scene, prevfilm.passes, false); | |
+ } | |
+} | |
+ | |
+/* Render Layer */ | |
+ | |
+void BlenderSync::sync_view_layer(BL::SpaceView3D & /*b_v3d*/, BL::ViewLayer &b_view_layer) | |
+{ | |
+ /* render layer */ | |
+ view_layer.name = b_view_layer.name(); | |
+ view_layer.use_background_shader = b_view_layer.use_sky(); | |
+ view_layer.use_background_ao = b_view_layer.use_ao(); | |
+ view_layer.use_surfaces = b_view_layer.use_solid(); | |
+ view_layer.use_hair = b_view_layer.use_strand(); | |
+ | |
+ /* Material override. */ | |
+ view_layer.material_override = b_view_layer.material_override(); | |
+ | |
+ /* Sample override. */ | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ int use_layer_samples = get_enum(cscene, "use_layer_samples"); | |
+ | |
+ view_layer.bound_samples = (use_layer_samples == 1); | |
+ view_layer.samples = 0; | |
+ | |
+ if (use_layer_samples != 2) { | |
+ int samples = b_view_layer.samples(); | |
+ if (get_boolean(cscene, "use_square_samples")) | |
+ view_layer.samples = samples * samples; | |
+ else | |
+ view_layer.samples = samples; | |
+ } | |
+} | |
+ | |
+/* Images */ | |
+void BlenderSync::sync_images() | |
+{ | |
+ /* Sync is a convention for this API, but currently it frees unused buffers. */ | |
+ | |
+ const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface(); | |
+ if (is_interface_locked == false && BlenderSession::headless == false) { | |
+    /* If the interface is not locked, it's possible the image is needed for | |
+ * the display. | |
+ */ | |
+ return; | |
+ } | |
+ /* Free buffers used by images which are not needed for render. */ | |
+ BL::BlendData::images_iterator b_image; | |
+ for (b_data.images.begin(b_image); b_image != b_data.images.end(); ++b_image) { | |
+    /* TODO(sergey): Consider making it a utility function to check | |
+     * whether an image is considered builtin. | |
+ */ | |
+ const bool is_builtin = b_image->packed_file() || | |
+ b_image->source() == BL::Image::source_GENERATED || | |
+ b_image->source() == BL::Image::source_MOVIE || b_engine.is_preview(); | |
+ if (is_builtin == false) { | |
+ b_image->buffers_free(); | |
+ } | |
+ /* TODO(sergey): Free builtin images not used by any shader. */ | |
+ } | |
+} | |
+ | |
+/* Passes */ | |
+PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass) | |
+{ | |
+ string name = b_pass.name(); | |
+#define MAP_PASS(passname, passtype) \ | |
+ if (name == passname) \ | |
+ return passtype; | |
+ /* NOTE: Keep in sync with defined names from DNA_scene_types.h */ | |
+ MAP_PASS("Combined", PASS_COMBINED); | |
+ MAP_PASS("Depth", PASS_DEPTH); | |
+ MAP_PASS("Mist", PASS_MIST); | |
+ MAP_PASS("Normal", PASS_NORMAL); | |
+ MAP_PASS("IndexOB", PASS_OBJECT_ID); | |
+ MAP_PASS("UV", PASS_UV); | |
+ MAP_PASS("Vector", PASS_MOTION); | |
+ MAP_PASS("IndexMA", PASS_MATERIAL_ID); | |
+ | |
+ MAP_PASS("DiffDir", PASS_DIFFUSE_DIRECT); | |
+ MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT); | |
+ MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT); | |
+ MAP_PASS("SubsurfaceDir", PASS_SUBSURFACE_DIRECT); | |
+ MAP_PASS("VolumeDir", PASS_VOLUME_DIRECT); | |
+ | |
+ MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT); | |
+ MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT); | |
+ MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT); | |
+ MAP_PASS("SubsurfaceInd", PASS_SUBSURFACE_INDIRECT); | |
+ MAP_PASS("VolumeInd", PASS_VOLUME_INDIRECT); | |
+ | |
+ MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR); | |
+ MAP_PASS("GlossCol", PASS_GLOSSY_COLOR); | |
+ MAP_PASS("TransCol", PASS_TRANSMISSION_COLOR); | |
+ MAP_PASS("SubsurfaceCol", PASS_SUBSURFACE_COLOR); | |
+ | |
+ MAP_PASS("Emit", PASS_EMISSION); | |
+ MAP_PASS("Env", PASS_BACKGROUND); | |
+ MAP_PASS("AO", PASS_AO); | |
+ MAP_PASS("Shadow", PASS_SHADOW); | |
+ | |
+#ifdef __KERNEL_DEBUG__ | |
+ MAP_PASS("Debug BVH Traversed Nodes", PASS_BVH_TRAVERSED_NODES); | |
+ MAP_PASS("Debug BVH Traversed Instances", PASS_BVH_TRAVERSED_INSTANCES); | |
+ MAP_PASS("Debug BVH Intersections", PASS_BVH_INTERSECTIONS); | |
+ MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES); | |
+#endif | |
+ MAP_PASS("Debug Render Time", PASS_RENDER_TIME); | |
+ if (string_startswith(name, cryptomatte_prefix)) { | |
+ return PASS_CRYPTOMATTE; | |
+ } | |
+#undef MAP_PASS | |
+ | |
+ return PASS_NONE; | |
+} | |
+ | |
+int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass) | |
+{ | |
+ string name = b_pass.name(); | |
+ | |
+ if (name == "Noisy Image") | |
+ return DENOISING_PASS_PREFILTERED_COLOR; | |
+ | |
+ if (name.substr(0, 10) != "Denoising ") { | |
+ return -1; | |
+ } | |
+ name = name.substr(10); | |
+ | |
+#define MAP_PASS(passname, offset) \ | |
+ if (name == passname) \ | |
+ return offset; | |
+ MAP_PASS("Normal", DENOISING_PASS_PREFILTERED_NORMAL); | |
+ MAP_PASS("Albedo", DENOISING_PASS_PREFILTERED_ALBEDO); | |
+ MAP_PASS("Depth", DENOISING_PASS_PREFILTERED_DEPTH); | |
+ MAP_PASS("Shadowing", DENOISING_PASS_PREFILTERED_SHADOWING); | |
+ MAP_PASS("Variance", DENOISING_PASS_PREFILTERED_VARIANCE); | |
+ MAP_PASS("Intensity", DENOISING_PASS_PREFILTERED_INTENSITY); | |
+ MAP_PASS("Clean", DENOISING_PASS_CLEAN); | |
+#undef MAP_PASS | |
+ | |
+ return -1; | |
+} | |
+ | |
+vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer) | |
+{ | |
+ vector<Pass> passes; | |
+ | |
+ /* loop over passes */ | |
+ BL::RenderLayer::passes_iterator b_pass_iter; | |
+ | |
+ for (b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) { | |
+ BL::RenderPass b_pass(*b_pass_iter); | |
+ PassType pass_type = get_pass_type(b_pass); | |
+ | |
+ if (pass_type == PASS_MOTION && scene->integrator->motion_blur) | |
+ continue; | |
+ if (pass_type != PASS_NONE) | |
+ Pass::add(pass_type, passes, b_pass.name().c_str()); | |
+ } | |
+ | |
+ PointerRNA crp = RNA_pointer_get(&b_view_layer.ptr, "cycles"); | |
+ bool use_denoising = get_boolean(crp, "use_denoising"); | |
+ bool use_optix_denoising = get_boolean(crp, "use_optix_denoising"); | |
+ bool write_denoising_passes = get_boolean(crp, "denoising_store_passes"); | |
+ | |
+ scene->film->denoising_flags = 0; | |
+ if (use_denoising || write_denoising_passes) { | |
+ if (!use_optix_denoising) { | |
+#define MAP_OPTION(name, flag) \ | |
+ if (!get_boolean(crp, name)) \ | |
+ scene->film->denoising_flags |= flag; | |
+ MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR); | |
+ MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND); | |
+ MAP_OPTION("denoising_glossy_direct", DENOISING_CLEAN_GLOSSY_DIR); | |
+ MAP_OPTION("denoising_glossy_indirect", DENOISING_CLEAN_GLOSSY_IND); | |
+ MAP_OPTION("denoising_transmission_direct", DENOISING_CLEAN_TRANSMISSION_DIR); | |
+ MAP_OPTION("denoising_transmission_indirect", DENOISING_CLEAN_TRANSMISSION_IND); | |
+ MAP_OPTION("denoising_subsurface_direct", DENOISING_CLEAN_SUBSURFACE_DIR); | |
+ MAP_OPTION("denoising_subsurface_indirect", DENOISING_CLEAN_SUBSURFACE_IND); | |
+#undef MAP_OPTION | |
+ } | |
+ b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str()); | |
+ } | |
+ | |
+ if (write_denoising_passes) { | |
+ b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str()); | |
+ b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str()); | |
+ b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str()); | |
+ if (!use_optix_denoising) { | |
+ b_engine.add_pass("Denoising Shadowing", 1, "X", b_view_layer.name().c_str()); | |
+ b_engine.add_pass("Denoising Variance", 3, "RGB", b_view_layer.name().c_str()); | |
+ b_engine.add_pass("Denoising Intensity", 1, "X", b_view_layer.name().c_str()); | |
+ } | |
+ | |
+ if (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) { | |
+ b_engine.add_pass("Denoising Clean", 3, "RGB", b_view_layer.name().c_str()); | |
+ } | |
+ } | |
+ | |
+#ifdef __KERNEL_DEBUG__ | |
+ if (get_boolean(crp, "pass_debug_bvh_traversed_nodes")) { | |
+ b_engine.add_pass("Debug BVH Traversed Nodes", 1, "X", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_BVH_TRAVERSED_NODES, passes, "Debug BVH Traversed Nodes"); | |
+ } | |
+ if (get_boolean(crp, "pass_debug_bvh_traversed_instances")) { | |
+ b_engine.add_pass("Debug BVH Traversed Instances", 1, "X", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_BVH_TRAVERSED_INSTANCES, passes, "Debug BVH Traversed Instances"); | |
+ } | |
+ if (get_boolean(crp, "pass_debug_bvh_intersections")) { | |
+ b_engine.add_pass("Debug BVH Intersections", 1, "X", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_BVH_INTERSECTIONS, passes, "Debug BVH Intersections"); | |
+ } | |
+ if (get_boolean(crp, "pass_debug_ray_bounces")) { | |
+ b_engine.add_pass("Debug Ray Bounces", 1, "X", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_RAY_BOUNCES, passes, "Debug Ray Bounces"); | |
+ } | |
+#endif | |
+ if (get_boolean(crp, "pass_debug_render_time")) { | |
+ b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time"); | |
+ } | |
+ if (get_boolean(crp, "use_pass_volume_direct")) { | |
+ b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir"); | |
+ } | |
+ if (get_boolean(crp, "use_pass_volume_indirect")) { | |
+ b_engine.add_pass("VolumeInd", 3, "RGB", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_VOLUME_INDIRECT, passes, "VolumeInd"); | |
+ } | |
+ | |
+ /* Cryptomatte stores two ID/weight pairs per RGBA layer. | |
+ * User facing parameter is the number of pairs. */ | |
+ int crypto_depth = min(16, get_int(crp, "pass_crypto_depth")) / 2; | |
+ scene->film->cryptomatte_depth = crypto_depth; | |
+ scene->film->cryptomatte_passes = CRYPT_NONE; | |
+ if (get_boolean(crp, "use_pass_crypto_object")) { | |
+ for (int i = 0; i < crypto_depth; ++i) { | |
+ string passname = cryptomatte_prefix + string_printf("Object%02d", i); | |
+ b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); | |
+ } | |
+ scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | | |
+ CRYPT_OBJECT); | |
+ } | |
+ if (get_boolean(crp, "use_pass_crypto_material")) { | |
+ for (int i = 0; i < crypto_depth; ++i) { | |
+ string passname = cryptomatte_prefix + string_printf("Material%02d", i); | |
+ b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); | |
+ } | |
+ scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | | |
+ CRYPT_MATERIAL); | |
+ } | |
+ if (get_boolean(crp, "use_pass_crypto_asset")) { | |
+ for (int i = 0; i < crypto_depth; ++i) { | |
+ string passname = cryptomatte_prefix + string_printf("Asset%02d", i); | |
+ b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str()); | |
+ } | |
+ scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | | |
+ CRYPT_ASSET); | |
+ } | |
+ if (get_boolean(crp, "pass_crypto_accurate") && scene->film->cryptomatte_passes != CRYPT_NONE) { | |
+ scene->film->cryptomatte_passes = (CryptomatteType)(scene->film->cryptomatte_passes | | |
+ CRYPT_ACCURATE); | |
+ } | |
+ | |
+ RNA_BEGIN (&crp, b_aov, "aovs") { | |
+ bool is_color = (get_enum(b_aov, "type") == 1); | |
+ string name = get_string(b_aov, "name"); | |
+ | |
+ if (is_color) { | |
+ b_engine.add_pass(name.c_str(), 4, "RGBA", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_AOV_COLOR, passes, name.c_str()); | |
+ } | |
+ else { | |
+ b_engine.add_pass(name.c_str(), 1, "X", b_view_layer.name().c_str()); | |
+ Pass::add(PASS_AOV_VALUE, passes, name.c_str()); | |
+ } | |
+ } | |
+ RNA_END; | |
+ | |
+ return passes; | |
+} | |
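/* Worked example, not part of the patch, for the Cryptomatte sizing above:
 * with pass_crypto_depth = 6 the user asks for 6 ID/weight pairs; each RGBA
 * pass stores two pairs, so crypto_depth = min(16, 6) / 2 = 3 and enabling
 * the object matte adds CryptoObject00 .. CryptoObject02 (likewise for the
 * material and asset mattes). */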
+ | |
+void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph) | |
+{ | |
+  /* When viewport display is not needed during render we can force some | |
+   * caches to be released from the blender side in order to reduce the peak | |
+   * memory footprint during the synchronization process. | |
+ */ | |
+ const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface(); | |
+ const bool can_free_caches = BlenderSession::headless || is_interface_locked; | |
+ if (!can_free_caches) { | |
+ return; | |
+ } | |
+ /* TODO(sergey): We can actually remove the whole dependency graph, | |
+ * but that will need some API support first. | |
+ */ | |
+ BL::Depsgraph::objects_iterator b_ob; | |
+ for (b_depsgraph.objects.begin(b_ob); b_ob != b_depsgraph.objects.end(); ++b_ob) { | |
+ b_ob->cache_release(); | |
+ } | |
+} | |
+ | |
+/* Scene Parameters */ | |
+ | |
+SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background) | |
+{ | |
+ BL::RenderSettings r = b_scene.render(); | |
+ SceneParams params; | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system"); | |
+ | |
+ if (shadingsystem == 0) | |
+ params.shadingsystem = SHADINGSYSTEM_SVM; | |
+ else if (shadingsystem == 1) | |
+ params.shadingsystem = SHADINGSYSTEM_OSL; | |
+ | |
+ if (background || DebugFlags().viewport_static_bvh) | |
+ params.bvh_type = SceneParams::BVH_STATIC; | |
+ else | |
+ params.bvh_type = SceneParams::BVH_DYNAMIC; | |
+ | |
+ params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits"); | |
+ params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh"); | |
+ params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps"); | |
+ | |
+ if (background && params.shadingsystem != SHADINGSYSTEM_OSL) | |
+ params.persistent_data = r.use_persistent_data(); | |
+ else | |
+ params.persistent_data = false; | |
+ | |
+ int texture_limit; | |
+ if (background) { | |
+ texture_limit = RNA_enum_get(&cscene, "texture_limit_render"); | |
+ } | |
+ else { | |
+ texture_limit = RNA_enum_get(&cscene, "texture_limit"); | |
+ } | |
+ if (texture_limit > 0 && b_scene.render().use_simplify()) { | |
+ params.texture_limit = 1 << (texture_limit + 6); | |
+ } | |
+ else { | |
+ params.texture_limit = 0; | |
+ } | |
+ | |
+ /* TODO(sergey): Once OSL supports per-microarchitecture optimization get | |
+ * rid of this. | |
+ */ | |
+ if (params.shadingsystem == SHADINGSYSTEM_OSL) { | |
+ params.bvh_layout = BVH_LAYOUT_BVH4; | |
+ } | |
+ else { | |
+ params.bvh_layout = DebugFlags().cpu.bvh_layout; | |
+ } | |
+ | |
+#ifdef WITH_EMBREE | |
+ params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE : | |
+ params.bvh_layout; | |
+#endif | |
+ | |
+ params.background = background; | |
+ | |
+ return params; | |
+} | |
+ | |
+/* Session Parameters */ | |
+ | |
+bool BlenderSync::get_session_pause(BL::Scene &b_scene, bool background) | |
+{ | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ return (background) ? false : get_boolean(cscene, "preview_pause"); | |
+} | |
+ | |
+SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine, | |
+ BL::Preferences &b_preferences, | |
+ BL::Scene &b_scene, | |
+ bool background) | |
+{ | |
+ SessionParams params; | |
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); | |
+ | |
+ /* feature set */ | |
+ params.experimental = (get_enum(cscene, "feature_set") != 0); | |
+ | |
+ /* Background */ | |
+ params.background = background; | |
+ | |
+ /* Device */ | |
+ params.threads = blender_device_threads(b_scene); | |
+ params.device = blender_device_info(b_preferences, b_scene, params.background); | |
+ | |
+ /* samples */ | |
+ int samples = get_int(cscene, "samples"); | |
+ int aa_samples = get_int(cscene, "aa_samples"); | |
+ int preview_samples = get_int(cscene, "preview_samples"); | |
+ int preview_aa_samples = get_int(cscene, "preview_aa_samples"); | |
+ | |
+ if (get_boolean(cscene, "use_square_samples")) { | |
+ aa_samples = aa_samples * aa_samples; | |
+ preview_aa_samples = preview_aa_samples * preview_aa_samples; | |
+ | |
+ samples = samples * samples; | |
+ preview_samples = preview_samples * preview_samples; | |
+ } | |
+ | |
+ if (get_enum(cscene, "progressive") == 0 && (params.device.type != DEVICE_OPTIX)) { | |
+ if (background) { | |
+ params.samples = aa_samples; | |
+ } | |
+ else { | |
+ params.samples = preview_aa_samples; | |
+ if (params.samples == 0) | |
+ params.samples = INT_MAX; | |
+ } | |
+ } | |
+ else { | |
+ if (background) { | |
+ params.samples = samples; | |
+ } | |
+ else { | |
+ params.samples = preview_samples; | |
+ if (params.samples == 0) | |
+ params.samples = INT_MAX; | |
+ } | |
+ } | |
+ | |
+ /* Clamp samples. */ | |
+ params.samples = min(params.samples, Integrator::MAX_SAMPLES); | |
+ | |
+ /* tiles */ | |
+ const bool is_cpu = (params.device.type == DEVICE_CPU); | |
+ if (!is_cpu && !background) { | |
+    /* Currently the GPU can be much slower than the CPU when using tiles; | |
+     * this still needs to be investigated, but meanwhile make it possible | |
+     * to work smoothly in the viewport. | |
+ */ | |
+ int debug_tile_size = get_int(cscene, "debug_tile_size"); | |
+ | |
+ params.tile_size = make_int2(debug_tile_size, debug_tile_size); | |
+ } | |
+ else { | |
+ int tile_x = b_engine.tile_x(); | |
+ int tile_y = b_engine.tile_y(); | |
+ | |
+ params.tile_size = make_int2(tile_x, tile_y); | |
+ } | |
+ | |
+ if ((BlenderSession::headless == false) && background) { | |
+ params.tile_order = (TileOrder)get_enum(cscene, "tile_order"); | |
+ } | |
+ else { | |
+ params.tile_order = TILE_BOTTOM_TO_TOP; | |
+ } | |
+ | |
+ /* other parameters */ | |
+ params.start_resolution = get_int(cscene, "preview_start_resolution"); | |
+ params.pixel_size = b_engine.get_preview_pixel_size(b_scene); | |
+ | |
+  /* timeouts */ | |
+ params.cancel_timeout = (double)get_float(cscene, "debug_cancel_timeout"); | |
+ params.reset_timeout = (double)get_float(cscene, "debug_reset_timeout"); | |
+ params.text_timeout = (double)get_float(cscene, "debug_text_timeout"); | |
+ | |
+ /* progressive refine */ | |
+ BL::RenderSettings b_r = b_scene.render(); | |
+ params.progressive_refine = (b_engine.is_preview() || | |
+ get_boolean(cscene, "use_progressive_refine")) && | |
+ !b_r.use_save_buffers(); | |
+ | |
+ if (params.progressive_refine) { | |
+ BL::Scene::view_layers_iterator b_view_layer; | |
+ for (b_scene.view_layers.begin(b_view_layer); b_view_layer != b_scene.view_layers.end(); | |
+ ++b_view_layer) { | |
+ PointerRNA crl = RNA_pointer_get(&b_view_layer->ptr, "cycles"); | |
+ if (get_boolean(crl, "use_denoising")) { | |
+ params.progressive_refine = false; | |
+ } | |
+ } | |
+ } | |
+ | |
+ if (background) { | |
+ if (params.progressive_refine) | |
+ params.progressive = true; | |
+ else | |
+ params.progressive = false; | |
+ | |
+ params.start_resolution = INT_MAX; | |
+ params.pixel_size = 1; | |
+ } | |
+ else | |
+ params.progressive = true; | |
+ | |
+ /* shading system - scene level needs full refresh */ | |
+ const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system"); | |
+ | |
+ if (shadingsystem == 0) | |
+ params.shadingsystem = SHADINGSYSTEM_SVM; | |
+ else if (shadingsystem == 1) | |
+ params.shadingsystem = SHADINGSYSTEM_OSL; | |
+ | |
+  /* color management */ | |
+ params.display_buffer_linear = b_engine.support_display_space_shader(b_scene); | |
+ | |
+ if (b_engine.is_preview()) { | |
+    /* For preview rendering we're using the same timeout as | |
+     * Blender's job update. | |
+ */ | |
+ params.progressive_update_timeout = 0.1; | |
+ } | |
+ | |
+ params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background && | |
+ BlenderSession::print_render_stats; | |
+ | |
+ return params; | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
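
The sample selection above folds several settings together: optional squaring, branched versus plain path tracing, a zero viewport count meaning unlimited refinement, and a final clamp. A minimal standalone sketch of that flow, with the RNA lookups replaced by plain parameters and the two integrator branches collapsed into one count (an illustration, not the patch's code; max_samples stands in for Integrator::MAX_SAMPLES):

#include <algorithm>
#include <climits>

int resolve_samples(int configured, bool use_square_samples, bool background, int max_samples)
{
  if (use_square_samples) {
    configured = configured * configured; /* e.g. 8 configured samples become 64 */
  }
  if (!background && configured == 0) {
    configured = INT_MAX; /* viewport: zero means refine until interrupted */
  }
  return std::min(configured, max_samples); /* final clamp, as above */
}
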
diff -Naur a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h | |
--- a/intern/cycles/blender/blender_sync.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/blender/blender_sync.h 2020-01-10 20:42:43.457590054 +0300 | |
@@ -70,7 +70,9 @@ | |
int height, | |
void **python_thread_state); | |
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer); | |
- vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer); | |
+ vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, | |
+ BL::ViewLayer &b_view_layer, | |
+ bool adaptive_sampling); | |
void sync_integrator(); | |
void sync_camera(BL::RenderSettings &b_render, | |
BL::Object &b_override, | |
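
The new adaptive_sampling argument tells pass registration whether the auxiliary adaptive buffers must be reserved. A hedged sketch of the idea; the enum values, Pass struct and pass names below are illustrative stand-ins, not the patch's actual types:

#include <string>
#include <vector>

enum PassType { PASS_COMBINED, PASS_SAMPLE_COUNT, PASS_ADAPTIVE_AUX }; /* stand-ins */

struct Pass {
  PassType type;
  std::string name;
};

std::vector<Pass> build_passes(bool adaptive_sampling)
{
  std::vector<Pass> passes = {{PASS_COMBINED, "Combined"}};
  if (adaptive_sampling) {
    /* Adaptive sampling needs a per-pixel sample count and a variance
     * scratch pass alongside the regular passes. */
    passes.push_back({PASS_SAMPLE_COUNT, "Debug Sample Count"});
    passes.push_back({PASS_ADAPTIVE_AUX, "AdaptiveAuxBuffer"});
  }
  return passes;
}
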
diff -Naur a/intern/cycles/blender/blender_sync.h.orig b/intern/cycles/blender/blender_sync.h.orig | |
--- a/intern/cycles/blender/blender_sync.h.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/blender/blender_sync.h.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,226 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#ifndef __BLENDER_SYNC_H__ | |
+#define __BLENDER_SYNC_H__ | |
+ | |
+#include "MEM_guardedalloc.h" | |
+#include "RNA_types.h" | |
+#include "RNA_access.h" | |
+#include "RNA_blender_cpp.h" | |
+ | |
+#include "blender/blender_util.h" | |
+#include "blender/blender_viewport.h" | |
+ | |
+#include "render/scene.h" | |
+#include "render/session.h" | |
+ | |
+#include "util/util_map.h" | |
+#include "util/util_set.h" | |
+#include "util/util_transform.h" | |
+#include "util/util_vector.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+class Background; | |
+class BlenderObjectCulling; | |
+class BlenderViewportParameters; | |
+class Camera; | |
+class Film; | |
+class Light; | |
+class Mesh; | |
+class Object; | |
+class ParticleSystem; | |
+class Scene; | |
+class ViewLayer; | |
+class Shader; | |
+class ShaderGraph; | |
+class ShaderNode; | |
+ | |
+class BlenderSync { | |
+ public: | |
+ BlenderSync(BL::RenderEngine &b_engine, | |
+ BL::BlendData &b_data, | |
+ BL::Scene &b_scene, | |
+ Scene *scene, | |
+ bool preview, | |
+ Progress &progress); | |
+ ~BlenderSync(); | |
+ | |
+ /* sync */ | |
+ void sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d); | |
+ void sync_data(BL::RenderSettings &b_render, | |
+ BL::Depsgraph &b_depsgraph, | |
+ BL::SpaceView3D &b_v3d, | |
+ BL::Object &b_override, | |
+ int width, | |
+ int height, | |
+ void **python_thread_state); | |
+ void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer); | |
+ vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer); | |
+ void sync_integrator(); | |
+ void sync_camera(BL::RenderSettings &b_render, | |
+ BL::Object &b_override, | |
+ int width, | |
+ int height, | |
+ const char *viewname); | |
+ void sync_view(BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, int width, int height); | |
+ inline int get_layer_samples() | |
+ { | |
+ return view_layer.samples; | |
+ } | |
+ inline int get_layer_bound_samples() | |
+ { | |
+ return view_layer.bound_samples; | |
+ } | |
+ | |
+ /* get parameters */ | |
+ static SceneParams get_scene_params(BL::Scene &b_scene, bool background); | |
+ static SessionParams get_session_params(BL::RenderEngine &b_engine, | |
+ BL::Preferences &b_userpref, | |
+ BL::Scene &b_scene, | |
+ bool background); | |
+ static bool get_session_pause(BL::Scene &b_scene, bool background); | |
+ static BufferParams get_buffer_params(BL::RenderSettings &b_render, | |
+ BL::SpaceView3D &b_v3d, | |
+ BL::RegionView3D &b_rv3d, | |
+ Camera *cam, | |
+ int width, | |
+ int height); | |
+ | |
+ static PassType get_pass_type(BL::RenderPass &b_pass); | |
+ static int get_denoising_pass(BL::RenderPass &b_pass); | |
+ | |
+ private: | |
+ /* sync */ | |
+ void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all); | |
+ void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all); | |
+ void sync_objects(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, float motion_time = 0.0f); | |
+ void sync_motion(BL::RenderSettings &b_render, | |
+ BL::Depsgraph &b_depsgraph, | |
+ BL::SpaceView3D &b_v3d, | |
+ BL::Object &b_override, | |
+ int width, | |
+ int height, | |
+ void **python_thread_state); | |
+ void sync_film(BL::SpaceView3D &b_v3d); | |
+ void sync_view(); | |
+ void sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all); | |
+ void sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d); | |
+ void sync_curve_settings(); | |
+ | |
+ void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree); | |
+  Mesh *sync_mesh(BL::Depsgraph &b_depsgraph, | |
+ BL::Object &b_ob, | |
+ BL::Object &b_ob_instance, | |
+ bool object_updated, | |
+ bool show_self, | |
+ bool show_particles); | |
+ void sync_curves( | |
+ Mesh *mesh, BL::Mesh &b_mesh, BL::Object &b_ob, bool motion, int motion_step = 0); | |
+ Object *sync_object(BL::Depsgraph &b_depsgraph, | |
+ BL::ViewLayer &b_view_layer, | |
+ BL::DepsgraphObjectInstance &b_instance, | |
+ float motion_time, | |
+ bool show_self, | |
+ bool show_particles, | |
+ bool show_lights, | |
+ BlenderObjectCulling &culling, | |
+ bool *use_portal); | |
+ void sync_light(BL::Object &b_parent, | |
+ int persistent_id[OBJECT_PERSISTENT_ID_SIZE], | |
+ BL::Object &b_ob, | |
+ BL::Object &b_ob_instance, | |
+ int random_id, | |
+ Transform &tfm, | |
+ bool *use_portal); | |
+ void sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal); | |
+ void sync_mesh_motion(BL::Depsgraph &b_depsgraph, | |
+ BL::Object &b_ob, | |
+ Object *object, | |
+ float motion_time); | |
+ void sync_camera_motion( | |
+ BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time); | |
+ | |
+ /* particles */ | |
+ bool sync_dupli_particle(BL::Object &b_ob, | |
+ BL::DepsgraphObjectInstance &b_instance, | |
+ Object *object); | |
+ | |
+ /* Images. */ | |
+ void sync_images(); | |
+ | |
+ /* Early data free. */ | |
+ void free_data_after_sync(BL::Depsgraph &b_depsgraph); | |
+ | |
+ /* util */ | |
+ void find_shader(BL::ID &id, vector<Shader *> &used_shaders, Shader *default_shader); | |
+ bool BKE_object_is_modified(BL::Object &b_ob); | |
+ bool object_is_mesh(BL::Object &b_ob); | |
+ bool object_is_light(BL::Object &b_ob); | |
+ | |
+ /* variables */ | |
+ BL::RenderEngine b_engine; | |
+ BL::BlendData b_data; | |
+ BL::Scene b_scene; | |
+ | |
+ id_map<void *, Shader> shader_map; | |
+ id_map<ObjectKey, Object> object_map; | |
+ id_map<void *, Mesh> mesh_map; | |
+ id_map<ObjectKey, Light> light_map; | |
+ id_map<ParticleSystemKey, ParticleSystem> particle_system_map; | |
+ set<Mesh *> mesh_synced; | |
+ set<Mesh *> mesh_motion_synced; | |
+ set<float> motion_times; | |
+ void *world_map; | |
+ bool world_recalc; | |
+ BlenderViewportParameters viewport_parameters; | |
+ | |
+ Scene *scene; | |
+ bool preview; | |
+ bool experimental; | |
+ | |
+ float dicing_rate; | |
+ int max_subdivisions; | |
+ | |
+ struct RenderLayerInfo { | |
+ RenderLayerInfo() | |
+ : material_override(PointerRNA_NULL), | |
+ use_background_shader(true), | |
+ use_background_ao(true), | |
+ use_surfaces(true), | |
+ use_hair(true), | |
+ samples(0), | |
+ bound_samples(false) | |
+ { | |
+ } | |
+ | |
+ string name; | |
+ BL::Material material_override; | |
+ bool use_background_shader; | |
+ bool use_background_ao; | |
+ bool use_surfaces; | |
+ bool use_hair; | |
+ int samples; | |
+ bool bound_samples; | |
+ } view_layer; | |
+ | |
+ Progress &progress; | |
+}; | |
+ | |
+CCL_NAMESPACE_END | |
+ | |
+#endif /* __BLENDER_SYNC_H__ */ | |
diff -Naur a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp | |
--- a/intern/cycles/device/device_cpu.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/device/device_cpu.cpp 2020-01-10 20:42:43.457590054 +0300 | |
@@ -34,6 +34,7 @@ | |
#include "kernel/kernel_types.h" | |
#include "kernel/split/kernel_split_data.h" | |
#include "kernel/kernel_globals.h" | |
+#include "kernel/kernel_adaptive_sampling.h" | |
#include "kernel/filter/filter.h" | |
@@ -317,6 +318,10 @@ | |
REGISTER_SPLIT_KERNEL(next_iteration_setup); | |
REGISTER_SPLIT_KERNEL(indirect_subsurface); | |
REGISTER_SPLIT_KERNEL(buffer_update); | |
+ REGISTER_SPLIT_KERNEL(adaptive_stopping); | |
+ REGISTER_SPLIT_KERNEL(adaptive_filter_x); | |
+ REGISTER_SPLIT_KERNEL(adaptive_filter_y); | |
+ REGISTER_SPLIT_KERNEL(adaptive_adjust_samples); | |
#undef REGISTER_SPLIT_KERNEL | |
#undef KERNEL_FUNCTIONS | |
} | |
@@ -851,10 +856,33 @@ | |
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); | |
} | |
} | |
- | |
tile.sample = sample + 1; | |
task.update_progress(&tile, tile.w * tile.h); | |
+ | |
+ if (kernel_data.film.pass_adaptive_aux_buffer && (sample & 0x3) == 3 && | |
+ sample >= kernel_data.integrator.adaptive_min_samples - 1) { | |
+ WorkTile wtile; | |
+ wtile.x = tile.x; | |
+ wtile.y = tile.y; | |
+ wtile.w = tile.w; | |
+ wtile.h = tile.h; | |
+ wtile.offset = tile.offset; | |
+ wtile.stride = tile.stride; | |
+ wtile.buffer = (float *)tile.buffer; | |
+ | |
+ bool any = false; | |
+ for (int y = tile.y; y < tile.y + tile.h; ++y) { | |
+ any |= kernel_do_adaptive_filter_x(kg, y, &wtile); | |
+ } | |
+ for (int x = tile.x; x < tile.x + tile.w; ++x) { | |
+ any |= kernel_do_adaptive_filter_y(kg, x, &wtile); | |
+ } | |
+ if (!any) { | |
+ tile.sample = end_sample; | |
+ break; | |
+ } | |
+ } | |
} | |
if (use_coverage) { | |
coverage.finalize(); | |
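
The hunk above is the heart of the CPU adaptive path: every fourth sample, once past the integrator's minimum, the filter kernels scan the tile and the sample loop exits early when no pixel is still active. A self-contained model of that cadence, with the filter itself stubbed out:

#include <cstdio>

static bool filter_pass() { return false; } /* stub: pretend no pixel is still active */

int render_tile(int start_sample, int end_sample, int adaptive_min_samples)
{
  for (int sample = start_sample; sample < end_sample; sample++) {
    /* ...path trace one sample for every pixel in the tile... */
    if ((sample & 0x3) == 3 && sample >= adaptive_min_samples - 1) {
      if (!filter_pass()) {
        return end_sample; /* report the tile as fully sampled and stop early */
      }
    }
  }
  return end_sample;
}

int main()
{
  std::printf("tile finished at sample %d\n", render_tile(0, 64, 16));
}
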
@@ -931,6 +959,28 @@ | |
} | |
else { | |
path_trace(task, tile, kg); | |
+ if (task.integrator_adaptive && kernel_data.film.pass_adaptive_aux_buffer) { | |
+ float *render_buffer = (float *)tile.buffer; | |
+ for (int y = tile.y; y < tile.y + tile.h; y++) { | |
+ for (int x = tile.x; x < tile.x + tile.w; x++) { | |
+ int index = tile.offset + x + y * tile.stride; | |
+ ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; | |
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) { | |
+ buffer[kernel_data.film.pass_sample_count] = | |
+ -buffer[kernel_data.film.pass_sample_count]; | |
+ float sample_multiplier = tile.sample / | |
+ max((float)tile.start_sample + 1.0f, | |
+ buffer[kernel_data.film.pass_sample_count]); | |
+ if (sample_multiplier != 1.0f) { | |
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier); | |
+ } | |
+ } | |
+ else { | |
+ kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f)); | |
+ } | |
+ } | |
+ } | |
+ } | |
} | |
} | |
else if (tile.task == RenderTile::DENOISE) { | |
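
The post-adjust pass above keeps early-stopped pixels correctly exposed: passes hold unnormalized sums that are later divided by the final sample count, so a pixel that stopped at n of s samples needs its sums multiplied by roughly s / n, and a negative stored count is the patch's flag for a converged pixel. The multiplier in isolation:

#include <algorithm>
#include <cassert>

float post_adjust_multiplier(float stored_count, int tile_sample, int start_sample)
{
  if (stored_count < 0.0f) {
    float n = -stored_count; /* converged pixel: recover the real count */
    return tile_sample / std::max((float)start_sample + 1.0f, n);
  }
  /* Pixel still active at the end: the code applies a small s / (s - 1) factor. */
  return tile_sample / (tile_sample - 1.0f);
}

int main()
{
  /* A pixel that converged after 32 of 128 samples has its sums scaled 4x. */
  assert(post_adjust_multiplier(-32.0f, 128, 0) == 4.0f);
}
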
diff -Naur a/intern/cycles/device/device_cpu.cpp.orig b/intern/cycles/device/device_cpu.cpp.orig | |
--- a/intern/cycles/device/device_cpu.cpp.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/device/device_cpu.cpp.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,1247 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include <stdlib.h> | |
+#include <string.h> | |
+ | |
+/* So ImathMath is included before our kernel_cpu_compat. */ | |
+#ifdef WITH_OSL | |
+/* So no context pollution happens from indirectly included windows.h */ | |
+# include "util/util_windows.h" | |
+# include <OSL/oslexec.h> | |
+#endif | |
+ | |
+#include "device/device.h" | |
+#include "device/device_denoising.h" | |
+#include "device/device_intern.h" | |
+#include "device/device_split_kernel.h" | |
+ | |
+#include "kernel/kernel.h" | |
+#include "kernel/kernel_compat_cpu.h" | |
+#include "kernel/kernel_types.h" | |
+#include "kernel/split/kernel_split_data.h" | |
+#include "kernel/kernel_globals.h" | |
+ | |
+#include "kernel/filter/filter.h" | |
+ | |
+#include "kernel/osl/osl_shader.h" | |
+#include "kernel/osl/osl_globals.h" | |
+ | |
+#include "render/buffers.h" | |
+#include "render/coverage.h" | |
+ | |
+#include "util/util_debug.h" | |
+#include "util/util_foreach.h" | |
+#include "util/util_function.h" | |
+#include "util/util_logging.h" | |
+#include "util/util_map.h" | |
+#include "util/util_opengl.h" | |
+#include "util/util_optimization.h" | |
+#include "util/util_progress.h" | |
+#include "util/util_system.h" | |
+#include "util/util_thread.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+class CPUDevice; | |
+ | |
+/* Has to be outside of the class to be shared across template instantiations. */ | |
+static const char *logged_architecture = ""; | |
+ | |
+template<typename F> class KernelFunctions { | |
+ public: | |
+ KernelFunctions() | |
+ { | |
+ kernel = (F)NULL; | |
+ } | |
+ | |
+ KernelFunctions( | |
+ F kernel_default, F kernel_sse2, F kernel_sse3, F kernel_sse41, F kernel_avx, F kernel_avx2) | |
+ { | |
+ const char *architecture_name = "default"; | |
+ kernel = kernel_default; | |
+ | |
+ /* Silence potential warnings about unused variables | |
+ * when compiling without some architectures. */ | |
+ (void)kernel_sse2; | |
+ (void)kernel_sse3; | |
+ (void)kernel_sse41; | |
+ (void)kernel_avx; | |
+ (void)kernel_avx2; | |
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 | |
+ if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) { | |
+ architecture_name = "AVX2"; | |
+ kernel = kernel_avx2; | |
+ } | |
+ else | |
+#endif | |
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX | |
+ if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) { | |
+ architecture_name = "AVX"; | |
+ kernel = kernel_avx; | |
+ } | |
+ else | |
+#endif | |
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 | |
+ if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) { | |
+ architecture_name = "SSE4.1"; | |
+ kernel = kernel_sse41; | |
+ } | |
+ else | |
+#endif | |
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 | |
+ if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) { | |
+ architecture_name = "SSE3"; | |
+ kernel = kernel_sse3; | |
+ } | |
+ else | |
+#endif | |
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 | |
+ if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) { | |
+ architecture_name = "SSE2"; | |
+ kernel = kernel_sse2; | |
+ } | |
+#else | |
+ { | |
+      /* Dummy body for the dangling else above, so the architecture | |
+       * check below does not become conditional when | |
+       * WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 is not defined. */ | |
+ } | |
+#endif | |
+ | |
+ if (strcmp(architecture_name, logged_architecture) != 0) { | |
+ VLOG(1) << "Will be using " << architecture_name << " kernels."; | |
+ logged_architecture = architecture_name; | |
+ } | |
+ } | |
+ | |
+ inline F operator()() const | |
+ { | |
+ assert(kernel); | |
+ return kernel; | |
+ } | |
+ | |
+ protected: | |
+ F kernel; | |
+}; | |
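
For reference, a self-contained sketch of the dispatch pattern this class implements: pick the widest supported variant once at construction, then call through the stored pointer. has_avx2() below is a stand-in for the DebugFlags()/system_cpu_support_avx2() pair.

#include <cstdio>

using Fn = void (*)(int);

static bool has_avx2() { return false; } /* stand-in for the CPUID checks */
static void kernel_default(int x) { std::printf("default kernel, x=%d\n", x); }
static void kernel_avx2(int x) { std::printf("avx2 kernel, x=%d\n", x); }

struct KernelEntry {
  Fn fn;
  KernelEntry(Fn fn_default, Fn fn_avx2) : fn(has_avx2() ? fn_avx2 : fn_default) {}
  Fn operator()() const { return fn; } /* same call shape as name##_kernel()(args...) */
};

int main()
{
  KernelEntry path_trace(kernel_default, kernel_avx2);
  path_trace()(42);
}
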
+ | |
+class CPUSplitKernel : public DeviceSplitKernel { | |
+ CPUDevice *device; | |
+ | |
+ public: | |
+ explicit CPUSplitKernel(CPUDevice *device); | |
+ | |
+ virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim, | |
+ RenderTile &rtile, | |
+ int num_global_elements, | |
+ device_memory &kernel_globals, | |
+ device_memory &kernel_data_, | |
+ device_memory &split_data, | |
+ device_memory &ray_state, | |
+ device_memory &queue_index, | |
+ device_memory &use_queues_flag, | |
+ device_memory &work_pool_wgs); | |
+ | |
+ virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name, | |
+ const DeviceRequestedFeatures &); | |
+ virtual int2 split_kernel_local_size(); | |
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task); | |
+ virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads); | |
+}; | |
+ | |
+class CPUDevice : public Device { | |
+ public: | |
+ TaskPool task_pool; | |
+ KernelGlobals kernel_globals; | |
+ | |
+ device_vector<TextureInfo> texture_info; | |
+ bool need_texture_info; | |
+ | |
+#ifdef WITH_OSL | |
+ OSLGlobals osl_globals; | |
+#endif | |
+ | |
+ bool use_split_kernel; | |
+ | |
+ DeviceRequestedFeatures requested_features; | |
+ | |
+ KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel; | |
+ KernelFunctions<void (*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> | |
+ convert_to_half_float_kernel; | |
+ KernelFunctions<void (*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> | |
+ convert_to_byte_kernel; | |
+ KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> | |
+ shader_kernel; | |
+ | |
+ KernelFunctions<void (*)( | |
+ int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)> | |
+ filter_divide_shadow_kernel; | |
+ KernelFunctions<void (*)( | |
+ int, TileInfo *, int, int, int, int, float *, float *, float, int *, int, int)> | |
+ filter_get_feature_kernel; | |
+ KernelFunctions<void (*)(int, int, int, int *, float *, float *, int, int *)> | |
+ filter_write_feature_kernel; | |
+ KernelFunctions<void (*)(int, int, float *, float *, float *, float *, int *, int)> | |
+ filter_detect_outliers_kernel; | |
+ KernelFunctions<void (*)(int, int, float *, float *, float *, float *, int *, int)> | |
+ filter_combine_halves_kernel; | |
+ | |
+ KernelFunctions<void (*)( | |
+ int, int, float *, float *, float *, float *, int *, int, int, int, float, float)> | |
+ filter_nlm_calc_difference_kernel; | |
+ KernelFunctions<void (*)(float *, float *, int *, int, int)> filter_nlm_blur_kernel; | |
+ KernelFunctions<void (*)(float *, float *, int *, int, int)> filter_nlm_calc_weight_kernel; | |
+ KernelFunctions<void (*)( | |
+ int, int, float *, float *, float *, float *, float *, int *, int, int, int)> | |
+ filter_nlm_update_output_kernel; | |
+ KernelFunctions<void (*)(float *, float *, int *, int)> filter_nlm_normalize_kernel; | |
+ | |
+ KernelFunctions<void (*)( | |
+ float *, TileInfo *, int, int, int, float *, int *, int *, int, int, bool, int, float)> | |
+ filter_construct_transform_kernel; | |
+ KernelFunctions<void (*)(int, | |
+ int, | |
+ int, | |
+ float *, | |
+ float *, | |
+ float *, | |
+ int *, | |
+ float *, | |
+ float3 *, | |
+ int *, | |
+ int *, | |
+ int, | |
+ int, | |
+ int, | |
+ int, | |
+ bool)> | |
+ filter_nlm_construct_gramian_kernel; | |
+ KernelFunctions<void (*)(int, int, int, float *, int *, float *, float3 *, int *, int)> | |
+ filter_finalize_kernel; | |
+ | |
+ KernelFunctions<void (*)(KernelGlobals *, | |
+ ccl_constant KernelData *, | |
+ ccl_global void *, | |
+ int, | |
+ ccl_global char *, | |
+ int, | |
+ int, | |
+ int, | |
+ int, | |
+ int, | |
+ int, | |
+ int, | |
+ int, | |
+ ccl_global int *, | |
+ int, | |
+ ccl_global char *, | |
+ ccl_global unsigned int *, | |
+ unsigned int, | |
+ ccl_global float *)> | |
+ data_init_kernel; | |
+ unordered_map<string, KernelFunctions<void (*)(KernelGlobals *, KernelData *)>> split_kernels; | |
+ | |
+#define KERNEL_FUNCTIONS(name) \ | |
+ KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \ | |
+ KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \ | |
+ KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name) | |
+ | |
+ CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_) | |
+ : Device(info_, stats_, profiler_, background_), | |
+ texture_info(this, "__texture_info", MEM_TEXTURE), | |
+#define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name)) | |
+ REGISTER_KERNEL(path_trace), | |
+ REGISTER_KERNEL(convert_to_half_float), | |
+ REGISTER_KERNEL(convert_to_byte), | |
+ REGISTER_KERNEL(shader), | |
+ REGISTER_KERNEL(filter_divide_shadow), | |
+ REGISTER_KERNEL(filter_get_feature), | |
+ REGISTER_KERNEL(filter_write_feature), | |
+ REGISTER_KERNEL(filter_detect_outliers), | |
+ REGISTER_KERNEL(filter_combine_halves), | |
+ REGISTER_KERNEL(filter_nlm_calc_difference), | |
+ REGISTER_KERNEL(filter_nlm_blur), | |
+ REGISTER_KERNEL(filter_nlm_calc_weight), | |
+ REGISTER_KERNEL(filter_nlm_update_output), | |
+ REGISTER_KERNEL(filter_nlm_normalize), | |
+ REGISTER_KERNEL(filter_construct_transform), | |
+ REGISTER_KERNEL(filter_nlm_construct_gramian), | |
+ REGISTER_KERNEL(filter_finalize), | |
+ REGISTER_KERNEL(data_init) | |
+#undef REGISTER_KERNEL | |
+ { | |
+ if (info.cpu_threads == 0) { | |
+ info.cpu_threads = TaskScheduler::num_threads(); | |
+ } | |
+ | |
+#ifdef WITH_OSL | |
+ kernel_globals.osl = &osl_globals; | |
+#endif | |
+ use_split_kernel = DebugFlags().cpu.split_kernel; | |
+ if (use_split_kernel) { | |
+ VLOG(1) << "Will be using split kernel."; | |
+ } | |
+ need_texture_info = false; | |
+ | |
+#define REGISTER_SPLIT_KERNEL(name) \ | |
+ split_kernels[#name] = KernelFunctions<void (*)(KernelGlobals *, KernelData *)>( \ | |
+ KERNEL_FUNCTIONS(name)) | |
+ REGISTER_SPLIT_KERNEL(path_init); | |
+ REGISTER_SPLIT_KERNEL(scene_intersect); | |
+ REGISTER_SPLIT_KERNEL(lamp_emission); | |
+ REGISTER_SPLIT_KERNEL(do_volume); | |
+ REGISTER_SPLIT_KERNEL(queue_enqueue); | |
+ REGISTER_SPLIT_KERNEL(indirect_background); | |
+ REGISTER_SPLIT_KERNEL(shader_setup); | |
+ REGISTER_SPLIT_KERNEL(shader_sort); | |
+ REGISTER_SPLIT_KERNEL(shader_eval); | |
+ REGISTER_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao); | |
+ REGISTER_SPLIT_KERNEL(subsurface_scatter); | |
+ REGISTER_SPLIT_KERNEL(direct_lighting); | |
+ REGISTER_SPLIT_KERNEL(shadow_blocked_ao); | |
+ REGISTER_SPLIT_KERNEL(shadow_blocked_dl); | |
+ REGISTER_SPLIT_KERNEL(enqueue_inactive); | |
+ REGISTER_SPLIT_KERNEL(next_iteration_setup); | |
+ REGISTER_SPLIT_KERNEL(indirect_subsurface); | |
+ REGISTER_SPLIT_KERNEL(buffer_update); | |
+#undef REGISTER_SPLIT_KERNEL | |
+#undef KERNEL_FUNCTIONS | |
+ } | |
+ | |
+ ~CPUDevice() | |
+ { | |
+ task_pool.stop(); | |
+ texture_info.free(); | |
+ } | |
+ | |
+ virtual bool show_samples() const | |
+ { | |
+ return (info.cpu_threads == 1); | |
+ } | |
+ | |
+ virtual BVHLayoutMask get_bvh_layout_mask() const | |
+ { | |
+ BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2; | |
+ if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) { | |
+ bvh_layout_mask |= BVH_LAYOUT_BVH4; | |
+ } | |
+#if defined(__x86_64__) || defined(_M_X64) | |
+ if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) { | |
+ bvh_layout_mask |= BVH_LAYOUT_BVH8; | |
+ } | |
+#endif | |
+#ifdef WITH_EMBREE | |
+ bvh_layout_mask |= BVH_LAYOUT_EMBREE; | |
+#endif /* WITH_EMBREE */ | |
+ return bvh_layout_mask; | |
+ } | |
+ | |
+ void load_texture_info() | |
+ { | |
+ if (need_texture_info) { | |
+ texture_info.copy_to_device(); | |
+ need_texture_info = false; | |
+ } | |
+ } | |
+ | |
+ void mem_alloc(device_memory &mem) | |
+ { | |
+ if (mem.type == MEM_TEXTURE) { | |
+ assert(!"mem_alloc not supported for textures."); | |
+ } | |
+ else { | |
+ if (mem.name) { | |
+ VLOG(1) << "Buffer allocate: " << mem.name << ", " | |
+ << string_human_readable_number(mem.memory_size()) << " bytes. (" | |
+ << string_human_readable_size(mem.memory_size()) << ")"; | |
+ } | |
+ | |
+ if (mem.type == MEM_DEVICE_ONLY) { | |
+ assert(!mem.host_pointer); | |
+ size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES; | |
+ void *data = util_aligned_malloc(mem.memory_size(), alignment); | |
+ mem.device_pointer = (device_ptr)data; | |
+ } | |
+ else { | |
+ mem.device_pointer = (device_ptr)mem.host_pointer; | |
+ } | |
+ | |
+ mem.device_size = mem.memory_size(); | |
+ stats.mem_alloc(mem.device_size); | |
+ } | |
+ } | |
+ | |
+ void mem_copy_to(device_memory &mem) | |
+ { | |
+ if (mem.type == MEM_TEXTURE) { | |
+ tex_free(mem); | |
+ tex_alloc(mem); | |
+ } | |
+ else if (mem.type == MEM_PIXELS) { | |
+ assert(!"mem_copy_to not supported for pixels."); | |
+ } | |
+ else { | |
+ if (!mem.device_pointer) { | |
+ mem_alloc(mem); | |
+ } | |
+ | |
+ /* copy is no-op */ | |
+ } | |
+ } | |
+ | |
+ void mem_copy_from(device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/) | |
+ { | |
+ /* no-op */ | |
+ } | |
+ | |
+ void mem_zero(device_memory &mem) | |
+ { | |
+ if (!mem.device_pointer) { | |
+ mem_alloc(mem); | |
+ } | |
+ | |
+ if (mem.device_pointer) { | |
+ memset((void *)mem.device_pointer, 0, mem.memory_size()); | |
+ } | |
+ } | |
+ | |
+ void mem_free(device_memory &mem) | |
+ { | |
+ if (mem.type == MEM_TEXTURE) { | |
+ tex_free(mem); | |
+ } | |
+ else if (mem.device_pointer) { | |
+ if (mem.type == MEM_DEVICE_ONLY) { | |
+ util_aligned_free((void *)mem.device_pointer); | |
+ } | |
+ mem.device_pointer = 0; | |
+ stats.mem_free(mem.device_size); | |
+ mem.device_size = 0; | |
+ } | |
+ } | |
+ | |
+ virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) | |
+ { | |
+ return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset)); | |
+ } | |
+ | |
+ void const_copy_to(const char *name, void *host, size_t size) | |
+ { | |
+ kernel_const_copy(&kernel_globals, name, host, size); | |
+ } | |
+ | |
+ void tex_alloc(device_memory &mem) | |
+ { | |
+ VLOG(1) << "Texture allocate: " << mem.name << ", " | |
+ << string_human_readable_number(mem.memory_size()) << " bytes. (" | |
+ << string_human_readable_size(mem.memory_size()) << ")"; | |
+ | |
+ if (mem.interpolation == INTERPOLATION_NONE) { | |
+ /* Data texture. */ | |
+ kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size); | |
+ } | |
+ else { | |
+ /* Image Texture. */ | |
+ int flat_slot = 0; | |
+ if (string_startswith(mem.name, "__tex_image")) { | |
+ int pos = string(mem.name).rfind("_"); | |
+ flat_slot = atoi(mem.name + pos + 1); | |
+ } | |
+ else { | |
+ assert(0); | |
+ } | |
+ | |
+ if (flat_slot >= texture_info.size()) { | |
+      /* Allocate some slots in advance, to reduce the number | |
+       * of re-allocations. */ | |
+ texture_info.resize(flat_slot + 128); | |
+ } | |
+ | |
+ TextureInfo &info = texture_info[flat_slot]; | |
+ info.data = (uint64_t)mem.host_pointer; | |
+ info.cl_buffer = 0; | |
+ info.interpolation = mem.interpolation; | |
+ info.extension = mem.extension; | |
+ info.width = mem.data_width; | |
+ info.height = mem.data_height; | |
+ info.depth = mem.data_depth; | |
+ | |
+ need_texture_info = true; | |
+ } | |
+ | |
+ mem.device_pointer = (device_ptr)mem.host_pointer; | |
+ mem.device_size = mem.memory_size(); | |
+ stats.mem_alloc(mem.device_size); | |
+ } | |
+ | |
+ void tex_free(device_memory &mem) | |
+ { | |
+ if (mem.device_pointer) { | |
+ mem.device_pointer = 0; | |
+ stats.mem_free(mem.device_size); | |
+ mem.device_size = 0; | |
+ need_texture_info = true; | |
+ } | |
+ } | |
+ | |
+ void *osl_memory() | |
+ { | |
+#ifdef WITH_OSL | |
+ return &osl_globals; | |
+#else | |
+ return NULL; | |
+#endif | |
+ } | |
+ | |
+ void thread_run(DeviceTask *task) | |
+ { | |
+ if (task->type == DeviceTask::RENDER) { | |
+ thread_render(*task); | |
+ } | |
+ else if (task->type == DeviceTask::FILM_CONVERT) | |
+ thread_film_convert(*task); | |
+ else if (task->type == DeviceTask::SHADER) | |
+ thread_shader(*task); | |
+ } | |
+ | |
+ class CPUDeviceTask : public DeviceTask { | |
+ public: | |
+ CPUDeviceTask(CPUDevice *device, DeviceTask &task) : DeviceTask(task) | |
+ { | |
+ run = function_bind(&CPUDevice::thread_run, device, this); | |
+ } | |
+ }; | |
+ | |
+ bool denoising_non_local_means(device_ptr image_ptr, | |
+ device_ptr guide_ptr, | |
+ device_ptr variance_ptr, | |
+ device_ptr out_ptr, | |
+ DenoisingTask *task) | |
+ { | |
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_NON_LOCAL_MEANS); | |
+ | |
+ int4 rect = task->rect; | |
+ int r = task->nlm_state.r; | |
+ int f = task->nlm_state.f; | |
+ float a = task->nlm_state.a; | |
+ float k_2 = task->nlm_state.k_2; | |
+ | |
+ int w = align_up(rect.z - rect.x, 4); | |
+ int h = rect.w - rect.y; | |
+ int stride = task->buffer.stride; | |
+ int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0; | |
+ | |
+ float *temporary_mem = (float *)task->buffer.temporary_mem.device_pointer; | |
+ float *blurDifference = temporary_mem; | |
+ float *difference = temporary_mem + task->buffer.pass_stride; | |
+ float *weightAccum = temporary_mem + 2 * task->buffer.pass_stride; | |
+ | |
+ memset(weightAccum, 0, sizeof(float) * w * h); | |
+ memset((float *)out_ptr, 0, sizeof(float) * w * h); | |
+ | |
+ for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) { | |
+ int dy = i / (2 * r + 1) - r; | |
+ int dx = i % (2 * r + 1) - r; | |
+ | |
+ int local_rect[4] = { | |
+ max(0, -dx), max(0, -dy), rect.z - rect.x - max(0, dx), rect.w - rect.y - max(0, dy)}; | |
+ filter_nlm_calc_difference_kernel()(dx, | |
+ dy, | |
+ (float *)guide_ptr, | |
+ (float *)variance_ptr, | |
+ NULL, | |
+ difference, | |
+ local_rect, | |
+ w, | |
+ channel_offset, | |
+ 0, | |
+ a, | |
+ k_2); | |
+ | |
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f); | |
+ filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f); | |
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f); | |
+ | |
+ filter_nlm_update_output_kernel()(dx, | |
+ dy, | |
+ blurDifference, | |
+ (float *)image_ptr, | |
+ difference, | |
+ (float *)out_ptr, | |
+ weightAccum, | |
+ local_rect, | |
+ channel_offset, | |
+ stride, | |
+ f); | |
+ } | |
+ | |
+ int local_rect[4] = {0, 0, rect.z - rect.x, rect.w - rect.y}; | |
+ filter_nlm_normalize_kernel()((float *)out_ptr, weightAccum, local_rect, w); | |
+ | |
+ return true; | |
+ } | |
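
The loop above walks the whole (2r+1)^2 neighbourhood window with a single index i; the decomposition into row and column offsets in [-r, r], shown in isolation:

#include <cassert>

int main()
{
  const int r = 2;
  for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) {
    int dy = i / (2 * r + 1) - r; /* row offset */
    int dx = i % (2 * r + 1) - r; /* column offset */
    assert(dx >= -r && dx <= r && dy >= -r && dy <= r);
  }
  return 0;
}
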
+ | |
+ bool denoising_construct_transform(DenoisingTask *task) | |
+ { | |
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_CONSTRUCT_TRANSFORM); | |
+ | |
+ for (int y = 0; y < task->filter_area.w; y++) { | |
+ for (int x = 0; x < task->filter_area.z; x++) { | |
+ filter_construct_transform_kernel()((float *)task->buffer.mem.device_pointer, | |
+ task->tile_info, | |
+ x + task->filter_area.x, | |
+ y + task->filter_area.y, | |
+ y * task->filter_area.z + x, | |
+ (float *)task->storage.transform.device_pointer, | |
+ (int *)task->storage.rank.device_pointer, | |
+ &task->rect.x, | |
+ task->buffer.pass_stride, | |
+ task->buffer.frame_stride, | |
+ task->buffer.use_time, | |
+ task->radius, | |
+ task->pca_threshold); | |
+ } | |
+ } | |
+ return true; | |
+ } | |
+ | |
+ bool denoising_accumulate(device_ptr color_ptr, | |
+ device_ptr color_variance_ptr, | |
+ device_ptr scale_ptr, | |
+ int frame, | |
+ DenoisingTask *task) | |
+ { | |
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_RECONSTRUCT); | |
+ | |
+ float *temporary_mem = (float *)task->buffer.temporary_mem.device_pointer; | |
+ float *difference = temporary_mem; | |
+ float *blurDifference = temporary_mem + task->buffer.pass_stride; | |
+ | |
+ int r = task->radius; | |
+ int frame_offset = frame * task->buffer.frame_stride; | |
+ for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) { | |
+ int dy = i / (2 * r + 1) - r; | |
+ int dx = i % (2 * r + 1) - r; | |
+ | |
+ int local_rect[4] = {max(0, -dx), | |
+ max(0, -dy), | |
+ task->reconstruction_state.source_w - max(0, dx), | |
+ task->reconstruction_state.source_h - max(0, dy)}; | |
+ filter_nlm_calc_difference_kernel()(dx, | |
+ dy, | |
+ (float *)color_ptr, | |
+ (float *)color_variance_ptr, | |
+ (float *)scale_ptr, | |
+ difference, | |
+ local_rect, | |
+ task->buffer.stride, | |
+ task->buffer.pass_stride, | |
+ frame_offset, | |
+ 1.0f, | |
+ task->nlm_k_2); | |
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4); | |
+ filter_nlm_calc_weight_kernel()( | |
+ blurDifference, difference, local_rect, task->buffer.stride, 4); | |
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4); | |
+ filter_nlm_construct_gramian_kernel()(dx, | |
+ dy, | |
+ task->tile_info->frames[frame], | |
+ blurDifference, | |
+ (float *)task->buffer.mem.device_pointer, | |
+ (float *)task->storage.transform.device_pointer, | |
+ (int *)task->storage.rank.device_pointer, | |
+ (float *)task->storage.XtWX.device_pointer, | |
+ (float3 *)task->storage.XtWY.device_pointer, | |
+ local_rect, | |
+ &task->reconstruction_state.filter_window.x, | |
+ task->buffer.stride, | |
+ 4, | |
+ task->buffer.pass_stride, | |
+ frame_offset, | |
+ task->buffer.use_time); | |
+ } | |
+ | |
+ return true; | |
+ } | |
+ | |
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task) | |
+ { | |
+ for (int y = 0; y < task->filter_area.w; y++) { | |
+ for (int x = 0; x < task->filter_area.z; x++) { | |
+ filter_finalize_kernel()(x, | |
+ y, | |
+ y * task->filter_area.z + x, | |
+ (float *)output_ptr, | |
+ (int *)task->storage.rank.device_pointer, | |
+ (float *)task->storage.XtWX.device_pointer, | |
+ (float3 *)task->storage.XtWY.device_pointer, | |
+ &task->reconstruction_state.buffer_params.x, | |
+ task->render_buffer.samples); | |
+ } | |
+ } | |
+ return true; | |
+ } | |
+ | |
+ bool denoising_combine_halves(device_ptr a_ptr, | |
+ device_ptr b_ptr, | |
+ device_ptr mean_ptr, | |
+ device_ptr variance_ptr, | |
+ int r, | |
+ int4 rect, | |
+ DenoisingTask *task) | |
+ { | |
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_COMBINE_HALVES); | |
+ | |
+ for (int y = rect.y; y < rect.w; y++) { | |
+ for (int x = rect.x; x < rect.z; x++) { | |
+ filter_combine_halves_kernel()(x, | |
+ y, | |
+ (float *)mean_ptr, | |
+ (float *)variance_ptr, | |
+ (float *)a_ptr, | |
+ (float *)b_ptr, | |
+ &rect.x, | |
+ r); | |
+ } | |
+ } | |
+ return true; | |
+ } | |
+ | |
+ bool denoising_divide_shadow(device_ptr a_ptr, | |
+ device_ptr b_ptr, | |
+ device_ptr sample_variance_ptr, | |
+ device_ptr sv_variance_ptr, | |
+ device_ptr buffer_variance_ptr, | |
+ DenoisingTask *task) | |
+ { | |
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DIVIDE_SHADOW); | |
+ | |
+ for (int y = task->rect.y; y < task->rect.w; y++) { | |
+ for (int x = task->rect.x; x < task->rect.z; x++) { | |
+ filter_divide_shadow_kernel()(task->render_buffer.samples, | |
+ task->tile_info, | |
+ x, | |
+ y, | |
+ (float *)a_ptr, | |
+ (float *)b_ptr, | |
+ (float *)sample_variance_ptr, | |
+ (float *)sv_variance_ptr, | |
+ (float *)buffer_variance_ptr, | |
+ &task->rect.x, | |
+ task->render_buffer.pass_stride, | |
+ task->render_buffer.offset); | |
+ } | |
+ } | |
+ return true; | |
+ } | |
+ | |
+ bool denoising_get_feature(int mean_offset, | |
+ int variance_offset, | |
+ device_ptr mean_ptr, | |
+ device_ptr variance_ptr, | |
+ float scale, | |
+ DenoisingTask *task) | |
+ { | |
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_GET_FEATURE); | |
+ | |
+ for (int y = task->rect.y; y < task->rect.w; y++) { | |
+ for (int x = task->rect.x; x < task->rect.z; x++) { | |
+ filter_get_feature_kernel()(task->render_buffer.samples, | |
+ task->tile_info, | |
+ mean_offset, | |
+ variance_offset, | |
+ x, | |
+ y, | |
+ (float *)mean_ptr, | |
+ (float *)variance_ptr, | |
+ scale, | |
+ &task->rect.x, | |
+ task->render_buffer.pass_stride, | |
+ task->render_buffer.offset); | |
+ } | |
+ } | |
+ return true; | |
+ } | |
+ | |
+ bool denoising_write_feature(int out_offset, | |
+ device_ptr from_ptr, | |
+ device_ptr buffer_ptr, | |
+ DenoisingTask *task) | |
+ { | |
+ for (int y = 0; y < task->filter_area.w; y++) { | |
+ for (int x = 0; x < task->filter_area.z; x++) { | |
+ filter_write_feature_kernel()(task->render_buffer.samples, | |
+ x + task->filter_area.x, | |
+ y + task->filter_area.y, | |
+ &task->reconstruction_state.buffer_params.x, | |
+ (float *)from_ptr, | |
+ (float *)buffer_ptr, | |
+ out_offset, | |
+ &task->rect.x); | |
+ } | |
+ } | |
+ return true; | |
+ } | |
+ | |
+ bool denoising_detect_outliers(device_ptr image_ptr, | |
+ device_ptr variance_ptr, | |
+ device_ptr depth_ptr, | |
+ device_ptr output_ptr, | |
+ DenoisingTask *task) | |
+ { | |
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DETECT_OUTLIERS); | |
+ | |
+ for (int y = task->rect.y; y < task->rect.w; y++) { | |
+ for (int x = task->rect.x; x < task->rect.z; x++) { | |
+ filter_detect_outliers_kernel()(x, | |
+ y, | |
+ (float *)image_ptr, | |
+ (float *)variance_ptr, | |
+ (float *)depth_ptr, | |
+ (float *)output_ptr, | |
+ &task->rect.x, | |
+ task->buffer.pass_stride); | |
+ } | |
+ } | |
+ return true; | |
+ } | |
+ | |
+ void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg) | |
+ { | |
+ const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE; | |
+ | |
+ scoped_timer timer(&tile.buffers->render_time); | |
+ | |
+ Coverage coverage(kg, tile); | |
+ if (use_coverage) { | |
+ coverage.init_path_trace(); | |
+ } | |
+ | |
+ float *render_buffer = (float *)tile.buffer; | |
+ int start_sample = tile.start_sample; | |
+ int end_sample = tile.start_sample + tile.num_samples; | |
+ | |
+ /* Needed for Embree. */ | |
+ SIMD_SET_FLUSH_TO_ZERO; | |
+ | |
+ for (int sample = start_sample; sample < end_sample; sample++) { | |
+ if (task.get_cancel() || task_pool.canceled()) { | |
+ if (task.need_finish_queue == false) | |
+ break; | |
+ } | |
+ | |
+ for (int y = tile.y; y < tile.y + tile.h; y++) { | |
+ for (int x = tile.x; x < tile.x + tile.w; x++) { | |
+ if (use_coverage) { | |
+ coverage.init_pixel(x, y); | |
+ } | |
+ path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride); | |
+ } | |
+ } | |
+ | |
+ tile.sample = sample + 1; | |
+ | |
+ task.update_progress(&tile, tile.w * tile.h); | |
+ } | |
+ if (use_coverage) { | |
+ coverage.finalize(); | |
+ } | |
+ } | |
+ | |
+ void denoise(DenoisingTask &denoising, RenderTile &tile) | |
+ { | |
+ ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING); | |
+ | |
+ tile.sample = tile.start_sample + tile.num_samples; | |
+ | |
+ denoising.functions.construct_transform = function_bind( | |
+ &CPUDevice::denoising_construct_transform, this, &denoising); | |
+ denoising.functions.accumulate = function_bind( | |
+ &CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising); | |
+ denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, &denoising); | |
+ denoising.functions.divide_shadow = function_bind( | |
+ &CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); | |
+ denoising.functions.non_local_means = function_bind( | |
+ &CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); | |
+ denoising.functions.combine_halves = function_bind( | |
+ &CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); | |
+ denoising.functions.get_feature = function_bind( | |
+ &CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising); | |
+ denoising.functions.write_feature = function_bind( | |
+ &CPUDevice::denoising_write_feature, this, _1, _2, _3, &denoising); | |
+ denoising.functions.detect_outliers = function_bind( | |
+ &CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); | |
+ | |
+ denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h); | |
+ denoising.render_buffer.samples = tile.sample; | |
+ denoising.buffer.gpu_temporary_mem = false; | |
+ | |
+ denoising.run_denoising(&tile); | |
+ } | |
+ | |
+ void thread_render(DeviceTask &task) | |
+ { | |
+ if (task_pool.canceled()) { | |
+ if (task.need_finish_queue == false) | |
+ return; | |
+ } | |
+ | |
+ /* allocate buffer for kernel globals */ | |
+ device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals"); | |
+ kgbuffer.alloc_to_device(1); | |
+ | |
+ KernelGlobals *kg = new ((void *)kgbuffer.device_pointer) | |
+ KernelGlobals(thread_kernel_globals_init()); | |
+ | |
+ profiler.add_state(&kg->profiler); | |
+ | |
+ CPUSplitKernel *split_kernel = NULL; | |
+ if (use_split_kernel) { | |
+ split_kernel = new CPUSplitKernel(this); | |
+ if (!split_kernel->load_kernels(requested_features)) { | |
+ thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer); | |
+ kgbuffer.free(); | |
+ delete split_kernel; | |
+ return; | |
+ } | |
+ } | |
+ | |
+ RenderTile tile; | |
+ DenoisingTask denoising(this, task); | |
+ denoising.profiler = &kg->profiler; | |
+ | |
+ while (task.acquire_tile(this, tile)) { | |
+ if (tile.task == RenderTile::PATH_TRACE) { | |
+ if (use_split_kernel) { | |
+ device_only_memory<uchar> void_buffer(this, "void_buffer"); | |
+ split_kernel->path_trace(&task, tile, kgbuffer, void_buffer); | |
+ } | |
+ else { | |
+ path_trace(task, tile, kg); | |
+ } | |
+ } | |
+ else if (tile.task == RenderTile::DENOISE) { | |
+ denoise(denoising, tile); | |
+ task.update_progress(&tile, tile.w * tile.h); | |
+ } | |
+ | |
+ task.release_tile(tile); | |
+ | |
+ if (task_pool.canceled()) { | |
+ if (task.need_finish_queue == false) | |
+ break; | |
+ } | |
+ } | |
+ | |
+ profiler.remove_state(&kg->profiler); | |
+ | |
+ thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer); | |
+ kg->~KernelGlobals(); | |
+ kgbuffer.free(); | |
+ delete split_kernel; | |
+ } | |
+ | |
+ void thread_film_convert(DeviceTask &task) | |
+ { | |
+ float sample_scale = 1.0f / (task.sample + 1); | |
+ | |
+ if (task.rgba_half) { | |
+ for (int y = task.y; y < task.y + task.h; y++) | |
+ for (int x = task.x; x < task.x + task.w; x++) | |
+ convert_to_half_float_kernel()(&kernel_globals, | |
+ (uchar4 *)task.rgba_half, | |
+ (float *)task.buffer, | |
+ sample_scale, | |
+ x, | |
+ y, | |
+ task.offset, | |
+ task.stride); | |
+ } | |
+ else { | |
+ for (int y = task.y; y < task.y + task.h; y++) | |
+ for (int x = task.x; x < task.x + task.w; x++) | |
+ convert_to_byte_kernel()(&kernel_globals, | |
+ (uchar4 *)task.rgba_byte, | |
+ (float *)task.buffer, | |
+ sample_scale, | |
+ x, | |
+ y, | |
+ task.offset, | |
+ task.stride); | |
+ } | |
+ } | |
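
The 1 / (sample + 1) factor above is the display-time normalization the render buffers rely on: passes store running sums, not averages. A two-line check of the arithmetic:

#include <cassert>

int main()
{
  float accum = 4 * 0.5f; /* sums after 4 samples of a pixel whose value is 0.5 */
  int sample = 3;         /* index of the last completed sample */
  float sample_scale = 1.0f / (sample + 1);
  assert(accum * sample_scale == 0.5f);
}
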
+ | |
+ void thread_shader(DeviceTask &task) | |
+ { | |
+ KernelGlobals *kg = new KernelGlobals(thread_kernel_globals_init()); | |
+ | |
+ for (int sample = 0; sample < task.num_samples; sample++) { | |
+ for (int x = task.shader_x; x < task.shader_x + task.shader_w; x++) | |
+ shader_kernel()(kg, | |
+ (uint4 *)task.shader_input, | |
+ (float4 *)task.shader_output, | |
+ task.shader_eval_type, | |
+ task.shader_filter, | |
+ x, | |
+ task.offset, | |
+ sample); | |
+ | |
+ if (task.get_cancel() || task_pool.canceled()) | |
+ break; | |
+ | |
+ task.update_progress(NULL); | |
+ } | |
+ | |
+ thread_kernel_globals_free(kg); | |
+ delete kg; | |
+ } | |
+ | |
+ int get_split_task_count(DeviceTask &task) | |
+ { | |
+ if (task.type == DeviceTask::SHADER) | |
+ return task.get_subtask_count(info.cpu_threads, 256); | |
+ else | |
+ return task.get_subtask_count(info.cpu_threads); | |
+ } | |
+ | |
+ void task_add(DeviceTask &task) | |
+ { | |
+ /* Load texture info. */ | |
+ load_texture_info(); | |
+ | |
+ /* split task into smaller ones */ | |
+ list<DeviceTask> tasks; | |
+ | |
+ if (task.type == DeviceTask::SHADER) | |
+ task.split(tasks, info.cpu_threads, 256); | |
+ else | |
+ task.split(tasks, info.cpu_threads); | |
+ | |
+ foreach (DeviceTask &task, tasks) | |
+ task_pool.push(new CPUDeviceTask(this, task)); | |
+ } | |
+ | |
+ void task_wait() | |
+ { | |
+ task_pool.wait_work(); | |
+ } | |
+ | |
+ void task_cancel() | |
+ { | |
+ task_pool.cancel(); | |
+ } | |
+ | |
+ protected: | |
+ inline KernelGlobals thread_kernel_globals_init() | |
+ { | |
+ KernelGlobals kg = kernel_globals; | |
+ kg.transparent_shadow_intersections = NULL; | |
+ const int decoupled_count = sizeof(kg.decoupled_volume_steps) / | |
+ sizeof(*kg.decoupled_volume_steps); | |
+ for (int i = 0; i < decoupled_count; ++i) { | |
+ kg.decoupled_volume_steps[i] = NULL; | |
+ } | |
+ kg.decoupled_volume_steps_index = 0; | |
+ kg.coverage_asset = kg.coverage_object = kg.coverage_material = NULL; | |
+#ifdef WITH_OSL | |
+ OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); | |
+#endif | |
+ return kg; | |
+ } | |
+ | |
+ inline void thread_kernel_globals_free(KernelGlobals *kg) | |
+ { | |
+ if (kg == NULL) { | |
+ return; | |
+ } | |
+ | |
+ if (kg->transparent_shadow_intersections != NULL) { | |
+ free(kg->transparent_shadow_intersections); | |
+ } | |
+ const int decoupled_count = sizeof(kg->decoupled_volume_steps) / | |
+ sizeof(*kg->decoupled_volume_steps); | |
+ for (int i = 0; i < decoupled_count; ++i) { | |
+ if (kg->decoupled_volume_steps[i] != NULL) { | |
+ free(kg->decoupled_volume_steps[i]); | |
+ } | |
+ } | |
+#ifdef WITH_OSL | |
+ OSLShader::thread_free(kg); | |
+#endif | |
+ } | |
+ | |
+ virtual bool load_kernels(const DeviceRequestedFeatures &requested_features_) | |
+ { | |
+ requested_features = requested_features_; | |
+ | |
+ return true; | |
+ } | |
+}; | |
+ | |
+/* split kernel */ | |
+ | |
+class CPUSplitKernelFunction : public SplitKernelFunction { | |
+ public: | |
+ CPUDevice *device; | |
+ void (*func)(KernelGlobals *kg, KernelData *data); | |
+ | |
+ CPUSplitKernelFunction(CPUDevice *device) : device(device), func(NULL) | |
+ { | |
+ } | |
+ ~CPUSplitKernelFunction() | |
+ { | |
+ } | |
+ | |
+ virtual bool enqueue(const KernelDimensions &dim, | |
+ device_memory &kernel_globals, | |
+ device_memory &data) | |
+ { | |
+ if (!func) { | |
+ return false; | |
+ } | |
+ | |
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer; | |
+ kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]); | |
+ | |
+ for (int y = 0; y < dim.global_size[1]; y++) { | |
+ for (int x = 0; x < dim.global_size[0]; x++) { | |
+ kg->global_id = make_int2(x, y); | |
+ | |
+ func(kg, (KernelData *)data.device_pointer); | |
+ } | |
+ } | |
+ | |
+ return true; | |
+ } | |
+}; | |
+ | |
+CPUSplitKernel::CPUSplitKernel(CPUDevice *device) : DeviceSplitKernel(device), device(device) | |
+{ | |
+} | |
+ | |
+bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim, | |
+ RenderTile &rtile, | |
+ int num_global_elements, | |
+ device_memory &kernel_globals, | |
+ device_memory &data, | |
+ device_memory &split_data, | |
+ device_memory &ray_state, | |
+ device_memory &queue_index, | |
+ device_memory &use_queues_flags, | |
+ device_memory &work_pool_wgs) | |
+{ | |
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer; | |
+ kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]); | |
+ | |
+ for (int y = 0; y < dim.global_size[1]; y++) { | |
+ for (int x = 0; x < dim.global_size[0]; x++) { | |
+ kg->global_id = make_int2(x, y); | |
+ | |
+ device->data_init_kernel()((KernelGlobals *)kernel_globals.device_pointer, | |
+ (KernelData *)data.device_pointer, | |
+ (void *)split_data.device_pointer, | |
+ num_global_elements, | |
+ (char *)ray_state.device_pointer, | |
+ rtile.start_sample, | |
+ rtile.start_sample + rtile.num_samples, | |
+ rtile.x, | |
+ rtile.y, | |
+ rtile.w, | |
+ rtile.h, | |
+ rtile.offset, | |
+ rtile.stride, | |
+ (int *)queue_index.device_pointer, | |
+ dim.global_size[0] * dim.global_size[1], | |
+ (char *)use_queues_flags.device_pointer, | |
+ (uint *)work_pool_wgs.device_pointer, | |
+ rtile.num_samples, | |
+ (float *)rtile.buffer); | |
+ } | |
+ } | |
+ | |
+ return true; | |
+} | |
+ | |
+SplitKernelFunction *CPUSplitKernel::get_split_kernel_function(const string &kernel_name, | |
+ const DeviceRequestedFeatures &) | |
+{ | |
+ CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device); | |
+ | |
+ kernel->func = device->split_kernels[kernel_name](); | |
+ if (!kernel->func) { | |
+ delete kernel; | |
+ return NULL; | |
+ } | |
+ | |
+ return kernel; | |
+} | |
+ | |
+int2 CPUSplitKernel::split_kernel_local_size() | |
+{ | |
+ return make_int2(1, 1); | |
+} | |
+ | |
+int2 CPUSplitKernel::split_kernel_global_size(device_memory & /*kg*/, | |
+ device_memory & /*data*/, | |
+ DeviceTask * /*task*/) | |
+{ | |
+ return make_int2(1, 1); | |
+} | |
+ | |
+uint64_t CPUSplitKernel::state_buffer_size(device_memory &kernel_globals, | |
+ device_memory & /*data*/, | |
+ size_t num_threads) | |
+{ | |
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer; | |
+ | |
+ return split_data_buffer_size(kg, num_threads); | |
+} | |
+ | |
+Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background) | |
+{ | |
+ return new CPUDevice(info, stats, profiler, background); | |
+} | |
+ | |
+void device_cpu_info(vector<DeviceInfo> &devices) | |
+{ | |
+ DeviceInfo info; | |
+ | |
+ info.type = DEVICE_CPU; | |
+ info.description = system_cpu_brand_string(); | |
+ info.id = "CPU"; | |
+ info.num = 0; | |
+ info.has_volume_decoupled = true; | |
+ info.has_osl = true; | |
+ info.has_half_images = true; | |
+ info.has_profiling = true; | |
+ | |
+ devices.insert(devices.begin(), info); | |
+} | |
+ | |
+string device_cpu_capabilities() | |
+{ | |
+ string capabilities = ""; | |
+ capabilities += system_cpu_support_sse2() ? "SSE2 " : ""; | |
+ capabilities += system_cpu_support_sse3() ? "SSE3 " : ""; | |
+ capabilities += system_cpu_support_sse41() ? "SSE41 " : ""; | |
+ capabilities += system_cpu_support_avx() ? "AVX " : ""; | |
+ capabilities += system_cpu_support_avx2() ? "AVX2" : ""; | |
+  if (!capabilities.empty() && capabilities[capabilities.size() - 1] == ' ') | |
+ capabilities.resize(capabilities.size() - 1); | |
+ return capabilities; | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp | |
--- a/intern/cycles/device/device_cuda.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/device/device_cuda.cpp 2020-01-10 20:42:43.460923388 +0300 | |
@@ -1788,6 +1788,23 @@ | |
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); | |
+ /* Kernels for adaptive sampling. */ | |
+ CUfunction cuAdaptiveStopping, cuAdaptiveFilterX, cuAdaptiveFilterY, cuAdaptiveScaleSamples; | |
+ if (task.integrator_adaptive) { | |
+ cuda_assert( | |
+ cuModuleGetFunction(&cuAdaptiveStopping, cuModule, "kernel_cuda_adaptive_stopping")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveStopping, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert( | |
+ cuModuleGetFunction(&cuAdaptiveFilterX, cuModule, "kernel_cuda_adaptive_filter_x")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveFilterX, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert( | |
+ cuModuleGetFunction(&cuAdaptiveFilterY, cuModule, "kernel_cuda_adaptive_filter_y")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveFilterY, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuAdaptiveScaleSamples, cuModule, "kernel_cuda_adaptive_scale_samples")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveScaleSamples, CU_FUNC_CACHE_PREFER_L1)); | |
+ } | |
+ | |
/* Allocate work tile. */ | |
work_tiles.alloc(1); | |
@@ -1812,6 +1829,16 @@ | |
uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h); | |
+ if (task.integrator_adaptive) { | |
+    /* Force to either 1, 2 or a multiple of 4 samples per kernel invocation. */ | |
+ if (step_samples == 3) { | |
+ step_samples = 2; | |
+ } | |
+ else if (step_samples > 4) { | |
+ step_samples &= 0xfffffffc; | |
+ } | |
+ } | |
+ | |
/* Render all samples. */ | |
int start_sample = rtile.start_sample; | |
int end_sample = rtile.start_sample + rtile.num_samples; | |
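
The clamp above keeps each launch covering 1, 2 or a multiple of 4 samples, so batch boundaries line up with the every-4-samples filter further down. The same logic in isolation:

#include <cassert>

unsigned round_step_samples(unsigned step_samples)
{
  if (step_samples == 3)
    step_samples = 2;
  else if (step_samples > 4)
    step_samples &= 0xfffffffc; /* clear the low two bits: 5..7 -> 4, 9..11 -> 8 */
  return step_samples;
}

int main()
{
  assert(round_step_samples(3) == 2);
  assert(round_step_samples(7) == 4);
  assert(round_step_samples(10) == 8);
}
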
@@ -1832,6 +1859,26 @@ | |
cuda_assert(cuLaunchKernel( | |
cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); | |
+ uint filter_sample = sample + wtile->num_samples - 1; | |
+      /* Run the adaptive sampling kernels at every multiple of 4 samples. | |
+       * These are a series of tiny kernels because there is no grid-wide | |
+       * synchronisation from within a kernel, so each stage gets its own launch. */ | |
+ if (task.integrator_adaptive && (filter_sample & 0x3) == 3) { | |
+ total_work_size = wtile->h * wtile->w; | |
+ void *args2[] = {&d_work_tiles, &filter_sample, &total_work_size}; | |
+ num_blocks = divide_up(total_work_size, num_threads_per_block); | |
+ cuda_assert(cuLaunchKernel( | |
+ cuAdaptiveStopping, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0)); | |
+ total_work_size = wtile->h; | |
+ num_blocks = divide_up(total_work_size, num_threads_per_block); | |
+ cuda_assert(cuLaunchKernel( | |
+ cuAdaptiveFilterX, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0)); | |
+ total_work_size = wtile->w; | |
+ num_blocks = divide_up(total_work_size, num_threads_per_block); | |
+ cuda_assert(cuLaunchKernel( | |
+ cuAdaptiveFilterY, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0)); | |
+ } | |
+ | |
cuda_assert(cuCtxSynchronize()); | |
/* Update progress. */ | |
@@ -1843,6 +1890,17 @@ | |
break; | |
} | |
} | |
+ | |
+ if (task.integrator_adaptive) { | |
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); | |
+ uint total_work_size = wtile->h * wtile->w; | |
+ void *args[] = {&d_work_tiles, &rtile.start_sample, &rtile.sample, &total_work_size}; | |
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block); | |
+ cuda_assert(cuLaunchKernel( | |
+ cuAdaptiveScaleSamples, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples); | |
+ } | |
} | |
void film_convert(DeviceTask &task, | |
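
A CPU-side model of the launch schedule introduced above, with plain stubs instead of cuLaunchKernel: since a CUDA kernel cannot synchronise across the whole grid, stopping, filter-x and filter-y each run as their own launch after every fourth sample, and one scale pass runs once the sample loop ends. The per-launch comments are a reading of the kernel names, not taken from the patch:

#include <cstdio>

static void launch(const char *name) { std::printf("launch %s\n", name); }

void render(int start_sample, int end_sample, int step_samples, bool adaptive)
{
  for (int sample = start_sample; sample < end_sample; sample += step_samples) {
    launch("path_trace");
    int filter_sample = sample + step_samples - 1;
    if (adaptive && (filter_sample & 0x3) == 3) {
      launch("adaptive_stopping"); /* per-pixel convergence test */
      launch("adaptive_filter_x"); /* spread activity along rows */
      launch("adaptive_filter_y"); /* spread activity along columns */
    }
  }
  if (adaptive) {
    launch("adaptive_scale_samples"); /* rescale pixels that stopped early */
  }
}

int main()
{
  render(0, 16, 4, true);
}
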
diff -Naur a/intern/cycles/device/device_cuda.cpp.orig b/intern/cycles/device/device_cuda.cpp.orig | |
--- a/intern/cycles/device/device_cuda.cpp.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/device/device_cuda.cpp.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,2846 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include <climits> | |
+#include <stdio.h> | |
+#include <stdlib.h> | |
+#include <string.h> | |
+ | |
+#include "device/device.h" | |
+#include "device/device_denoising.h" | |
+#include "device/device_intern.h" | |
+#include "device/device_split_kernel.h" | |
+ | |
+#include "render/buffers.h" | |
+ | |
+#include "kernel/filter/filter_defines.h" | |
+ | |
+#ifdef WITH_CUDA_DYNLOAD | |
+# include "cuew.h" | |
+#else | |
+# include "util/util_opengl.h" | |
+# include <cuda.h> | |
+# include <cudaGL.h> | |
+#endif | |
+#include "util/util_debug.h" | |
+#include "util/util_foreach.h" | |
+#include "util/util_logging.h" | |
+#include "util/util_map.h" | |
+#include "util/util_md5.h" | |
+#include "util/util_opengl.h" | |
+#include "util/util_path.h" | |
+#include "util/util_string.h" | |
+#include "util/util_system.h" | |
+#include "util/util_types.h" | |
+#include "util/util_time.h" | |
+#include "util/util_windows.h" | |
+ | |
+#include "kernel/split/kernel_split_data_types.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+#ifndef WITH_CUDA_DYNLOAD | |
+ | |
+/* Transparently implement some functions, so the majority of the file does not | |
+ * need to worry about the difference between dynamically loaded and linked | |
+ * CUDA at all. | |
+ */ | |
+ | |
+namespace { | |
+ | |
+const char *cuewErrorString(CUresult result) | |
+{ | |
+ /* We can only give the error code here without major code duplication; that | |
+ * should be enough, since dynamic loading is only disabled by folks who | |
+ * know what they're doing anyway. | |
+ * | |
+ * NOTE: Avoid calling from several threads. | |
+ */ | |
+ static string error; | |
+ error = string_printf("%d", result); | |
+ return error.c_str(); | |
+} | |
+ | |
+const char *cuewCompilerPath() | |
+{ | |
+ return CYCLES_CUDA_NVCC_EXECUTABLE; | |
+} | |
+ | |
+int cuewCompilerVersion() | |
+{ | |
+ return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10); | |
+} | |
+ | |
+} /* namespace */ | |
+#endif /* WITH_CUDA_DYNLOAD */ | |
+ | |
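+/* Editorial note: CUDA_VERSION encodes major * 1000 + minor * 10 (e.g. 10010 | |
+ * for CUDA 10.1), so the fold in cuewCompilerVersion() yields | |
+ * major * 10 + minor. Two worked cases, checkable at compile time: */ | |
+static_assert((10010 / 100) + (10010 % 100 / 10) == 101, "CUDA 10.1 -> 101"); | |
+static_assert((8000 / 100) + (8000 % 100 / 10) == 80, "CUDA 8.0 -> 80"); | |
+ | |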
+class CUDADevice; | |
+ | |
+class CUDASplitKernel : public DeviceSplitKernel { | |
+ CUDADevice *device; | |
+ | |
+ public: | |
+ explicit CUDASplitKernel(CUDADevice *device); | |
+ | |
+ virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads); | |
+ | |
+ virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim, | |
+ RenderTile &rtile, | |
+ int num_global_elements, | |
+ device_memory &kernel_globals, | |
+ device_memory &kernel_data_, | |
+ device_memory &split_data, | |
+ device_memory &ray_state, | |
+ device_memory &queue_index, | |
+ device_memory &use_queues_flag, | |
+ device_memory &work_pool_wgs); | |
+ | |
+ virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name, | |
+ const DeviceRequestedFeatures &); | |
+ virtual int2 split_kernel_local_size(); | |
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task); | |
+}; | |
+ | |
+/* Utility to push/pop CUDA context. */ | |
+class CUDAContextScope { | |
+ public: | |
+ CUDAContextScope(CUDADevice *device); | |
+ ~CUDAContextScope(); | |
+ | |
+ private: | |
+ CUDADevice *device; | |
+}; | |
+ | |
+class CUDADevice : public Device { | |
+ public: | |
+ DedicatedTaskPool task_pool; | |
+ CUdevice cuDevice; | |
+ CUcontext cuContext; | |
+ CUmodule cuModule, cuFilterModule; | |
+ size_t device_texture_headroom; | |
+ size_t device_working_headroom; | |
+ bool move_texture_to_host; | |
+ size_t map_host_used; | |
+ size_t map_host_limit; | |
+ int can_map_host; | |
+ int cuDevId; | |
+ int cuDevArchitecture; | |
+ bool first_error; | |
+ CUDASplitKernel *split_kernel; | |
+ | |
+ struct CUDAMem { | |
+ CUDAMem() : texobject(0), array(0), use_mapped_host(false) | |
+ { | |
+ } | |
+ | |
+ CUtexObject texobject; | |
+ CUarray array; | |
+ | |
+ /* If true, a mapped host memory in shared_pointer is being used. */ | |
+ bool use_mapped_host; | |
+ }; | |
+ typedef map<device_memory *, CUDAMem> CUDAMemMap; | |
+ CUDAMemMap cuda_mem_map; | |
+ | |
+ struct PixelMem { | |
+ GLuint cuPBO; | |
+ CUgraphicsResource cuPBOresource; | |
+ GLuint cuTexId; | |
+ int w, h; | |
+ }; | |
+ map<device_ptr, PixelMem> pixel_mem_map; | |
+ | |
+ /* Bindless Textures */ | |
+ device_vector<TextureInfo> texture_info; | |
+ bool need_texture_info; | |
+ | |
+ CUdeviceptr cuda_device_ptr(device_ptr mem) | |
+ { | |
+ return (CUdeviceptr)mem; | |
+ } | |
+ | |
+ static bool have_precompiled_kernels() | |
+ { | |
+ string cubins_path = path_get("lib"); | |
+ return path_exists(cubins_path); | |
+ } | |
+ | |
+ virtual bool show_samples() const | |
+ { | |
+ /* The CUDADevice only processes one tile at a time, so showing samples is fine. */ | |
+ return true; | |
+ } | |
+ | |
+ virtual BVHLayoutMask get_bvh_layout_mask() const | |
+ { | |
+ return BVH_LAYOUT_BVH2; | |
+ } | |
+ | |
+ /*#ifdef NDEBUG | |
+#define cuda_abort() | |
+#else | |
+#define cuda_abort() abort() | |
+#endif*/ | |
+ void cuda_error_documentation() | |
+ { | |
+ if (first_error) { | |
+ fprintf(stderr, | |
+ "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n"); | |
+ fprintf(stderr, | |
+ "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n"); | |
+ first_error = false; | |
+ } | |
+ } | |
+ | |
+#define cuda_assert(stmt) \ | |
+ { \ | |
+ CUresult result = stmt; \ | |
+\ | |
+ if (result != CUDA_SUCCESS) { \ | |
+ string message = string_printf( \ | |
+ "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \ | |
+ if (error_msg == "") \ | |
+ error_msg = message; \ | |
+ fprintf(stderr, "%s\n", message.c_str()); \ | |
+ /*cuda_abort();*/ \ | |
+ cuda_error_documentation(); \ | |
+ } \ | |
+ } \ | |
+ (void)0 | |
+ | |
+ bool cuda_error_(CUresult result, const string &stmt) | |
+ { | |
+ if (result == CUDA_SUCCESS) | |
+ return false; | |
+ | |
+ string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result)); | |
+ if (error_msg == "") | |
+ error_msg = message; | |
+ fprintf(stderr, "%s\n", message.c_str()); | |
+ cuda_error_documentation(); | |
+ return true; | |
+ } | |
+ | |
+#define cuda_error(stmt) cuda_error_(stmt, #stmt) | |
+ | |
+ void cuda_error_message(const string &message) | |
+ { | |
+ if (error_msg == "") | |
+ error_msg = message; | |
+ fprintf(stderr, "%s\n", message.c_str()); | |
+ cuda_error_documentation(); | |
+ } | |
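+ | |
+ /* Editorial sketch, not part of the patch: how call sites pick between the | |
+ * two flavors above. cuda_error() returns true on failure for early exits, | |
+ * while cuda_assert() records/logs the first error and continues (ptr and | |
+ * size are illustrative locals): | |
+ * | |
+ * if (cuda_error(cuInit(0))) | |
+ * return; | |
+ * | |
+ * CUdeviceptr ptr = 0; | |
+ * cuda_assert(cuMemAlloc(&ptr, size)); | |
+ */ | |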
+ | |
+ CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_) | |
+ : Device(info, stats, profiler, background_), | |
+ texture_info(this, "__texture_info", MEM_TEXTURE) | |
+ { | |
+ first_error = true; | |
+ background = background_; | |
+ | |
+ cuDevId = info.num; | |
+ cuDevice = 0; | |
+ cuContext = 0; | |
+ | |
+ cuModule = 0; | |
+ cuFilterModule = 0; | |
+ | |
+ split_kernel = NULL; | |
+ | |
+ need_texture_info = false; | |
+ | |
+ device_texture_headroom = 0; | |
+ device_working_headroom = 0; | |
+ move_texture_to_host = false; | |
+ map_host_limit = 0; | |
+ map_host_used = 0; | |
+ can_map_host = 0; | |
+ | |
+ /* Initialize CUDA. */ | |
+ if (cuda_error(cuInit(0))) | |
+ return; | |
+ | |
+ /* Setup device and context. */ | |
+ if (cuda_error(cuDeviceGet(&cuDevice, cuDevId))) | |
+ return; | |
+ | |
+ /* CU_CTX_MAP_HOST for mapping host memory when out of device memory. | |
+ * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render, | |
+ * so we can predict which memory to map to host. */ | |
+ cuda_assert( | |
+ cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice)); | |
+ | |
+ unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX; | |
+ if (can_map_host) { | |
+ ctx_flags |= CU_CTX_MAP_HOST; | |
+ init_host_memory(); | |
+ } | |
+ | |
+ /* Create context. */ | |
+ CUresult result; | |
+ | |
+ if (background) { | |
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice); | |
+ } | |
+ else { | |
+ result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice); | |
+ | |
+ if (result != CUDA_SUCCESS) { | |
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice); | |
+ background = true; | |
+ } | |
+ } | |
+ | |
+ if (cuda_error_(result, "cuCtxCreate")) | |
+ return; | |
+ | |
+ int major, minor; | |
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); | |
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); | |
+ cuDevArchitecture = major * 100 + minor * 10; | |
+ | |
+ /* Pop context set by cuCtxCreate. */ | |
+ cuCtxPopCurrent(NULL); | |
+ } | |
+ | |
+ ~CUDADevice() | |
+ { | |
+ task_pool.stop(); | |
+ | |
+ delete split_kernel; | |
+ | |
+ texture_info.free(); | |
+ | |
+ cuda_assert(cuCtxDestroy(cuContext)); | |
+ } | |
+ | |
+ bool support_device(const DeviceRequestedFeatures & /*requested_features*/) | |
+ { | |
+ int major, minor; | |
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); | |
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); | |
+ | |
+ /* We only support sm_30 and above */ | |
+ if (major < 3) { | |
+ cuda_error_message(string_printf( | |
+ "CUDA device supported only with compute capability 3.0 or up, found %d.%d.", | |
+ major, | |
+ minor)); | |
+ return false; | |
+ } | |
+ | |
+ return true; | |
+ } | |
+ | |
+ bool use_adaptive_compilation() | |
+ { | |
+ return DebugFlags().cuda.adaptive_compile; | |
+ } | |
+ | |
+ bool use_split_kernel() | |
+ { | |
+ return DebugFlags().cuda.split_kernel; | |
+ } | |
+ | |
+ /* Common NVCC flags which stay the same regardless of shading model or | |
+ * kernel sources md5, and only depend on compiler or compilation settings. | |
+ */ | |
+ string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features, | |
+ bool filter = false, | |
+ bool split = false) | |
+ { | |
+ const int machine = system_cpu_bits(); | |
+ const string source_path = path_get("source"); | |
+ const string include_path = source_path; | |
+ string cflags = string_printf( | |
+ "-m%d " | |
+ "--ptxas-options=\"-v\" " | |
+ "--use_fast_math " | |
+ "-DNVCC " | |
+ "-I\"%s\"", | |
+ machine, | |
+ include_path.c_str()); | |
+ if (!filter && use_adaptive_compilation()) { | |
+ cflags += " " + requested_features.get_build_options(); | |
+ } | |
+ const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS"); | |
+ if (extra_cflags) { | |
+ cflags += string(" ") + string(extra_cflags); | |
+ } | |
+#ifdef WITH_CYCLES_DEBUG | |
+ cflags += " -D__KERNEL_DEBUG__"; | |
+#endif | |
+ | |
+ if (split) { | |
+ cflags += " -D__SPLIT__"; | |
+ } | |
+ | |
+ return cflags; | |
+ } | |
+ | |
+ bool compile_check_compiler() | |
+ { | |
+ const char *nvcc = cuewCompilerPath(); | |
+ if (nvcc == NULL) { | |
+ cuda_error_message( | |
+ "CUDA nvcc compiler not found. " | |
+ "Install CUDA toolkit in default location."); | |
+ return false; | |
+ } | |
+ const int cuda_version = cuewCompilerVersion(); | |
+ VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << "."; | |
+ const int major = cuda_version / 10, minor = cuda_version % 10; | |
+ if (cuda_version == 0) { | |
+ cuda_error_message("CUDA nvcc compiler version could not be parsed."); | |
+ return false; | |
+ } | |
+ if (cuda_version < 80) { | |
+ printf( | |
+ "Unsupported CUDA version %d.%d detected, " | |
+ "you need CUDA 8.0 or newer.\n", | |
+ major, | |
+ minor); | |
+ return false; | |
+ } | |
+ else if (cuda_version != 101) { | |
+ printf( | |
+ "CUDA version %d.%d detected, build may succeed but only " | |
+ "CUDA 10.1 is officially supported.\n", | |
+ major, | |
+ minor); | |
+ } | |
+ return true; | |
+ } | |
+ | |
+ string compile_kernel(const DeviceRequestedFeatures &requested_features, | |
+ bool filter = false, | |
+ bool split = false) | |
+ { | |
+ const char *name, *source; | |
+ if (filter) { | |
+ name = "filter"; | |
+ source = "filter.cu"; | |
+ } | |
+ else if (split) { | |
+ name = "kernel_split"; | |
+ source = "kernel_split.cu"; | |
+ } | |
+ else { | |
+ name = "kernel"; | |
+ source = "kernel.cu"; | |
+ } | |
+ /* Compute cubin name. */ | |
+ int major, minor; | |
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); | |
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); | |
+ | |
+ /* Attempt to use kernel provided with Blender. */ | |
+ if (!use_adaptive_compilation()) { | |
+ const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); | |
+ VLOG(1) << "Testing for pre-compiled kernel " << cubin << "."; | |
+ if (path_exists(cubin)) { | |
+ VLOG(1) << "Using precompiled kernel."; | |
+ return cubin; | |
+ } | |
+ const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); | |
+ VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; | |
+ if (path_exists(ptx)) { | |
+ VLOG(1) << "Using precompiled kernel."; | |
+ return ptx; | |
+ } | |
+ } | |
+ | |
+ const string common_cflags = compile_kernel_get_common_cflags( | |
+ requested_features, filter, split); | |
+ | |
+ /* Try to use locally compiled kernel. */ | |
+ const string source_path = path_get("source"); | |
+ const string kernel_md5 = path_files_md5_hash(source_path); | |
+ | |
+ /* We include cflags in the md5 so that changing the CUDA toolkit or other | |
+ * compiler command line arguments ensures the cubin gets re-built. | |
+ */ | |
+ const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags); | |
+ | |
+ const string cubin_file = string_printf( | |
+ "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str()); | |
+ const string cubin = path_cache_get(path_join("kernels", cubin_file)); | |
+ VLOG(1) << "Testing for locally compiled kernel " << cubin << "."; | |
+ if (path_exists(cubin)) { | |
+ VLOG(1) << "Using locally compiled kernel."; | |
+ return cubin; | |
+ } | |
+ | |
+#ifdef _WIN32 | |
+ if (have_precompiled_kernels()) { | |
+ if (major < 3) { | |
+ cuda_error_message( | |
+ string_printf("CUDA device requires compute capability 3.0 or up, " | |
+ "found %d.%d. Your GPU is not supported.", | |
+ major, | |
+ minor)); | |
+ } | |
+ else { | |
+ cuda_error_message( | |
+ string_printf("CUDA binary kernel for this graphics card compute " | |
+ "capability (%d.%d) not found.", | |
+ major, | |
+ minor)); | |
+ } | |
+ return ""; | |
+ } | |
+#endif | |
+ | |
+ /* Compile. */ | |
+ if (!compile_check_compiler()) { | |
+ return ""; | |
+ } | |
+ const char *nvcc = cuewCompilerPath(); | |
+ const string kernel = path_join(path_join(source_path, "kernel"), | |
+ path_join("kernels", path_join("cuda", source))); | |
+ double starttime = time_dt(); | |
+ printf("Compiling CUDA kernel ...\n"); | |
+ | |
+ path_create_directories(cubin); | |
+ | |
+ string command = string_printf( | |
+ "\"%s\" " | |
+ "-arch=sm_%d%d " | |
+ "--cubin \"%s\" " | |
+ "-o \"%s\" " | |
+ "%s ", | |
+ nvcc, | |
+ major, | |
+ minor, | |
+ kernel.c_str(), | |
+ cubin.c_str(), | |
+ common_cflags.c_str()); | |
+ | |
+ printf("%s\n", command.c_str()); | |
+ | |
+ if (system(command.c_str()) == -1) { | |
+ cuda_error_message( | |
+ "Failed to execute compilation command, " | |
+ "see console for details."); | |
+ return ""; | |
+ } | |
+ | |
+ /* Verify that compilation succeeded. */ | |
+ if (!path_exists(cubin)) { | |
+ cuda_error_message( | |
+ "CUDA kernel compilation failed, " | |
+ "see console for details."); | |
+ return ""; | |
+ } | |
+ | |
+ printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); | |
+ | |
+ return cubin; | |
+ } | |
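+ | |
+ /* Editorial note: with the naming scheme above, a locally built kernel for | |
+ * an sm_75 card would be cached as, e.g. (example hash, not a real one): | |
+ * | |
+ * string_printf("cycles_%s_sm%d%d_%s.cubin", "kernel", 7, 5, md5.c_str()) | |
+ * // -> "cycles_kernel_sm75_0123456789abcdef0123456789abcdef.cubin" | |
+ * | |
+ * under the "kernels" cache directory. */ | |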
+ | |
+ bool load_kernels(const DeviceRequestedFeatures &requested_features) | |
+ { | |
+ /* TODO(sergey): Support kernel re-loading for CUDA devices. | |
+ * | |
+ * Currently re-loading the kernel will invalidate memory pointers, | |
+ * causing problems in cuCtxSynchronize. | |
+ */ | |
+ if (cuFilterModule && cuModule) { | |
+ VLOG(1) << "Skipping kernel reload, not currently supported."; | |
+ return true; | |
+ } | |
+ | |
+ /* check if cuda init succeeded */ | |
+ if (cuContext == 0) | |
+ return false; | |
+ | |
+ /* check if GPU is supported */ | |
+ if (!support_device(requested_features)) | |
+ return false; | |
+ | |
+ /* get kernel */ | |
+ string cubin = compile_kernel(requested_features, false, use_split_kernel()); | |
+ if (cubin == "") | |
+ return false; | |
+ | |
+ string filter_cubin = compile_kernel(requested_features, true, false); | |
+ if (filter_cubin == "") | |
+ return false; | |
+ | |
+ /* open module */ | |
+ CUDAContextScope scope(this); | |
+ | |
+ string cubin_data; | |
+ CUresult result; | |
+ | |
+ if (path_read_text(cubin, cubin_data)) | |
+ result = cuModuleLoadData(&cuModule, cubin_data.c_str()); | |
+ else | |
+ result = CUDA_ERROR_FILE_NOT_FOUND; | |
+ | |
+ if (cuda_error_(result, "cuModuleLoad")) | |
+ cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str())); | |
+ | |
+ if (path_read_text(filter_cubin, cubin_data)) | |
+ result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str()); | |
+ else | |
+ result = CUDA_ERROR_FILE_NOT_FOUND; | |
+ | |
+ if (cuda_error_(result, "cuModuleLoad")) | |
+ cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str())); | |
+ | |
+ if (result == CUDA_SUCCESS) { | |
+ reserve_local_memory(requested_features); | |
+ } | |
+ | |
+ return (result == CUDA_SUCCESS); | |
+ } | |
+ | |
+ void reserve_local_memory(const DeviceRequestedFeatures &requested_features) | |
+ { | |
+ if (use_split_kernel()) { | |
+ /* Split kernel mostly uses global memory and adaptive compilation, so it is | |
+ * currently difficult to predict how much is needed. */ | |
+ return; | |
+ } | |
+ | |
+ /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory | |
+ * needed for kernel launches, so that we can reliably figure out when | |
+ * to allocate scene data in mapped host memory. */ | |
+ CUDAContextScope scope(this); | |
+ | |
+ size_t total = 0, free_before = 0, free_after = 0; | |
+ cuMemGetInfo(&free_before, &total); | |
+ | |
+ /* Get kernel function. */ | |
+ CUfunction cuPathTrace; | |
+ | |
+ if (requested_features.use_integrator_branched) { | |
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); | |
+ } | |
+ else { | |
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); | |
+ } | |
+ | |
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); | |
+ | |
+ int min_blocks, num_threads_per_block; | |
+ cuda_assert(cuOccupancyMaxPotentialBlockSize( | |
+ &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0)); | |
+ | |
+ /* Launch kernel; using just 1 block appears sufficient to reserve | |
+ * memory for all multiprocessors. It would still be good to do this in | |
+ * parallel for the multi GPU case to make it faster. */ | |
+ CUdeviceptr d_work_tiles = 0; | |
+ uint total_work_size = 0; | |
+ | |
+ void *args[] = {&d_work_tiles, &total_work_size}; | |
+ | |
+ cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); | |
+ | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ cuMemGetInfo(&free_after, &total); | |
+ VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after) | |
+ << " bytes. (" << string_human_readable_size(free_before - free_after) << ")"; | |
+ | |
+#if 0 | |
+ /* For testing mapped host memory, fill up device memory. */ | |
+ const size_t keep_mb = 1024; | |
+ | |
+ while (free_after > keep_mb * 1024 * 1024LL) { | |
+ CUdeviceptr tmp; | |
+ cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL)); | |
+ cuMemGetInfo(&free_after, &total); | |
+ } | |
+#endif | |
+ } | |
+ | |
+ void init_host_memory() | |
+ { | |
+ /* Limit amount of host mapped memory, because allocating too much can | |
+ * cause system instability. Leave at least half or 4 GB of system | |
+ * memory free, whichever is smaller. */ | |
+ size_t default_limit = 4 * 1024 * 1024 * 1024LL; | |
+ size_t system_ram = system_physical_ram(); | |
+ | |
+ if (system_ram > 0) { | |
+ if (system_ram / 2 > default_limit) { | |
+ map_host_limit = system_ram - default_limit; | |
+ } | |
+ else { | |
+ map_host_limit = system_ram / 2; | |
+ } | |
+ } | |
+ else { | |
+ VLOG(1) << "Mapped host memory disabled, failed to get system RAM"; | |
+ map_host_limit = 0; | |
+ } | |
+ | |
+ /* Amount of device memory to keep free after texture memory | |
+ * and working memory allocations respectively. We set the working | |
+ * memory limit headroom lower so that some space is left after all | |
+ * texture memory allocations. */ | |
+ device_working_headroom = 32 * 1024 * 1024LL; // 32MB | |
+ device_texture_headroom = 128 * 1024 * 1024LL; // 128MB | |
+ | |
+ VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit) | |
+ << " bytes. (" << string_human_readable_size(map_host_limit) << ")"; | |
+ } | |
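+ | |
+ /* Editorial sketch, not part of the patch: the limit rule above as a | |
+ * standalone helper (name is illustrative). It leaves free whichever is | |
+ * smaller of half the RAM or 4 GB: */ | |
+ static size_t host_map_limit_example(size_t system_ram) | |
+ { | |
+ const size_t default_limit = 4 * 1024 * 1024 * 1024LL; | |
+ if (system_ram == 0) { | |
+ return 0; /* Unknown RAM: mapped host memory disabled. */ | |
+ } | |
+ return (system_ram / 2 > default_limit) ? system_ram - default_limit : | |
+ system_ram / 2; | |
+ } | |
+ /* host_map_limit_example(32 GB) == 28 GB; host_map_limit_example(6 GB) == 3 GB. */ | |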
+ | |
+ void load_texture_info() | |
+ { | |
+ if (need_texture_info) { | |
+ texture_info.copy_to_device(); | |
+ need_texture_info = false; | |
+ } | |
+ } | |
+ | |
+ void move_textures_to_host(size_t size, bool for_texture) | |
+ { | |
+ /* Signal to reallocate textures in host memory only. */ | |
+ move_texture_to_host = true; | |
+ | |
+ while (size > 0) { | |
+ /* Find suitable memory allocation to move. */ | |
+ device_memory *max_mem = NULL; | |
+ size_t max_size = 0; | |
+ bool max_is_image = false; | |
+ | |
+ foreach (CUDAMemMap::value_type &pair, cuda_mem_map) { | |
+ device_memory &mem = *pair.first; | |
+ CUDAMem *cmem = &pair.second; | |
+ | |
+ bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); | |
+ bool is_image = is_texture && (mem.data_height > 1); | |
+ | |
+ /* Can't move this type of memory. */ | |
+ if (!is_texture || cmem->array) { | |
+ continue; | |
+ } | |
+ | |
+ /* Already in host memory. */ | |
+ if (cmem->use_mapped_host) { | |
+ continue; | |
+ } | |
+ | |
+ /* For other textures, only move image textures. */ | |
+ if (for_texture && !is_image) { | |
+ continue; | |
+ } | |
+ | |
+ /* Try to move largest allocation, prefer moving images. */ | |
+ if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) { | |
+ max_is_image = is_image; | |
+ max_size = mem.device_size; | |
+ max_mem = &mem; | |
+ } | |
+ } | |
+ | |
+ /* Move to host memory. This part is mutex protected since | |
+ * multiple CUDA devices could be moving the memory. The | |
+ * first one will do it, and the rest will adopt the pointer. */ | |
+ if (max_mem) { | |
+ VLOG(1) << "Move memory from device to host: " << max_mem->name; | |
+ | |
+ static thread_mutex move_mutex; | |
+ thread_scoped_lock lock(move_mutex); | |
+ | |
+ /* Preserve the original device pointer, in case of multi device | |
+ * we can't change it because the pointer mapping would break. */ | |
+ device_ptr prev_pointer = max_mem->device_pointer; | |
+ size_t prev_size = max_mem->device_size; | |
+ | |
+ tex_free(*max_mem); | |
+ tex_alloc(*max_mem); | |
+ size = (max_size >= size) ? 0 : size - max_size; | |
+ | |
+ max_mem->device_pointer = prev_pointer; | |
+ max_mem->device_size = prev_size; | |
+ } | |
+ else { | |
+ break; | |
+ } | |
+ } | |
+ | |
+ /* Update texture info array with new pointers. */ | |
+ load_texture_info(); | |
+ | |
+ move_texture_to_host = false; | |
+ } | |
+ | |
+ CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0) | |
+ { | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUdeviceptr device_pointer = 0; | |
+ size_t size = mem.memory_size() + pitch_padding; | |
+ | |
+ CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY; | |
+ const char *status = ""; | |
+ | |
+ /* First try allocating in device memory, respecting headroom. We make | |
+ * an exception for texture info. It is small and frequently accessed, | |
+ * so treat it as working memory. | |
+ * | |
+ * If there is not enough room for working memory, we will try to move | |
+ * textures to host memory, assuming the performance impact would have | |
+ * been worse for working memory. */ | |
+ bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info); | |
+ bool is_image = is_texture && (mem.data_height > 1); | |
+ | |
+ size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom; | |
+ | |
+ size_t total = 0, free = 0; | |
+ cuMemGetInfo(&free, &total); | |
+ | |
+ /* Move textures to host memory if needed. */ | |
+ if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) { | |
+ move_textures_to_host(size + headroom - free, is_texture); | |
+ cuMemGetInfo(&free, &total); | |
+ } | |
+ | |
+ /* Allocate in device memory. */ | |
+ if (!move_texture_to_host && (size + headroom) < free) { | |
+ mem_alloc_result = cuMemAlloc(&device_pointer, size); | |
+ if (mem_alloc_result == CUDA_SUCCESS) { | |
+ status = " in device memory"; | |
+ } | |
+ } | |
+ | |
+ /* Fall back to mapped host memory if needed and possible. */ | |
+ | |
+ void *shared_pointer = 0; | |
+ | |
+ if (mem_alloc_result != CUDA_SUCCESS && can_map_host) { | |
+ if (mem.shared_pointer) { | |
+ /* Another device already allocated host memory. */ | |
+ mem_alloc_result = CUDA_SUCCESS; | |
+ shared_pointer = mem.shared_pointer; | |
+ } | |
+ else if (map_host_used + size < map_host_limit) { | |
+ /* Allocate host memory ourselves. */ | |
+ mem_alloc_result = cuMemHostAlloc( | |
+ &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED); | |
+ | |
+ assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) || | |
+ (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0)); | |
+ } | |
+ | |
+ if (mem_alloc_result == CUDA_SUCCESS) { | |
+ cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0)); | |
+ map_host_used += size; | |
+ status = " in host memory"; | |
+ } | |
+ else { | |
+ status = " failed, out of host memory"; | |
+ } | |
+ } | |
+ | |
+ if (mem_alloc_result != CUDA_SUCCESS) { | |
+ status = " failed, out of device and host memory"; | |
+ cuda_assert(mem_alloc_result); | |
+ } | |
+ | |
+ if (mem.name) { | |
+ VLOG(1) << "Buffer allocate: " << mem.name << ", " | |
+ << string_human_readable_number(mem.memory_size()) << " bytes. (" | |
+ << string_human_readable_size(mem.memory_size()) << ")" << status; | |
+ } | |
+ | |
+ mem.device_pointer = (device_ptr)device_pointer; | |
+ mem.device_size = size; | |
+ stats.mem_alloc(size); | |
+ | |
+ if (!mem.device_pointer) { | |
+ return NULL; | |
+ } | |
+ | |
+ /* Insert into map of allocations. */ | |
+ CUDAMem *cmem = &cuda_mem_map[&mem]; | |
+ if (shared_pointer != 0) { | |
+ /* Replace host pointer with our host allocation. Only works if | |
+ * CUDA memory layout is the same and has no pitch padding. Also | |
+ * does not work if we move textures to host during a render, | |
+ * since other devices might be using the memory. */ | |
+ | |
+ if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer && | |
+ mem.host_pointer != shared_pointer) { | |
+ memcpy(shared_pointer, mem.host_pointer, size); | |
+ | |
+ /* A call to device_memory::host_free() should be preceded by | |
+ * a call to device_memory::device_free() for host memory | |
+ * allocated by a device to be handled properly. Two exceptions | |
+ * are here and a call in OptiXDevice::generic_alloc(), where | |
+ * the current host memory can be assumed to be allocated by | |
+ * device_memory::host_alloc(), not by a device. */ | |
+ | |
+ mem.host_free(); | |
+ mem.host_pointer = shared_pointer; | |
+ } | |
+ mem.shared_pointer = shared_pointer; | |
+ mem.shared_counter++; | |
+ cmem->use_mapped_host = true; | |
+ } | |
+ else { | |
+ cmem->use_mapped_host = false; | |
+ } | |
+ | |
+ return cmem; | |
+ } | |
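+ | |
+ /* Editorial sketch, not part of the patch: the fallback order implemented | |
+ * above, condensed into a decision helper (enum and names are illustrative; | |
+ * texture eviction via move_textures_to_host() happens before step 1): */ | |
+ enum AllocPathExample { ALLOC_DEVICE, ALLOC_MAPPED_HOST, ALLOC_FAILED }; | |
+ static AllocPathExample pick_alloc_path( | |
+ size_t size, size_t headroom, size_t free_device, bool can_map, size_t host_used, size_t host_limit) | |
+ { | |
+ if (size + headroom < free_device) | |
+ return ALLOC_DEVICE; /* cuMemAlloc() path. */ | |
+ if (can_map && host_used + size < host_limit) | |
+ return ALLOC_MAPPED_HOST; /* cuMemHostAlloc() + device pointer path, shared across devices. */ | |
+ return ALLOC_FAILED; /* Out of device and host memory. */ | |
+ } | |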
+ | |
+ void generic_copy_to(device_memory &mem) | |
+ { | |
+ if (mem.host_pointer && mem.device_pointer) { | |
+ CUDAContextScope scope(this); | |
+ | |
+ /* If use_mapped_host of mem is false, the current device only | |
+ * uses device memory allocated by cuMemAlloc regardless of | |
+ * mem.host_pointer and mem.shared_pointer, and should copy | |
+ * data from mem.host_pointer. */ | |
+ | |
+ if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) { | |
+ cuda_assert(cuMemcpyHtoD( | |
+ cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size())); | |
+ } | |
+ } | |
+ } | |
+ | |
+ void generic_free(device_memory &mem) | |
+ { | |
+ if (mem.device_pointer) { | |
+ CUDAContextScope scope(this); | |
+ const CUDAMem &cmem = cuda_mem_map[&mem]; | |
+ | |
+ /* If cmem.use_mapped_host is true, reference counting is used | |
+ * to safely free the mapped host memory. */ | |
+ | |
+ if (cmem.use_mapped_host) { | |
+ assert(mem.shared_pointer); | |
+ if (mem.shared_pointer) { | |
+ assert(mem.shared_counter > 0); | |
+ if (--mem.shared_counter == 0) { | |
+ if (mem.host_pointer == mem.shared_pointer) { | |
+ mem.host_pointer = 0; | |
+ } | |
+ cuMemFreeHost(mem.shared_pointer); | |
+ mem.shared_pointer = 0; | |
+ } | |
+ } | |
+ map_host_used -= mem.device_size; | |
+ } | |
+ else { | |
+ /* Free device memory. */ | |
+ cuMemFree(mem.device_pointer); | |
+ } | |
+ | |
+ stats.mem_free(mem.device_size); | |
+ mem.device_pointer = 0; | |
+ mem.device_size = 0; | |
+ | |
+ cuda_mem_map.erase(cuda_mem_map.find(&mem)); | |
+ } | |
+ } | |
+ | |
+ void mem_alloc(device_memory &mem) | |
+ { | |
+ if (mem.type == MEM_PIXELS && !background) { | |
+ pixels_alloc(mem); | |
+ } | |
+ else if (mem.type == MEM_TEXTURE) { | |
+ assert(!"mem_alloc not supported for textures."); | |
+ } | |
+ else { | |
+ generic_alloc(mem); | |
+ } | |
+ } | |
+ | |
+ void mem_copy_to(device_memory &mem) | |
+ { | |
+ if (mem.type == MEM_PIXELS) { | |
+ assert(!"mem_copy_to not supported for pixels."); | |
+ } | |
+ else if (mem.type == MEM_TEXTURE) { | |
+ tex_free(mem); | |
+ tex_alloc(mem); | |
+ } | |
+ else { | |
+ if (!mem.device_pointer) { | |
+ generic_alloc(mem); | |
+ } | |
+ | |
+ generic_copy_to(mem); | |
+ } | |
+ } | |
+ | |
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) | |
+ { | |
+ if (mem.type == MEM_PIXELS && !background) { | |
+ pixels_copy_from(mem, y, w, h); | |
+ } | |
+ else if (mem.type == MEM_TEXTURE) { | |
+ assert(!"mem_copy_from not supported for textures."); | |
+ } | |
+ else { | |
+ CUDAContextScope scope(this); | |
+ size_t offset = elem * y * w; | |
+ size_t size = elem * w * h; | |
+ | |
+ if (mem.host_pointer && mem.device_pointer) { | |
+ cuda_assert(cuMemcpyDtoH( | |
+ (uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size)); | |
+ } | |
+ else if (mem.host_pointer) { | |
+ memset((char *)mem.host_pointer + offset, 0, size); | |
+ } | |
+ } | |
+ } | |
+ | |
+ void mem_zero(device_memory &mem) | |
+ { | |
+ if (!mem.device_pointer) { | |
+ mem_alloc(mem); | |
+ } | |
+ | |
+ if (mem.host_pointer) { | |
+ memset(mem.host_pointer, 0, mem.memory_size()); | |
+ } | |
+ | |
+ /* If use_mapped_host of mem is false, mem.device_pointer currently | |
+ * refers to device memory regardless of mem.host_pointer and | |
+ * mem.shared_pointer. */ | |
+ | |
+ if (mem.device_pointer && | |
+ (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) { | |
+ CUDAContextScope scope(this); | |
+ cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())); | |
+ } | |
+ } | |
+ | |
+ void mem_free(device_memory &mem) | |
+ { | |
+ if (mem.type == MEM_PIXELS && !background) { | |
+ pixels_free(mem); | |
+ } | |
+ else if (mem.type == MEM_TEXTURE) { | |
+ tex_free(mem); | |
+ } | |
+ else { | |
+ generic_free(mem); | |
+ } | |
+ } | |
+ | |
+ virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) | |
+ { | |
+ return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset)); | |
+ } | |
+ | |
+ void const_copy_to(const char *name, void *host, size_t size) | |
+ { | |
+ CUDAContextScope scope(this); | |
+ CUdeviceptr mem; | |
+ size_t bytes; | |
+ | |
+ cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)); | |
+ // assert(bytes == size); | |
+ cuda_assert(cuMemcpyHtoD(mem, host, size)); | |
+ } | |
+ | |
+ void tex_alloc(device_memory &mem) | |
+ { | |
+ CUDAContextScope scope(this); | |
+ | |
+ /* General variables for both architectures */ | |
+ string bind_name = mem.name; | |
+ size_t dsize = datatype_size(mem.data_type); | |
+ size_t size = mem.memory_size(); | |
+ | |
+ CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP; | |
+ switch (mem.extension) { | |
+ case EXTENSION_REPEAT: | |
+ address_mode = CU_TR_ADDRESS_MODE_WRAP; | |
+ break; | |
+ case EXTENSION_EXTEND: | |
+ address_mode = CU_TR_ADDRESS_MODE_CLAMP; | |
+ break; | |
+ case EXTENSION_CLIP: | |
+ address_mode = CU_TR_ADDRESS_MODE_BORDER; | |
+ break; | |
+ default: | |
+ assert(0); | |
+ break; | |
+ } | |
+ | |
+ CUfilter_mode filter_mode; | |
+ if (mem.interpolation == INTERPOLATION_CLOSEST) { | |
+ filter_mode = CU_TR_FILTER_MODE_POINT; | |
+ } | |
+ else { | |
+ filter_mode = CU_TR_FILTER_MODE_LINEAR; | |
+ } | |
+ | |
+ /* Data Storage */ | |
+ if (mem.interpolation == INTERPOLATION_NONE) { | |
+ generic_alloc(mem); | |
+ generic_copy_to(mem); | |
+ | |
+ CUdeviceptr cumem; | |
+ size_t cubytes; | |
+ | |
+ cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str())); | |
+ | |
+ if (cubytes == 8) { | |
+ /* 64 bit device pointer */ | |
+ uint64_t ptr = mem.device_pointer; | |
+ cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes)); | |
+ } | |
+ else { | |
+ /* 32 bit device pointer */ | |
+ uint32_t ptr = (uint32_t)mem.device_pointer; | |
+ cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes)); | |
+ } | |
+ return; | |
+ } | |
+ | |
+ /* Image Texture Storage */ | |
+ CUarray_format_enum format; | |
+ switch (mem.data_type) { | |
+ case TYPE_UCHAR: | |
+ format = CU_AD_FORMAT_UNSIGNED_INT8; | |
+ break; | |
+ case TYPE_UINT16: | |
+ format = CU_AD_FORMAT_UNSIGNED_INT16; | |
+ break; | |
+ case TYPE_UINT: | |
+ format = CU_AD_FORMAT_UNSIGNED_INT32; | |
+ break; | |
+ case TYPE_INT: | |
+ format = CU_AD_FORMAT_SIGNED_INT32; | |
+ break; | |
+ case TYPE_FLOAT: | |
+ format = CU_AD_FORMAT_FLOAT; | |
+ break; | |
+ case TYPE_HALF: | |
+ format = CU_AD_FORMAT_HALF; | |
+ break; | |
+ default: | |
+ assert(0); | |
+ return; | |
+ } | |
+ | |
+ CUDAMem *cmem = NULL; | |
+ CUarray array_3d = NULL; | |
+ size_t src_pitch = mem.data_width * dsize * mem.data_elements; | |
+ size_t dst_pitch = src_pitch; | |
+ | |
+ if (mem.data_depth > 1) { | |
+ /* 3D texture using an array; there is no API for linear memory. */ | |
+ CUDA_ARRAY3D_DESCRIPTOR desc; | |
+ | |
+ desc.Width = mem.data_width; | |
+ desc.Height = mem.data_height; | |
+ desc.Depth = mem.data_depth; | |
+ desc.Format = format; | |
+ desc.NumChannels = mem.data_elements; | |
+ desc.Flags = 0; | |
+ | |
+ VLOG(1) << "Array 3D allocate: " << mem.name << ", " | |
+ << string_human_readable_number(mem.memory_size()) << " bytes. (" | |
+ << string_human_readable_size(mem.memory_size()) << ")"; | |
+ | |
+ cuda_assert(cuArray3DCreate(&array_3d, &desc)); | |
+ | |
+ if (!array_3d) { | |
+ return; | |
+ } | |
+ | |
+ CUDA_MEMCPY3D param; | |
+ memset(¶m, 0, sizeof(param)); | |
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY; | |
+ param.dstArray = array_3d; | |
+ param.srcMemoryType = CU_MEMORYTYPE_HOST; | |
+ param.srcHost = mem.host_pointer; | |
+ param.srcPitch = src_pitch; | |
+ param.WidthInBytes = param.srcPitch; | |
+ param.Height = mem.data_height; | |
+ param.Depth = mem.data_depth; | |
+ | |
+ cuda_assert(cuMemcpy3D(¶m)); | |
+ | |
+ mem.device_pointer = (device_ptr)array_3d; | |
+ mem.device_size = size; | |
+ stats.mem_alloc(size); | |
+ | |
+ cmem = &cuda_mem_map[&mem]; | |
+ cmem->texobject = 0; | |
+ cmem->array = array_3d; | |
+ } | |
+ else if (mem.data_height > 0) { | |
+ /* 2D texture, using pitch aligned linear memory. */ | |
+ int alignment = 0; | |
+ cuda_assert( | |
+ cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice)); | |
+ dst_pitch = align_up(src_pitch, alignment); | |
+ size_t dst_size = dst_pitch * mem.data_height; | |
+ | |
+ cmem = generic_alloc(mem, dst_size - mem.memory_size()); | |
+ if (!cmem) { | |
+ return; | |
+ } | |
+ | |
+ CUDA_MEMCPY2D param; | |
+ memset(¶m, 0, sizeof(param)); | |
+ param.dstMemoryType = CU_MEMORYTYPE_DEVICE; | |
+ param.dstDevice = mem.device_pointer; | |
+ param.dstPitch = dst_pitch; | |
+ param.srcMemoryType = CU_MEMORYTYPE_HOST; | |
+ param.srcHost = mem.host_pointer; | |
+ param.srcPitch = src_pitch; | |
+ param.WidthInBytes = param.srcPitch; | |
+ param.Height = mem.data_height; | |
+ | |
+ cuda_assert(cuMemcpy2DUnaligned(¶m)); | |
+ } | |
+ else { | |
+ /* 1D texture, using linear memory. */ | |
+ cmem = generic_alloc(mem); | |
+ if (!cmem) { | |
+ return; | |
+ } | |
+ | |
+ cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size)); | |
+ } | |
+ | |
+ /* Kepler+, bindless textures. */ | |
+ int flat_slot = 0; | |
+ if (string_startswith(mem.name, "__tex_image")) { | |
+ int pos = string(mem.name).rfind("_"); | |
+ flat_slot = atoi(mem.name + pos + 1); | |
+ } | |
+ else { | |
+ assert(0); | |
+ } | |
+ | |
+ CUDA_RESOURCE_DESC resDesc; | |
+ memset(&resDesc, 0, sizeof(resDesc)); | |
+ | |
+ if (array_3d) { | |
+ resDesc.resType = CU_RESOURCE_TYPE_ARRAY; | |
+ resDesc.res.array.hArray = array_3d; | |
+ resDesc.flags = 0; | |
+ } | |
+ else if (mem.data_height > 0) { | |
+ resDesc.resType = CU_RESOURCE_TYPE_PITCH2D; | |
+ resDesc.res.pitch2D.devPtr = mem.device_pointer; | |
+ resDesc.res.pitch2D.format = format; | |
+ resDesc.res.pitch2D.numChannels = mem.data_elements; | |
+ resDesc.res.pitch2D.height = mem.data_height; | |
+ resDesc.res.pitch2D.width = mem.data_width; | |
+ resDesc.res.pitch2D.pitchInBytes = dst_pitch; | |
+ } | |
+ else { | |
+ resDesc.resType = CU_RESOURCE_TYPE_LINEAR; | |
+ resDesc.res.linear.devPtr = mem.device_pointer; | |
+ resDesc.res.linear.format = format; | |
+ resDesc.res.linear.numChannels = mem.data_elements; | |
+ resDesc.res.linear.sizeInBytes = mem.device_size; | |
+ } | |
+ | |
+ CUDA_TEXTURE_DESC texDesc; | |
+ memset(&texDesc, 0, sizeof(texDesc)); | |
+ texDesc.addressMode[0] = address_mode; | |
+ texDesc.addressMode[1] = address_mode; | |
+ texDesc.addressMode[2] = address_mode; | |
+ texDesc.filterMode = filter_mode; | |
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES; | |
+ | |
+ cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL)); | |
+ | |
+ /* Resize once */ | |
+ if (flat_slot >= texture_info.size()) { | |
+ /* Allocate some slots in advance, to reduce the amount | |
+ * of re-allocations. */ | |
+ texture_info.resize(flat_slot + 128); | |
+ } | |
+ | |
+ /* Set mapping and tag that we need to (re-)upload to the device. */ | |
+ TextureInfo &info = texture_info[flat_slot]; | |
+ info.data = (uint64_t)cmem->texobject; | |
+ info.cl_buffer = 0; | |
+ info.interpolation = mem.interpolation; | |
+ info.extension = mem.extension; | |
+ info.width = mem.data_width; | |
+ info.height = mem.data_height; | |
+ info.depth = mem.data_depth; | |
+ need_texture_info = true; | |
+ } | |
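+ | |
+ /* Editorial sketch, not part of the patch: the 2D branch above pads each | |
+ * row out to the device pitch alignment (name is illustrative): */ | |
+ static size_t align_up_example(size_t src_pitch, size_t alignment) | |
+ { | |
+ /* E.g. a 100-texel-wide float4 row: src_pitch = 100 * 4 * 4 = 1600 | |
+ * bytes; with a 512-byte alignment this returns 2048, so dst_pitch = | |
+ * 2048 and (2048 - 1600) * data_height bytes of padding are added. */ | |
+ return ((src_pitch + alignment - 1) / alignment) * alignment; | |
+ } | |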
+ | |
+ void tex_free(device_memory &mem) | |
+ { | |
+ if (mem.device_pointer) { | |
+ CUDAContextScope scope(this); | |
+ const CUDAMem &cmem = cuda_mem_map[&mem]; | |
+ | |
+ if (cmem.texobject) { | |
+ /* Free bindless texture. */ | |
+ cuTexObjectDestroy(cmem.texobject); | |
+ } | |
+ | |
+ if (cmem.array) { | |
+ /* Free array. */ | |
+ cuArrayDestroy(cmem.array); | |
+ stats.mem_free(mem.device_size); | |
+ mem.device_pointer = 0; | |
+ mem.device_size = 0; | |
+ | |
+ cuda_mem_map.erase(cuda_mem_map.find(&mem)); | |
+ } | |
+ else { | |
+ generic_free(mem); | |
+ } | |
+ } | |
+ } | |
+ | |
+#define CUDA_GET_BLOCKSIZE(func, w, h) \ | |
+ int threads_per_block; \ | |
+ cuda_assert( \ | |
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ | |
+ int threads = (int)sqrt((float)threads_per_block); \ | |
+ int xblocks = ((w) + threads - 1) / threads; \ | |
+ int yblocks = ((h) + threads - 1) / threads; | |
+ | |
+#define CUDA_LAUNCH_KERNEL(func, args) \ | |
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0)); | |
+ | |
+/* Similar to the above, but for 1-dimensional blocks. */ | |
+#define CUDA_GET_BLOCKSIZE_1D(func, w, h) \ | |
+ int threads_per_block; \ | |
+ cuda_assert( \ | |
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \ | |
+ int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \ | |
+ int yblocks = h; | |
+ | |
+#define CUDA_LAUNCH_KERNEL_1D(func, args) \ | |
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0)); | |
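+ | |
+/* Editorial note: a worked instance of the 2D macro above. With | |
+ * threads_per_block = 1024, threads = (int)sqrt(1024.0f) = 32, so a | |
+ * 100 x 60 filter area launches xblocks = (100 + 31) / 32 = 4 and | |
+ * yblocks = (60 + 31) / 32 = 2, i.e. a 4 x 2 grid of 32 x 32 blocks | |
+ * covering 128 x 64 threads, with the overhang masked out in the kernels. */ | |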
+ | |
+ bool denoising_non_local_means(device_ptr image_ptr, | |
+ device_ptr guide_ptr, | |
+ device_ptr variance_ptr, | |
+ device_ptr out_ptr, | |
+ DenoisingTask *task) | |
+ { | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ int stride = task->buffer.stride; | |
+ int w = task->buffer.width; | |
+ int h = task->buffer.h; | |
+ int r = task->nlm_state.r; | |
+ int f = task->nlm_state.f; | |
+ float a = task->nlm_state.a; | |
+ float k_2 = task->nlm_state.k_2; | |
+ | |
+ int pass_stride = task->buffer.pass_stride; | |
+ int num_shifts = (2 * r + 1) * (2 * r + 1); | |
+ int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0; | |
+ int frame_offset = 0; | |
+ | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer); | |
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts; | |
+ CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts; | |
+ CUdeviceptr scale_ptr = 0; | |
+ | |
+ cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride)); | |
+ cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride)); | |
+ | |
+ { | |
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference")); | |
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur")); | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight")); | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output")); | |
+ | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1)); | |
+ | |
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts); | |
+ | |
+ void *calc_difference_args[] = {&guide_ptr, | |
+ &variance_ptr, | |
+ &scale_ptr, | |
+ &difference, | |
+ &w, | |
+ &h, | |
+ &stride, | |
+ &pass_stride, | |
+ &r, | |
+ &channel_offset, | |
+ &frame_offset, | |
+ &a, | |
+ &k_2}; | |
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; | |
+ void *calc_weight_args[] = { | |
+ &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; | |
+ void *update_output_args[] = {&blurDifference, | |
+ &image_ptr, | |
+ &out_ptr, | |
+ &weightAccum, | |
+ &w, | |
+ &h, | |
+ &stride, | |
+ &pass_stride, | |
+ &channel_offset, | |
+ &r, | |
+ &f}; | |
+ | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args); | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args); | |
+ } | |
+ | |
+ { | |
+ CUfunction cuNLMNormalize; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1)); | |
+ void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride}; | |
+ CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h); | |
+ CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ } | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ bool denoising_construct_transform(DenoisingTask *task) | |
+ { | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUfunction cuFilterConstructTransform; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED)); | |
+ CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h); | |
+ | |
+ void *args[] = {&task->buffer.mem.device_pointer, | |
+ &task->tile_info_mem.device_pointer, | |
+ &task->storage.transform.device_pointer, | |
+ &task->storage.rank.device_pointer, | |
+ &task->filter_area, | |
+ &task->rect, | |
+ &task->radius, | |
+ &task->pca_threshold, | |
+ &task->buffer.pass_stride, | |
+ &task->buffer.frame_stride, | |
+ &task->buffer.use_time}; | |
+ CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ bool denoising_accumulate(device_ptr color_ptr, | |
+ device_ptr color_variance_ptr, | |
+ device_ptr scale_ptr, | |
+ int frame, | |
+ DenoisingTask *task) | |
+ { | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ int r = task->radius; | |
+ int f = 4; | |
+ float a = 1.0f; | |
+ float k_2 = task->nlm_k_2; | |
+ | |
+ int w = task->reconstruction_state.source_w; | |
+ int h = task->reconstruction_state.source_h; | |
+ int stride = task->buffer.stride; | |
+ int frame_offset = frame * task->buffer.frame_stride; | |
+ int t = task->tile_info->frames[frame]; | |
+ | |
+ int pass_stride = task->buffer.pass_stride; | |
+ int num_shifts = (2 * r + 1) * (2 * r + 1); | |
+ | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer); | |
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts; | |
+ | |
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference")); | |
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur")); | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight")); | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian")); | |
+ | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED)); | |
+ | |
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, | |
+ task->reconstruction_state.source_w * | |
+ task->reconstruction_state.source_h, | |
+ num_shifts); | |
+ | |
+ void *calc_difference_args[] = {&color_ptr, | |
+ &color_variance_ptr, | |
+ &scale_ptr, | |
+ &difference, | |
+ &w, | |
+ &h, | |
+ &stride, | |
+ &pass_stride, | |
+ &r, | |
+ &pass_stride, | |
+ &frame_offset, | |
+ &a, | |
+ &k_2}; | |
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; | |
+ void *calc_weight_args[] = { | |
+ &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; | |
+ void *construct_gramian_args[] = {&t, | |
+ &blurDifference, | |
+ &task->buffer.mem.device_pointer, | |
+ &task->storage.transform.device_pointer, | |
+ &task->storage.rank.device_pointer, | |
+ &task->storage.XtWX.device_pointer, | |
+ &task->storage.XtWY.device_pointer, | |
+ &task->reconstruction_state.filter_window, | |
+ &w, | |
+ &h, | |
+ &stride, | |
+ &pass_stride, | |
+ &r, | |
+ &f, | |
+ &frame_offset, | |
+ &task->buffer.use_time}; | |
+ | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args); | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); | |
+ CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task) | |
+ { | |
+ CUfunction cuFinalize; | |
+ cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1)); | |
+ void *finalize_args[] = {&output_ptr, | |
+ &task->storage.rank.device_pointer, | |
+ &task->storage.XtWX.device_pointer, | |
+ &task->storage.XtWY.device_pointer, | |
+ &task->filter_area, | |
+ &task->reconstruction_state.buffer_params.x, | |
+ &task->render_buffer.samples}; | |
+ CUDA_GET_BLOCKSIZE( | |
+ cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h); | |
+ CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ bool denoising_combine_halves(device_ptr a_ptr, | |
+ device_ptr b_ptr, | |
+ device_ptr mean_ptr, | |
+ device_ptr variance_ptr, | |
+ int r, | |
+ int4 rect, | |
+ DenoisingTask *task) | |
+ { | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUfunction cuFilterCombineHalves; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1)); | |
+ CUDA_GET_BLOCKSIZE( | |
+ cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y); | |
+ | |
+ void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r}; | |
+ CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ bool denoising_divide_shadow(device_ptr a_ptr, | |
+ device_ptr b_ptr, | |
+ device_ptr sample_variance_ptr, | |
+ device_ptr sv_variance_ptr, | |
+ device_ptr buffer_variance_ptr, | |
+ DenoisingTask *task) | |
+ { | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUfunction cuFilterDivideShadow; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1)); | |
+ CUDA_GET_BLOCKSIZE( | |
+ cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y); | |
+ | |
+ void *args[] = {&task->render_buffer.samples, | |
+ &task->tile_info_mem.device_pointer, | |
+ &a_ptr, | |
+ &b_ptr, | |
+ &sample_variance_ptr, | |
+ &sv_variance_ptr, | |
+ &buffer_variance_ptr, | |
+ &task->rect, | |
+ &task->render_buffer.pass_stride, | |
+ &task->render_buffer.offset}; | |
+ CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ bool denoising_get_feature(int mean_offset, | |
+ int variance_offset, | |
+ device_ptr mean_ptr, | |
+ device_ptr variance_ptr, | |
+ float scale, | |
+ DenoisingTask *task) | |
+ { | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUfunction cuFilterGetFeature; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1)); | |
+ CUDA_GET_BLOCKSIZE( | |
+ cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y); | |
+ | |
+ void *args[] = {&task->render_buffer.samples, | |
+ &task->tile_info_mem.device_pointer, | |
+ &mean_offset, | |
+ &variance_offset, | |
+ &mean_ptr, | |
+ &variance_ptr, | |
+ &scale, | |
+ &task->rect, | |
+ &task->render_buffer.pass_stride, | |
+ &task->render_buffer.offset}; | |
+ CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ bool denoising_write_feature(int out_offset, | |
+ device_ptr from_ptr, | |
+ device_ptr buffer_ptr, | |
+ DenoisingTask *task) | |
+ { | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUfunction cuFilterWriteFeature; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1)); | |
+ CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w); | |
+ | |
+ void *args[] = {&task->render_buffer.samples, | |
+ &task->reconstruction_state.buffer_params, | |
+ &task->filter_area, | |
+ &from_ptr, | |
+ &buffer_ptr, | |
+ &out_offset, | |
+ &task->rect}; | |
+ CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ bool denoising_detect_outliers(device_ptr image_ptr, | |
+ device_ptr variance_ptr, | |
+ device_ptr depth_ptr, | |
+ device_ptr output_ptr, | |
+ DenoisingTask *task) | |
+ { | |
+ if (have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUfunction cuFilterDetectOutliers; | |
+ cuda_assert(cuModuleGetFunction( | |
+ &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers")); | |
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1)); | |
+ CUDA_GET_BLOCKSIZE( | |
+ cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y); | |
+ | |
+ void *args[] = {&image_ptr, | |
+ &variance_ptr, | |
+ &depth_ptr, | |
+ &output_ptr, | |
+ &task->rect, | |
+ &task->buffer.pass_stride}; | |
+ | |
+ CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args); | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ return !have_error(); | |
+ } | |
+ | |
+ void denoise(RenderTile &rtile, DenoisingTask &denoising) | |
+ { | |
+ denoising.functions.construct_transform = function_bind( | |
+ &CUDADevice::denoising_construct_transform, this, &denoising); | |
+ denoising.functions.accumulate = function_bind( | |
+ &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising); | |
+ denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising); | |
+ denoising.functions.divide_shadow = function_bind( | |
+ &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); | |
+ denoising.functions.non_local_means = function_bind( | |
+ &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising); | |
+ denoising.functions.combine_halves = function_bind( | |
+ &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising); | |
+ denoising.functions.get_feature = function_bind( | |
+ &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising); | |
+ denoising.functions.write_feature = function_bind( | |
+ &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising); | |
+ denoising.functions.detect_outliers = function_bind( | |
+ &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising); | |
+ | |
+ denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); | |
+ denoising.render_buffer.samples = rtile.sample; | |
+ denoising.buffer.gpu_temporary_mem = true; | |
+ | |
+ denoising.run_denoising(&rtile); | |
+ } | |
+ | |
+ void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles) | |
+ { | |
+ scoped_timer timer(&rtile.buffers->render_time); | |
+ | |
+ if (have_error()) | |
+ return; | |
+ | |
+ CUDAContextScope scope(this); | |
+ CUfunction cuPathTrace; | |
+ | |
+ /* Get kernel function. */ | |
+ if (task.integrator_branched) { | |
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); | |
+ } | |
+ else { | |
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); | |
+ } | |
+ | |
+ if (have_error()) { | |
+ return; | |
+ } | |
+ | |
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); | |
+ | |
+ /* Allocate work tile. */ | |
+ work_tiles.alloc(1); | |
+ | |
+ WorkTile *wtile = work_tiles.data(); | |
+ wtile->x = rtile.x; | |
+ wtile->y = rtile.y; | |
+ wtile->w = rtile.w; | |
+ wtile->h = rtile.h; | |
+ wtile->offset = rtile.offset; | |
+ wtile->stride = rtile.stride; | |
+ wtile->buffer = (float *)cuda_device_ptr(rtile.buffer); | |
+ | |
+ /* Prepare work size. More step samples render faster, but for now we | |
+ * remain conservative for GPUs connected to a display to avoid driver | |
+ * timeouts and display freezing. */ | |
+ int min_blocks, num_threads_per_block; | |
+ cuda_assert(cuOccupancyMaxPotentialBlockSize( | |
+ &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0)); | |
+ if (!info.display_device) { | |
+ min_blocks *= 8; | |
+ } | |
+ | |
+ uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h); | |
+ | |
+ /* Render all samples. */ | |
+ int start_sample = rtile.start_sample; | |
+ int end_sample = rtile.start_sample + rtile.num_samples; | |
+ | |
+ for (int sample = start_sample; sample < end_sample; sample += step_samples) { | |
+ /* Setup and copy work tile to device. */ | |
+ wtile->start_sample = sample; | |
+ wtile->num_samples = min(step_samples, end_sample - sample); | |
+ work_tiles.copy_to_device(); | |
+ | |
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer); | |
+ uint total_work_size = wtile->w * wtile->h * wtile->num_samples; | |
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block); | |
+ | |
+ /* Launch kernel. */ | |
+ void *args[] = {&d_work_tiles, &total_work_size}; | |
+ | |
+ cuda_assert(cuLaunchKernel( | |
+ cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0)); | |
+ | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ /* Update progress. */ | |
+ rtile.sample = sample + wtile->num_samples; | |
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples); | |
+ | |
+ if (task.get_cancel()) { | |
+ if (task.need_finish_queue == false) | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ | |
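The work sizing in path_trace() above rewards a worked example: cuOccupancyMaxPotentialBlockSize suggests a block size plus the minimum grid that reaches full occupancy, and step_samples is chosen so a single launch carries at least that many threads of work across the tile. A sketch with made-up numbers, assuming divide_up is round-up integer division as elsewhere in Cycles.

    #include <algorithm>
    #include <cstdio>

    /* Assumed semantics of Cycles' divide_up: integer division rounding up. */
    static int divide_up(int x, int y) { return (x + y - 1) / y; }

    int main() {
      /* Hypothetical occupancy result: 40 blocks of 256 threads, scaled by 8
       * because the GPU is not driving a display. */
      int min_blocks = 40 * 8;
      int num_threads_per_block = 256;
      int w = 256, h = 256; /* tile size */

      /* Enough samples per launch to cover the occupancy target. */
      int step_samples = divide_up(min_blocks * num_threads_per_block, w * h); /* 2 */

      /* The sample loop walks [start, end) in steps, clamping the tail. */
      int start_sample = 0, end_sample = 7;
      for (int sample = start_sample; sample < end_sample; sample += step_samples) {
        int num_samples = std::min(step_samples, end_sample - sample);
        printf("launch covers samples [%d, %d)\n", sample, sample + num_samples);
      }
      return 0;
    }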
+ void film_convert(DeviceTask &task, | |
+ device_ptr buffer, | |
+ device_ptr rgba_byte, | |
+ device_ptr rgba_half) | |
+ { | |
+ if (have_error()) | |
+ return; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUfunction cuFilmConvert; | |
+ CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half); | |
+ CUdeviceptr d_buffer = cuda_device_ptr(buffer); | |
+ | |
+ /* get kernel function */ | |
+ if (rgba_half) { | |
+ cuda_assert( | |
+ cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float")); | |
+ } | |
+ else { | |
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte")); | |
+ } | |
+ | |
+ float sample_scale = 1.0f / (task.sample + 1); | |
+ | |
+ /* pass in parameters */ | |
+ void *args[] = {&d_rgba, | |
+ &d_buffer, | |
+ &sample_scale, | |
+ &task.x, | |
+ &task.y, | |
+ &task.w, | |
+ &task.h, | |
+ &task.offset, | |
+ &task.stride}; | |
+ | |
+ /* launch kernel */ | |
+ int threads_per_block; | |
+ cuda_assert(cuFuncGetAttribute( | |
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert)); | |
+ | |
+ int xthreads = (int)sqrt(threads_per_block); | |
+ int ythreads = (int)sqrt(threads_per_block); | |
+ int xblocks = (task.w + xthreads - 1) / xthreads; | |
+ int yblocks = (task.h + ythreads - 1) / ythreads; | |
+ | |
+ cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1)); | |
+ | |
+ cuda_assert(cuLaunchKernel(cuFilmConvert, | |
+ xblocks, | |
+ yblocks, | |
+ 1, /* blocks */ | |
+ xthreads, | |
+ ythreads, | |
+ 1, /* threads */ | |
+ 0, | |
+ 0, | |
+ args, | |
+ 0)); | |
+ | |
+ unmap_pixels((rgba_byte) ? rgba_byte : rgba_half); | |
+ | |
+ cuda_assert(cuCtxSynchronize()); | |
+ } | |
+ | |
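film_convert() above derives its 2D launch configuration from the kernel's reported thread limit by taking a square root per axis. A worked example with an assumed limit of 1024 threads per block:

    #include <cmath>
    #include <cstdio>

    int main() {
      /* Hypothetical CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK value. */
      int threads_per_block = 1024;
      int task_w = 1920, task_h = 1080;

      /* Square blocks: sqrt(1024) = 32, so 32x32 = 1024 threads. For limits
       * that are not perfect squares this under-fills the block slightly. */
      int xthreads = (int)sqrt((double)threads_per_block);
      int ythreads = xthreads;

      /* Round the image up to whole blocks; edge blocks cover partial tiles. */
      int xblocks = (task_w + xthreads - 1) / xthreads; /* 60 */
      int yblocks = (task_h + ythreads - 1) / ythreads; /* 34 */

      printf("grid %dx%d of %dx%d-thread blocks\n", xblocks, yblocks, xthreads, ythreads);
      return 0;
    }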
+ void shader(DeviceTask &task) | |
+ { | |
+ if (have_error()) | |
+ return; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ CUfunction cuShader; | |
+ CUdeviceptr d_input = cuda_device_ptr(task.shader_input); | |
+ CUdeviceptr d_output = cuda_device_ptr(task.shader_output); | |
+ | |
+ /* get kernel function */ | |
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) { | |
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake")); | |
+ } | |
+ else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) { | |
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace")); | |
+ } | |
+ else { | |
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background")); | |
+ } | |
+ | |
+ /* do tasks in smaller chunks, so we can cancel it */ | |
+ const int shader_chunk_size = 65536; | |
+ const int start = task.shader_x; | |
+ const int end = task.shader_x + task.shader_w; | |
+ int offset = task.offset; | |
+ | |
+ bool canceled = false; | |
+ for (int sample = 0; sample < task.num_samples && !canceled; sample++) { | |
+ for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) { | |
+ int shader_w = min(shader_chunk_size, end - shader_x); | |
+ | |
+ /* pass in parameters */ | |
+ void *args[8]; | |
+ int arg = 0; | |
+ args[arg++] = &d_input; | |
+ args[arg++] = &d_output; | |
+ args[arg++] = &task.shader_eval_type; | |
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) { | |
+ args[arg++] = &task.shader_filter; | |
+ } | |
+ args[arg++] = &shader_x; | |
+ args[arg++] = &shader_w; | |
+ args[arg++] = &offset; | |
+ args[arg++] = &sample; | |
+ | |
+ /* launch kernel */ | |
+ int threads_per_block; | |
+ cuda_assert(cuFuncGetAttribute( | |
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); | |
+ | |
+ int xblocks = (shader_w + threads_per_block - 1) / threads_per_block; | |
+ | |
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); | |
+ cuda_assert(cuLaunchKernel(cuShader, | |
+ xblocks, | |
+ 1, | |
+ 1, /* blocks */ | |
+ threads_per_block, | |
+ 1, | |
+ 1, /* threads */ | |
+ 0, | |
+ 0, | |
+ args, | |
+ 0)); | |
+ | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ if (task.get_cancel()) { | |
+ canceled = true; | |
+ break; | |
+ } | |
+ } | |
+ | |
+ task.update_progress(NULL); | |
+ } | |
+ } | |
+ | |
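The argument passing used for cuLaunchKernel throughout this file is easy to trip over: kernelParams is an array of pointers to the host-side values, packed in kernel signature order, and shader() above inserts one extra filter argument only for bake kernels. A stand-alone illustration of the packing; all values are placeholders.

    #include <cstdio>

    int main() {
      /* Stand-ins for the device pointers and task fields used above. */
      unsigned long long d_input = 0, d_output = 0; /* like CUdeviceptr */
      int eval_type = 2, filter = 1, shader_x = 0, shader_w = 65536;
      int offset = 0, sample = 0;
      bool is_bake = true; /* task.shader_eval_type >= SHADER_EVAL_BAKE */

      /* Each slot points at the host copy of one kernel parameter. */
      void *args[8];
      int narg = 0;
      args[narg++] = &d_input;
      args[narg++] = &d_output;
      args[narg++] = &eval_type;
      if (is_bake)
        args[narg++] = &filter; /* bake kernels take one extra argument */
      args[narg++] = &shader_x;
      args[narg++] = &shader_w;
      args[narg++] = &offset;
      args[narg++] = &sample;

      printf("packed %d arguments for cuLaunchKernel\n", narg);
      return 0;
    }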
+ CUdeviceptr map_pixels(device_ptr mem) | |
+ { | |
+ if (!background) { | |
+ PixelMem pmem = pixel_mem_map[mem]; | |
+ CUdeviceptr buffer; | |
+ | |
+ size_t bytes; | |
+ cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0)); | |
+ cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource)); | |
+ | |
+ return buffer; | |
+ } | |
+ | |
+ return cuda_device_ptr(mem); | |
+ } | |
+ | |
+ void unmap_pixels(device_ptr mem) | |
+ { | |
+ if (!background) { | |
+ PixelMem pmem = pixel_mem_map[mem]; | |
+ | |
+ cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0)); | |
+ } | |
+ } | |
+ | |
+ void pixels_alloc(device_memory &mem) | |
+ { | |
+ PixelMem pmem; | |
+ | |
+ pmem.w = mem.data_width; | |
+ pmem.h = mem.data_height; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ glGenBuffers(1, &pmem.cuPBO); | |
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); | |
+ if (mem.data_type == TYPE_HALF) | |
+ glBufferData( | |
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW); | |
+ else | |
+ glBufferData( | |
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW); | |
+ | |
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | |
+ | |
+ glActiveTexture(GL_TEXTURE0); | |
+ glGenTextures(1, &pmem.cuTexId); | |
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); | |
+ if (mem.data_type == TYPE_HALF) | |
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL); | |
+ else | |
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); | |
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | |
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | |
+ glBindTexture(GL_TEXTURE_2D, 0); | |
+ | |
+ CUresult result = cuGraphicsGLRegisterBuffer( | |
+ &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); | |
+ | |
+ if (result == CUDA_SUCCESS) { | |
+ mem.device_pointer = pmem.cuTexId; | |
+ pixel_mem_map[mem.device_pointer] = pmem; | |
+ | |
+ mem.device_size = mem.memory_size(); | |
+ stats.mem_alloc(mem.device_size); | |
+ | |
+ return; | |
+ } | |
+ else { | |
+ /* failed to register buffer, fallback to no interop */ | |
+ glDeleteBuffers(1, &pmem.cuPBO); | |
+ glDeleteTextures(1, &pmem.cuTexId); | |
+ | |
+ background = true; | |
+ } | |
+ } | |
+ | |
+ void pixels_copy_from(device_memory &mem, int y, int w, int h) | |
+ { | |
+ PixelMem pmem = pixel_mem_map[mem.device_pointer]; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); | |
+ uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); | |
+ size_t offset = sizeof(uchar) * 4 * y * w; | |
+ memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h); | |
+ glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); | |
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | |
+ } | |
+ | |
+ void pixels_free(device_memory &mem) | |
+ { | |
+ if (mem.device_pointer) { | |
+ PixelMem pmem = pixel_mem_map[mem.device_pointer]; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+ cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); | |
+ glDeleteBuffers(1, &pmem.cuPBO); | |
+ glDeleteTextures(1, &pmem.cuTexId); | |
+ | |
+ pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer)); | |
+ mem.device_pointer = 0; | |
+ | |
+ stats.mem_free(mem.device_size); | |
+ mem.device_size = 0; | |
+ } | |
+ } | |
+ | |
+ void draw_pixels(device_memory &mem, | |
+ int y, | |
+ int w, | |
+ int h, | |
+ int width, | |
+ int height, | |
+ int dx, | |
+ int dy, | |
+ int dw, | |
+ int dh, | |
+ bool transparent, | |
+ const DeviceDrawParams &draw_params) | |
+ { | |
+ assert(mem.type == MEM_PIXELS); | |
+ | |
+ if (!background) { | |
+ const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL); | |
+ PixelMem pmem = pixel_mem_map[mem.device_pointer]; | |
+ float *vpointer; | |
+ | |
+ CUDAContextScope scope(this); | |
+ | |
+    /* For multi-device rendering this takes the inefficient approach of | 
+     * allocating all pixels on the device even though we only render to a subset. */ | 
+ size_t offset = 4 * y * w; | |
+ | |
+ if (mem.data_type == TYPE_HALF) | |
+ offset *= sizeof(GLhalf); | |
+ else | |
+ offset *= sizeof(uint8_t); | |
+ | |
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); | |
+ glActiveTexture(GL_TEXTURE0); | |
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId); | |
+ if (mem.data_type == TYPE_HALF) { | |
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset); | |
+ } | |
+ else { | |
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset); | |
+ } | |
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | |
+ | |
+ if (transparent) { | |
+ glEnable(GL_BLEND); | |
+ glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); | |
+ } | |
+ | |
+ GLint shader_program; | |
+ if (use_fallback_shader) { | |
+ if (!bind_fallback_display_space_shader(dw, dh)) { | |
+ return; | |
+ } | |
+ shader_program = fallback_shader_program; | |
+ } | |
+ else { | |
+ draw_params.bind_display_space_shader_cb(); | |
+ glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program); | |
+ } | |
+ | |
+ if (!vertex_buffer) { | |
+ glGenBuffers(1, &vertex_buffer); | |
+ } | |
+ | |
+ glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); | |
+ /* invalidate old contents - | |
+ * avoids stalling if buffer is still waiting in queue to be rendered */ | |
+ glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW); | |
+ | |
+ vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); | |
+ | |
+ if (vpointer) { | |
+ /* texture coordinate - vertex pair */ | |
+ vpointer[0] = 0.0f; | |
+ vpointer[1] = 0.0f; | |
+ vpointer[2] = dx; | |
+ vpointer[3] = dy; | |
+ | |
+ vpointer[4] = (float)w / (float)pmem.w; | |
+ vpointer[5] = 0.0f; | |
+ vpointer[6] = (float)width + dx; | |
+ vpointer[7] = dy; | |
+ | |
+ vpointer[8] = (float)w / (float)pmem.w; | |
+ vpointer[9] = (float)h / (float)pmem.h; | |
+ vpointer[10] = (float)width + dx; | |
+ vpointer[11] = (float)height + dy; | |
+ | |
+ vpointer[12] = 0.0f; | |
+ vpointer[13] = (float)h / (float)pmem.h; | |
+ vpointer[14] = dx; | |
+ vpointer[15] = (float)height + dy; | |
+ | |
+ glUnmapBuffer(GL_ARRAY_BUFFER); | |
+ } | |
+ | |
+ GLuint vertex_array_object; | |
+ GLuint position_attribute, texcoord_attribute; | |
+ | |
+ glGenVertexArrays(1, &vertex_array_object); | |
+ glBindVertexArray(vertex_array_object); | |
+ | |
+ texcoord_attribute = glGetAttribLocation(shader_program, "texCoord"); | |
+ position_attribute = glGetAttribLocation(shader_program, "pos"); | |
+ | |
+ glEnableVertexAttribArray(texcoord_attribute); | |
+ glEnableVertexAttribArray(position_attribute); | |
+ | |
+ glVertexAttribPointer( | |
+ texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0); | |
+ glVertexAttribPointer(position_attribute, | |
+ 2, | |
+ GL_FLOAT, | |
+ GL_FALSE, | |
+ 4 * sizeof(float), | |
+ (const GLvoid *)(sizeof(float) * 2)); | |
+ | |
+ glDrawArrays(GL_TRIANGLE_FAN, 0, 4); | |
+ | |
+ if (use_fallback_shader) { | |
+ glUseProgram(0); | |
+ } | |
+ else { | |
+ draw_params.unbind_display_space_shader_cb(); | |
+ } | |
+ | |
+ if (transparent) { | |
+ glDisable(GL_BLEND); | |
+ } | |
+ | |
+ glBindTexture(GL_TEXTURE_2D, 0); | |
+ | |
+ return; | |
+ } | |
+ | |
+ Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params); | |
+ } | |
+ | |
+ void thread_run(DeviceTask *task) | |
+ { | |
+ CUDAContextScope scope(this); | |
+ | |
+ if (task->type == DeviceTask::RENDER) { | |
+ DeviceRequestedFeatures requested_features; | |
+ if (use_split_kernel()) { | |
+ if (split_kernel == NULL) { | |
+ split_kernel = new CUDASplitKernel(this); | |
+ split_kernel->load_kernels(requested_features); | |
+ } | |
+ } | |
+ | |
+ device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY); | |
+ | |
+ /* keep rendering tiles until done */ | |
+ RenderTile tile; | |
+ DenoisingTask denoising(this, *task); | |
+ | |
+ while (task->acquire_tile(this, tile)) { | |
+ if (tile.task == RenderTile::PATH_TRACE) { | |
+ if (use_split_kernel()) { | |
+ device_only_memory<uchar> void_buffer(this, "void_buffer"); | |
+ split_kernel->path_trace(task, tile, void_buffer, void_buffer); | |
+ } | |
+ else { | |
+ path_trace(*task, tile, work_tiles); | |
+ } | |
+ } | |
+ else if (tile.task == RenderTile::DENOISE) { | |
+ tile.sample = tile.start_sample + tile.num_samples; | |
+ | |
+ denoise(tile, denoising); | |
+ | |
+ task->update_progress(&tile, tile.w * tile.h); | |
+ } | |
+ | |
+ task->release_tile(tile); | |
+ | |
+ if (task->get_cancel()) { | |
+ if (task->need_finish_queue == false) | |
+ break; | |
+ } | |
+ } | |
+ | |
+ work_tiles.free(); | |
+ } | |
+ else if (task->type == DeviceTask::SHADER) { | |
+ shader(*task); | |
+ | |
+ cuda_assert(cuCtxSynchronize()); | |
+ } | |
+ } | |
+ | |
+ class CUDADeviceTask : public DeviceTask { | |
+ public: | |
+ CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task) | |
+ { | |
+ run = function_bind(&CUDADevice::thread_run, device, this); | |
+ } | |
+ }; | |
+ | |
+ void task_add(DeviceTask &task) | |
+ { | |
+ CUDAContextScope scope(this); | |
+ | |
+ /* Load texture info. */ | |
+ load_texture_info(); | |
+ | |
+ /* Synchronize all memory copies before executing task. */ | |
+ cuda_assert(cuCtxSynchronize()); | |
+ | |
+ if (task.type == DeviceTask::FILM_CONVERT) { | |
+ /* must be done in main thread due to opengl access */ | |
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); | |
+ } | |
+ else { | |
+ task_pool.push(new CUDADeviceTask(this, task)); | |
+ } | |
+ } | |
+ | |
+ void task_wait() | |
+ { | |
+ task_pool.wait(); | |
+ } | |
+ | |
+ void task_cancel() | |
+ { | |
+ task_pool.cancel(); | |
+ } | |
+ | |
+ friend class CUDASplitKernelFunction; | |
+ friend class CUDASplitKernel; | |
+ friend class CUDAContextScope; | |
+}; | |
+ | |
+/* redefine the cuda_assert macro so it can be used outside of the CUDADevice class | |
+ * now that the definition of that class is complete | |
+ */ | |
+#undef cuda_assert | |
+#define cuda_assert(stmt) \ | |
+ { \ | |
+ CUresult result = stmt; \ | |
+\ | |
+ if (result != CUDA_SUCCESS) { \ | |
+ string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \ | |
+ if (device->error_msg == "") \ | |
+ device->error_msg = message; \ | |
+ fprintf(stderr, "%s\n", message.c_str()); \ | |
+ /*cuda_abort();*/ \ | |
+ device->cuda_error_documentation(); \ | |
+ } \ | |
+ } \ | |
+ (void)0 | |
+ | |
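A note on the macro shape above: a braced body followed by (void)0 forces the caller to write a trailing semicolon, much like the classic do { ... } while (0) wrapper, though unlike do/while it is not safe inside a braceless if/else. A compilable sketch of the same idiom with a generic error check:

    #include <cstdio>

    /* Same shape as cuda_assert: braces for the body, then (void)0 so the
     * macro call must end with a semicolon. */
    #define CHECK(stmt) \
      { \
        int result_ = (stmt); \
        if (result_ != 0) { \
          fprintf(stderr, "error %d in %s\n", result_, #stmt); \
        } \
      } \
      (void)0

    static int might_fail(void) { return 42; }

    int main() {
      CHECK(might_fail()); /* prints: error 42 in might_fail() */
      return 0;
    }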
+/* CUDA context scope. */ | |
+ | |
+CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device) | |
+{ | |
+ cuda_assert(cuCtxPushCurrent(device->cuContext)); | |
+} | |
+ | |
+CUDAContextScope::~CUDAContextScope() | |
+{ | |
+ cuda_assert(cuCtxPopCurrent(NULL)); | |
+} | |
+ | |
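CUDAContextScope below is plain RAII: the constructor pushes the device's CUDA context and the destructor pops it, so every early return still restores the previous context. A minimal sketch of the shape:

    #include <cstdio>

    /* Push in the constructor, pop in the destructor. */
    struct ContextScope {
      ContextScope() { printf("push context\n"); }
      ~ContextScope() { printf("pop context\n"); }
    };

    static bool do_work(bool fail) {
      ContextScope scope;
      if (fail)
        return false; /* the destructor still pops here */
      printf("work done\n");
      return true;
    }

    int main() {
      do_work(false);
      do_work(true);
      return 0;
    }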
+/* split kernel */ | |
+ | |
+class CUDASplitKernelFunction : public SplitKernelFunction { | |
+ CUDADevice *device; | |
+ CUfunction func; | |
+ | |
+ public: | |
+ CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func) | |
+ { | |
+ } | |
+ | |
+ /* enqueue the kernel, returns false if there is an error */ | |
+ bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/) | |
+ { | |
+ return enqueue(dim, NULL); | |
+ } | |
+ | |
+ /* enqueue the kernel, returns false if there is an error */ | |
+ bool enqueue(const KernelDimensions &dim, void *args[]) | |
+ { | |
+ if (device->have_error()) | |
+ return false; | |
+ | |
+ CUDAContextScope scope(device); | |
+ | |
+ /* we ignore dim.local_size for now, as this is faster */ | |
+ int threads_per_block; | |
+ cuda_assert( | |
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); | |
+ | |
+ int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) / | |
+ threads_per_block; | |
+ | |
+ cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1)); | |
+ | |
+ cuda_assert(cuLaunchKernel(func, | |
+ xblocks, | |
+ 1, | |
+ 1, /* blocks */ | |
+ threads_per_block, | |
+ 1, | |
+ 1, /* threads */ | |
+ 0, | |
+ 0, | |
+ args, | |
+ 0)); | |
+ | |
+ return !device->have_error(); | |
+ } | |
+}; | |
+ | |
+CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device) | |
+{ | |
+} | |
+ | |
+uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/, | |
+ device_memory & /*data*/, | |
+ size_t num_threads) | |
+{ | |
+ CUDAContextScope scope(device); | |
+ | |
+ device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); | |
+ size_buffer.alloc(1); | |
+ size_buffer.zero_to_device(); | |
+ | |
+ uint threads = num_threads; | |
+ CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer); | |
+ | |
+ struct args_t { | |
+ uint *num_threads; | |
+ CUdeviceptr *size; | |
+ }; | |
+ | |
+ args_t args = {&threads, &d_size}; | |
+ | |
+ CUfunction state_buffer_size; | |
+ cuda_assert( | |
+ cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size")); | |
+ | |
+ cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0)); | |
+ | |
+ size_buffer.copy_from_device(0, 1, 1); | |
+ size_t size = size_buffer[0]; | |
+ size_buffer.free(); | |
+ | |
+ return size; | |
+} | |
+ | |
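The args_t struct above leans on a layout assumption: a struct whose members are all pointers has the same layout as an array of pointers (same-size members, no padding), so its address can be cast to void ** for cuLaunchKernel while keeping the fields named. A small check of that assumption:

    #include <cstdio>

    /* Mirrors the args_t above: two pointer members, laid out like void *[2]. */
    struct args_t {
      unsigned *num_threads;
      unsigned long long *size;
    };

    int main() {
      unsigned threads = 64;
      unsigned long long size = 0;
      args_t args = {&threads, &size};

      void **as_array = (void **)&args;
      printf("args[0] -> %u\n", *(unsigned *)as_array[0]); /* 64 */
      printf("packed like an array: %d\n", (int)(sizeof(args_t) == 2 * sizeof(void *)));
      return 0;
    }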
+bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim, | |
+ RenderTile &rtile, | |
+ int num_global_elements, | |
+ device_memory & /*kernel_globals*/, | |
+ device_memory & /*kernel_data*/, | |
+ device_memory &split_data, | |
+ device_memory &ray_state, | |
+ device_memory &queue_index, | |
+ device_memory &use_queues_flag, | |
+ device_memory &work_pool_wgs) | |
+{ | |
+ CUDAContextScope scope(device); | |
+ | |
+ CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer); | |
+ CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer); | |
+ CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer); | |
+ CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer); | |
+ CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer); | |
+ | |
+ CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer); | |
+ | |
+ int end_sample = rtile.start_sample + rtile.num_samples; | |
+ int queue_size = dim.global_size[0] * dim.global_size[1]; | |
+ | |
+ struct args_t { | |
+ CUdeviceptr *split_data_buffer; | |
+ int *num_elements; | |
+ CUdeviceptr *ray_state; | |
+ int *start_sample; | |
+ int *end_sample; | |
+ int *sx; | |
+ int *sy; | |
+ int *sw; | |
+ int *sh; | |
+ int *offset; | |
+ int *stride; | |
+ CUdeviceptr *queue_index; | |
+ int *queuesize; | |
+ CUdeviceptr *use_queues_flag; | |
+ CUdeviceptr *work_pool_wgs; | |
+ int *num_samples; | |
+ CUdeviceptr *buffer; | |
+ }; | |
+ | |
+ args_t args = {&d_split_data, | |
+ &num_global_elements, | |
+ &d_ray_state, | |
+ &rtile.start_sample, | |
+ &end_sample, | |
+ &rtile.x, | |
+ &rtile.y, | |
+ &rtile.w, | |
+ &rtile.h, | |
+ &rtile.offset, | |
+ &rtile.stride, | |
+ &d_queue_index, | |
+ &queue_size, | |
+ &d_use_queues_flag, | |
+ &d_work_pool_wgs, | |
+ &rtile.num_samples, | |
+ &d_buffer}; | |
+ | |
+ CUfunction data_init; | |
+ cuda_assert( | |
+ cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init")); | |
+ if (device->have_error()) { | |
+ return false; | |
+ } | |
+ | |
+ CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args); | |
+ | |
+ return !device->have_error(); | |
+} | |
+ | |
+SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name, | |
+ const DeviceRequestedFeatures &) | |
+{ | |
+ CUDAContextScope scope(device); | |
+ CUfunction func; | |
+ | |
+ cuda_assert( | |
+ cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data())); | |
+ if (device->have_error()) { | |
+ device->cuda_error_message( | |
+ string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data())); | |
+ return NULL; | |
+ } | |
+ | |
+ return new CUDASplitKernelFunction(device, func); | |
+} | |
+ | |
+int2 CUDASplitKernel::split_kernel_local_size() | |
+{ | |
+ return make_int2(32, 1); | |
+} | |
+ | |
+int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg, | |
+ device_memory &data, | |
+ DeviceTask * /*task*/) | |
+{ | |
+ CUDAContextScope scope(device); | |
+ size_t free; | |
+ size_t total; | |
+ | |
+ cuda_assert(cuMemGetInfo(&free, &total)); | |
+ | |
+ VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free) | |
+ << " bytes. (" << string_human_readable_size(free) << ")."; | |
+ | |
+ size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2); | |
+ size_t side = round_down((int)sqrt(num_elements), 32); | |
+ int2 global_size = make_int2(side, round_down(num_elements / side, 16)); | |
+ VLOG(1) << "Global size: " << global_size << "."; | |
+ return global_size; | |
+} | |
+ | |
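The global size computation above can be followed with concrete numbers: the split-kernel state buffer is capped at half of free device memory, and the resulting element count is shaped into a 2D grid whose sides are rounded down to warp-friendly multiples. A sketch, assuming round_down rounds to a lower multiple:

    #include <cmath>
    #include <cstdio>

    /* Assumed semantics of Cycles' round_down: largest multiple of m <= x. */
    static int round_down(int x, int m) { return (x / m) * m; }

    int main() {
      /* Suppose half of free memory fits 2,000,000 split-kernel states. */
      size_t num_elements = 2000000;

      /* One side aligned to 32 (warp size), the other to 16. */
      int side = round_down((int)sqrt((double)num_elements), 32); /* 1408 */
      int other = round_down((int)(num_elements / side), 16);     /* 1408 */

      printf("global size %d x %d (%d states)\n", side, other, side * other);
      return 0;
    }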
+bool device_cuda_init() | |
+{ | |
+#ifdef WITH_CUDA_DYNLOAD | |
+ static bool initialized = false; | |
+ static bool result = false; | |
+ | |
+ if (initialized) | |
+ return result; | |
+ | |
+ initialized = true; | |
+ int cuew_result = cuewInit(CUEW_INIT_CUDA); | |
+ if (cuew_result == CUEW_SUCCESS) { | |
+ VLOG(1) << "CUEW initialization succeeded"; | |
+ if (CUDADevice::have_precompiled_kernels()) { | |
+ VLOG(1) << "Found precompiled kernels"; | |
+ result = true; | |
+ } | |
+# ifndef _WIN32 | |
+ else if (cuewCompilerPath() != NULL) { | |
+ VLOG(1) << "Found CUDA compiler " << cuewCompilerPath(); | |
+ result = true; | |
+ } | |
+ else { | |
+ VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found," | |
+ << " unable to use CUDA"; | |
+ } | |
+# endif | |
+ } | |
+ else { | |
+ VLOG(1) << "CUEW initialization failed: " | |
+ << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" : | |
+ "Error opening the library"); | |
+ } | |
+ | |
+ return result; | |
+#else /* WITH_CUDA_DYNLOAD */ | |
+ return true; | |
+#endif /* WITH_CUDA_DYNLOAD */ | |
+} | |
+ | |
+Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background) | |
+{ | |
+ return new CUDADevice(info, stats, profiler, background); | |
+} | |
+ | |
+static CUresult device_cuda_safe_init() | |
+{ | |
+#ifdef _WIN32 | |
+ __try { | |
+ return cuInit(0); | |
+ } | |
+ __except (EXCEPTION_EXECUTE_HANDLER) { | |
+ /* Ignore crashes inside the CUDA driver and hope we can | |
+ * survive even with corrupted CUDA installs. */ | |
+ fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n"); | |
+ } | |
+ | |
+ return CUDA_ERROR_NO_DEVICE; | |
+#else | |
+ return cuInit(0); | |
+#endif | |
+} | |
+ | |
+void device_cuda_info(vector<DeviceInfo> &devices) | |
+{ | |
+ CUresult result = device_cuda_safe_init(); | |
+ if (result != CUDA_SUCCESS) { | |
+ if (result != CUDA_ERROR_NO_DEVICE) | |
+ fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result)); | |
+ return; | |
+ } | |
+ | |
+ int count = 0; | |
+ result = cuDeviceGetCount(&count); | |
+ if (result != CUDA_SUCCESS) { | |
+ fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result)); | |
+ return; | |
+ } | |
+ | |
+ vector<DeviceInfo> display_devices; | |
+ | |
+ for (int num = 0; num < count; num++) { | |
+ char name[256]; | |
+ | |
+ result = cuDeviceGetName(name, 256, num); | |
+ if (result != CUDA_SUCCESS) { | |
+ fprintf(stderr, "CUDA cuDeviceGetName: %s\n", cuewErrorString(result)); | |
+ continue; | |
+ } | |
+ | |
+ int major; | |
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num); | |
+ if (major < 3) { | |
+ VLOG(1) << "Ignoring device \"" << name << "\", this graphics card is no longer supported."; | |
+ continue; | |
+ } | |
+ | |
+ DeviceInfo info; | |
+ | |
+ info.type = DEVICE_CUDA; | |
+ info.description = string(name); | |
+ info.num = num; | |
+ | |
+ info.has_half_images = (major >= 3); | |
+ info.has_volume_decoupled = false; | |
+ | |
+ int pci_location[3] = {0, 0, 0}; | |
+ cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num); | |
+ cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num); | |
+ cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num); | |
+ info.id = string_printf("CUDA_%s_%04x:%02x:%02x", | |
+ name, | |
+ (unsigned int)pci_location[0], | |
+ (unsigned int)pci_location[1], | |
+ (unsigned int)pci_location[2]); | |
+ | |
+ /* If device has a kernel timeout and no compute preemption, we assume | |
+ * it is connected to a display and will freeze the display while doing | |
+ * computations. */ | |
+ int timeout_attr = 0, preempt_attr = 0; | |
+ cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num); | |
+ cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num); | |
+ | |
+ /* The CUDA driver reports compute preemption as not being available on | |
+ * Windows 10 even when it is, due to an issue in application profiles. | |
+ * Detect case where we expect it to be available and override. */ | |
+ if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) { | |
+ VLOG(1) << "Assuming device has compute preemption on Windows 10."; | |
+ preempt_attr = 1; | |
+ } | |
+ | |
+ if (timeout_attr && !preempt_attr) { | |
+ VLOG(1) << "Device is recognized as display."; | |
+ info.description += " (Display)"; | |
+ info.display_device = true; | |
+ display_devices.push_back(info); | |
+ } | |
+ else { | |
+ VLOG(1) << "Device has compute preemption or is not used for display."; | |
+ devices.push_back(info); | |
+ } | |
+ VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\"."; | |
+ } | |
+ | |
+ if (!display_devices.empty()) | |
+ devices.insert(devices.end(), display_devices.begin(), display_devices.end()); | |
+} | |
+ | |
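The ordering policy in device_cuda_info() is worth restating: a GPU with a kernel timeout and no compute preemption is assumed to drive a display, and all such devices are appended after the compute-only ones. A small sketch with hypothetical probe results:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      /* Hypothetical probes: name, kernel timeout, compute preemption. */
      struct Probe { const char *name; bool timeout; bool preempt; };
      const Probe probes[] = {{"GPU A", true, false}, {"GPU B", false, false}};

      std::vector<std::string> devices, display_devices;
      for (const Probe &p : probes) {
        /* Timeout without preemption means the GPU likely drives a display. */
        if (p.timeout && !p.preempt)
          display_devices.push_back(std::string(p.name) + " (Display)");
        else
          devices.push_back(p.name);
      }

      /* Display devices go last, so compute-only GPUs are preferred. */
      devices.insert(devices.end(), display_devices.begin(), display_devices.end());
      for (const std::string &d : devices)
        printf("%s\n", d.c_str()); /* GPU B, then GPU A (Display) */
      return 0;
    }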
+string device_cuda_capabilities() | |
+{ | |
+ CUresult result = device_cuda_safe_init(); | |
+ if (result != CUDA_SUCCESS) { | |
+ if (result != CUDA_ERROR_NO_DEVICE) { | |
+ return string("Error initializing CUDA: ") + cuewErrorString(result); | |
+ } | |
+ return "No CUDA device found\n"; | |
+ } | |
+ | |
+ int count; | |
+ result = cuDeviceGetCount(&count); | |
+ if (result != CUDA_SUCCESS) { | |
+ return string("Error getting devices: ") + cuewErrorString(result); | |
+ } | |
+ | |
+ string capabilities = ""; | |
+ for (int num = 0; num < count; num++) { | |
+ char name[256]; | |
+ if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) { | |
+ continue; | |
+ } | |
+ capabilities += string("\t") + name + "\n"; | |
+ int value; | |
+#define GET_ATTR(attr) \ | |
+ { \ | |
+ if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \ | |
+ capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \ | |
+ } \ | |
+ } \ | |
+ (void)0 | |
+ /* TODO(sergey): Strip all attributes which are not useful for us | |
+   * or do not depend on the driver. | 
+ */ | |
+ GET_ATTR(MAX_THREADS_PER_BLOCK); | |
+ GET_ATTR(MAX_BLOCK_DIM_X); | |
+ GET_ATTR(MAX_BLOCK_DIM_Y); | |
+ GET_ATTR(MAX_BLOCK_DIM_Z); | |
+ GET_ATTR(MAX_GRID_DIM_X); | |
+ GET_ATTR(MAX_GRID_DIM_Y); | |
+ GET_ATTR(MAX_GRID_DIM_Z); | |
+ GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK); | |
+ GET_ATTR(SHARED_MEMORY_PER_BLOCK); | |
+ GET_ATTR(TOTAL_CONSTANT_MEMORY); | |
+ GET_ATTR(WARP_SIZE); | |
+ GET_ATTR(MAX_PITCH); | |
+ GET_ATTR(MAX_REGISTERS_PER_BLOCK); | |
+ GET_ATTR(REGISTERS_PER_BLOCK); | |
+ GET_ATTR(CLOCK_RATE); | |
+ GET_ATTR(TEXTURE_ALIGNMENT); | |
+ GET_ATTR(GPU_OVERLAP); | |
+ GET_ATTR(MULTIPROCESSOR_COUNT); | |
+ GET_ATTR(KERNEL_EXEC_TIMEOUT); | |
+ GET_ATTR(INTEGRATED); | |
+ GET_ATTR(CAN_MAP_HOST_MEMORY); | |
+ GET_ATTR(COMPUTE_MODE); | |
+ GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT); | |
+ GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT); | |
+ GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES); | |
+ GET_ATTR(SURFACE_ALIGNMENT); | |
+ GET_ATTR(CONCURRENT_KERNELS); | |
+ GET_ATTR(ECC_ENABLED); | |
+ GET_ATTR(TCC_DRIVER); | |
+ GET_ATTR(MEMORY_CLOCK_RATE); | |
+ GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH); | |
+ GET_ATTR(L2_CACHE_SIZE); | |
+ GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR); | |
+ GET_ATTR(ASYNC_ENGINE_COUNT); | |
+ GET_ATTR(UNIFIED_ADDRESSING); | |
+ GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS); | |
+ GET_ATTR(CAN_TEX2D_GATHER); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT); | |
+ GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE); | |
+ GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE); | |
+ GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE); | |
+ GET_ATTR(TEXTURE_PITCH_ALIGNMENT); | |
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS); | |
+ GET_ATTR(MAXIMUM_SURFACE1D_WIDTH); | |
+ GET_ATTR(MAXIMUM_SURFACE2D_WIDTH); | |
+ GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT); | |
+ GET_ATTR(MAXIMUM_SURFACE3D_WIDTH); | |
+ GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT); | |
+ GET_ATTR(MAXIMUM_SURFACE3D_DEPTH); | |
+ GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH); | |
+ GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS); | |
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH); | |
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT); | |
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS); | |
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH); | |
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH); | |
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS); | |
+ GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH); | |
+ GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT); | |
+ GET_ATTR(COMPUTE_CAPABILITY_MAJOR); | |
+ GET_ATTR(COMPUTE_CAPABILITY_MINOR); | |
+ GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH); | |
+ GET_ATTR(STREAM_PRIORITIES_SUPPORTED); | |
+ GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED); | |
+ GET_ATTR(LOCAL_L1_CACHE_SUPPORTED); | |
+ GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR); | |
+ GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR); | |
+ GET_ATTR(MANAGED_MEMORY); | |
+ GET_ATTR(MULTI_GPU_BOARD); | |
+ GET_ATTR(MULTI_GPU_BOARD_GROUP_ID); | |
+#undef GET_ATTR | |
+ capabilities += "\n"; | |
+ } | |
+ | |
+ return capabilities; | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp | |
--- a/intern/cycles/device/device_split_kernel.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/device/device_split_kernel.cpp 2020-01-10 20:42:43.460923388 +0300 | |
@@ -55,6 +55,10 @@ | |
kernel_next_iteration_setup = NULL; | |
kernel_indirect_subsurface = NULL; | |
kernel_buffer_update = NULL; | |
+ kernel_adaptive_stopping = NULL; | |
+ kernel_adaptive_filter_x = NULL; | |
+ kernel_adaptive_filter_y = NULL; | |
+ kernel_adaptive_adjust_samples = NULL; | |
} | |
DeviceSplitKernel::~DeviceSplitKernel() | |
@@ -83,6 +87,10 @@ | |
delete kernel_next_iteration_setup; | |
delete kernel_indirect_subsurface; | |
delete kernel_buffer_update; | |
+ delete kernel_adaptive_stopping; | |
+ delete kernel_adaptive_filter_x; | |
+ delete kernel_adaptive_filter_y; | |
+ delete kernel_adaptive_adjust_samples; | |
} | |
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features) | |
@@ -114,6 +122,10 @@ | |
LOAD_KERNEL(next_iteration_setup); | |
LOAD_KERNEL(indirect_subsurface); | |
LOAD_KERNEL(buffer_update); | |
+ LOAD_KERNEL(adaptive_stopping); | |
+ LOAD_KERNEL(adaptive_filter_x); | |
+ LOAD_KERNEL(adaptive_filter_y); | |
+ LOAD_KERNEL(adaptive_adjust_samples); | |
#undef LOAD_KERNEL | |
@@ -208,6 +220,19 @@ | |
RenderTile subtile = tile; | |
subtile.start_sample = tile.sample; | |
+ | |
+ if (task->integrator_adaptive) { | |
+    int step_samples = samples_per_second; | 
+ /* Round so that we end up on multiples of four for adaptive sampling. */ | |
+ if (step_samples == 3) { | |
+ step_samples = 2; | |
+ } | |
+ else if (step_samples > 4) { | |
+ step_samples &= 0xfffffffc; | |
+ } | |
+ samples_per_second = max(1, step_samples - (subtile.start_sample % 4)); | |
+ } | |
+ | |
subtile.num_samples = min(samples_per_second, | |
tile.start_sample + tile.num_samples - tile.sample); | |
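Since the adaptive filter runs every fourth sample, the rounding above tries to make each launch end on a multiple-of-four sample boundary. A worked restatement; align_step is a hypothetical helper with the same arithmetic:

    #include <algorithm>
    #include <cstdio>

    /* Round the step so start_sample + step lands on a multiple of four. */
    static int align_step(int step_samples, int start_sample) {
      if (step_samples == 3)
        step_samples = 2;
      else if (step_samples > 4)
        step_samples &= 0xfffffffc; /* round down to a multiple of four */
      return std::max(1, step_samples - (start_sample % 4));
    }

    int main() {
      printf("%d\n", align_step(7, 0)); /* 4: 7 rounds down to 4, start aligned */
      printf("%d\n", align_step(4, 2)); /* 2: 2 + 2 reaches the boundary at 4 */
      printf("%d\n", align_step(1, 3)); /* 1: never step less than one sample */
      return 0;
    }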
@@ -302,6 +327,22 @@ | |
} | |
} | |
+ if (task->integrator_adaptive && ((tile.sample + subtile.num_samples - 1) & 3) == 3) { | |
+ size_t buffer_size[2]; | |
+ buffer_size[0] = round_up(tile.w, local_size[0]); | |
+ buffer_size[1] = round_up(tile.h, local_size[1]); | |
+ kernel_adaptive_stopping->enqueue( | |
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | |
+ buffer_size[0] = round_up(tile.h, local_size[0]); | |
+ buffer_size[1] = round_up(1, local_size[1]); | |
+ kernel_adaptive_filter_x->enqueue( | |
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | |
+ buffer_size[0] = round_up(tile.w, local_size[0]); | |
+ buffer_size[1] = round_up(1, local_size[1]); | |
+ kernel_adaptive_filter_y->enqueue( | |
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | |
+ } | |
+ | |
double time_per_sample = ((time_dt() - start_time) / subtile.num_samples); | |
if (avg_time_per_sample == 0.0) { | |
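The three launches above size their work differently: the stopping pass runs one work-item per pixel, while the two filter passes are separable, X covering one row per work-item and Y one column, all rounded up to the work-group size. A worked example, assuming round_up rounds to a higher multiple; the work-group size is illustrative.

    #include <cstdio>

    /* Assumed semantics of round_up: smallest multiple of m >= x. */
    static size_t round_up(size_t x, size_t m) { return ((x + m - 1) / m) * m; }

    int main() {
      size_t local_size[2] = {64, 1};
      size_t tile_w = 240, tile_h = 136;

      /* Stopping: one work-item per pixel. */
      size_t stop_w = round_up(tile_w, local_size[0]); /* 256 */
      size_t stop_h = round_up(tile_h, local_size[1]); /* 136 */
      /* Filter X: one work-item per row; filter Y: one per column. */
      size_t fx = round_up(tile_h, local_size[0]); /* 192 */
      size_t fy = round_up(tile_w, local_size[0]); /* 256 */

      printf("stopping %zux%zu, filter_x %zu, filter_y %zu\n", stop_w, stop_h, fx, fy);
      return 0;
    }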
@@ -324,6 +365,28 @@ | |
} | |
} | |
+ if (task->integrator_adaptive) { | |
+ /* Reset the start samples. */ | |
+ RenderTile subtile = tile; | |
+ subtile.start_sample = tile.start_sample; | |
+ subtile.num_samples = tile.sample - tile.start_sample; | |
+ enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size), | |
+ subtile, | |
+ num_global_elements, | |
+ kgbuffer, | |
+ kernel_data, | |
+ split_data, | |
+ ray_state, | |
+ queue_index, | |
+ use_queues_flag, | |
+ work_pool_wgs); | |
+ size_t buffer_size[2]; | |
+ buffer_size[0] = round_up(tile.w, local_size[0]); | |
+ buffer_size[1] = round_up(tile.h, local_size[1]); | |
+ kernel_adaptive_adjust_samples->enqueue( | |
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data); | |
+ } | |
+ | |
return true; | |
} | |
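The block above re-runs data_init over the tile's full sample range and then launches kernel_adaptive_adjust_samples. That kernel is not part of this hunk, so the sketch below only illustrates the intended effect under that reading: a pixel that stopped early holds a sum over fewer samples, and scaling it up lets the whole buffer later be divided by one uniform sample count.

    #include <cstdio>

    int main() {
      float accum = 12.0f;    /* sum over 24 samples of an early-stopped pixel */
      int pixel_samples = 24; /* samples this pixel actually received */
      int final_samples = 96; /* samples the rest of the tile received */

      /* Rescale so accum / final_samples preserves the pixel's mean. */
      float scale = (float)final_samples / (float)pixel_samples;
      printf("adjusted sum %g, mean stays %g\n", accum * scale, accum / pixel_samples);
      return 0;
    }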
diff -Naur a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h | |
--- a/intern/cycles/device/device_split_kernel.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/device/device_split_kernel.h 2020-01-10 20:42:43.460923388 +0300 | |
@@ -75,6 +75,10 @@ | |
SplitKernelFunction *kernel_next_iteration_setup; | |
SplitKernelFunction *kernel_indirect_subsurface; | |
SplitKernelFunction *kernel_buffer_update; | |
+ SplitKernelFunction *kernel_adaptive_stopping; | |
+ SplitKernelFunction *kernel_adaptive_filter_x; | |
+ SplitKernelFunction *kernel_adaptive_filter_y; | |
+ SplitKernelFunction *kernel_adaptive_adjust_samples; | |
/* Global memory variables [porting]; These memory is used for | |
* co-operation between different kernels; Data written by one | |
diff -Naur a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h | |
--- a/intern/cycles/device/device_task.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/device/device_task.h 2020-01-10 20:42:43.460923388 +0300 | |
@@ -114,6 +114,7 @@ | |
bool need_finish_queue; | |
bool integrator_branched; | |
+ bool integrator_adaptive; | |
int2 requested_tile_size; | |
protected: | |
diff -Naur a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h | |
--- a/intern/cycles/device/opencl/opencl.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/device/opencl/opencl.h 2020-01-10 20:42:43.460923388 +0300 | |
@@ -445,6 +445,7 @@ | |
device_ptr rgba_byte, | |
device_ptr rgba_half); | |
void shader(DeviceTask &task); | |
+ void update_adaptive(DeviceTask &task, RenderTile &tile, int sample); | |
void denoise(RenderTile &tile, DenoisingTask &denoising); | |
diff -Naur a/intern/cycles/device/opencl/opencl.h.orig b/intern/cycles/device/opencl/opencl.h.orig | |
--- a/intern/cycles/device/opencl/opencl.h.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/device/opencl/opencl.h.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,656 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#ifdef WITH_OPENCL | |
+ | |
+# include "device/device.h" | |
+# include "device/device_denoising.h" | |
+# include "device/device_split_kernel.h" | |
+ | |
+# include "util/util_map.h" | |
+# include "util/util_param.h" | |
+# include "util/util_string.h" | |
+ | |
+# include "clew.h" | |
+ | |
+# include "device/opencl/memory_manager.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+/* Disable workarounds, seems to be working fine on latest drivers. */ | |
+# define CYCLES_DISABLE_DRIVER_WORKAROUNDS | |
+ | |
+/* Define CYCLES_DISABLE_DRIVER_WORKAROUNDS to disable workarounds for testing */ | 
+# ifndef CYCLES_DISABLE_DRIVER_WORKAROUNDS | |
+/* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */ | |
+# undef clEnqueueNDRangeKernel | |
+# define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \ | |
+ CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i); \ | |
+ clFinish(a); | |
+ | |
+# undef clEnqueueWriteBuffer | |
+# define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \ | |
+ CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i); \ | |
+ clFinish(a); | |
+ | |
+# undef clEnqueueReadBuffer | |
+# define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \ | |
+ CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \ | |
+ clFinish(a); | |
+# endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */ | |
+ | |
+# define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p)) | |
+ | |
+struct OpenCLPlatformDevice { | |
+ OpenCLPlatformDevice(cl_platform_id platform_id, | |
+ const string &platform_name, | |
+ cl_device_id device_id, | |
+ cl_device_type device_type, | |
+ const string &device_name, | |
+ const string &hardware_id, | |
+ const string &device_extensions) | |
+ : platform_id(platform_id), | |
+ platform_name(platform_name), | |
+ device_id(device_id), | |
+ device_type(device_type), | |
+ device_name(device_name), | |
+ hardware_id(hardware_id), | |
+ device_extensions(device_extensions) | |
+ { | |
+ } | |
+ cl_platform_id platform_id; | |
+ string platform_name; | |
+ cl_device_id device_id; | |
+ cl_device_type device_type; | |
+ string device_name; | |
+ string hardware_id; | |
+ string device_extensions; | |
+}; | |
+ | |
+/* Contains all static OpenCL helper functions. */ | |
+class OpenCLInfo { | |
+ public: | |
+ static cl_device_type device_type(); | |
+ static bool use_debug(); | |
+ static bool device_supported(const string &platform_name, const cl_device_id device_id); | |
+ static bool platform_version_check(cl_platform_id platform, string *error = NULL); | |
+ static bool device_version_check(cl_device_id device, string *error = NULL); | |
+ static string get_hardware_id(const string &platform_name, cl_device_id device_id); | |
+ static void get_usable_devices(vector<OpenCLPlatformDevice> *usable_devices, | |
+ bool force_all = false); | |
+ | |
+ /* ** Some handy shortcuts to low level cl*GetInfo() functions. ** */ | |
+ | |
+ /* Platform information. */ | |
+ static bool get_num_platforms(cl_uint *num_platforms, cl_int *error = NULL); | |
+ static cl_uint get_num_platforms(); | |
+ | |
+ static bool get_platforms(vector<cl_platform_id> *platform_ids, cl_int *error = NULL); | |
+ static vector<cl_platform_id> get_platforms(); | |
+ | |
+ static bool get_platform_name(cl_platform_id platform_id, string *platform_name); | |
+ static string get_platform_name(cl_platform_id platform_id); | |
+ | |
+ static bool get_num_platform_devices(cl_platform_id platform_id, | |
+ cl_device_type device_type, | |
+ cl_uint *num_devices, | |
+ cl_int *error = NULL); | |
+ static cl_uint get_num_platform_devices(cl_platform_id platform_id, cl_device_type device_type); | |
+ | |
+ static bool get_platform_devices(cl_platform_id platform_id, | |
+ cl_device_type device_type, | |
+ vector<cl_device_id> *device_ids, | |
+ cl_int *error = NULL); | |
+ static vector<cl_device_id> get_platform_devices(cl_platform_id platform_id, | |
+ cl_device_type device_type); | |
+ | |
+ /* Device information. */ | |
+ static bool get_device_name(cl_device_id device_id, string *device_name, cl_int *error = NULL); | |
+ | |
+ static string get_device_name(cl_device_id device_id); | |
+ | |
+ static bool get_device_extensions(cl_device_id device_id, | |
+ string *device_extensions, | |
+ cl_int *error = NULL); | |
+ | |
+ static string get_device_extensions(cl_device_id device_id); | |
+ | |
+ static bool get_device_type(cl_device_id device_id, | |
+ cl_device_type *device_type, | |
+ cl_int *error = NULL); | |
+ static cl_device_type get_device_type(cl_device_id device_id); | |
+ | |
+ static bool get_driver_version(cl_device_id device_id, | |
+ int *major, | |
+ int *minor, | |
+ cl_int *error = NULL); | |
+ | |
+ static int mem_sub_ptr_alignment(cl_device_id device_id); | |
+ | |
+  /* Get a somewhat more readable device name. | 
+   * The main difference is AMD OpenCL, which only reports a code name | 
+   * as the regular device name. This returns a saner device name, | 
+   * making use of some extensions. | 
+   */ | 
+ static string get_readable_device_name(cl_device_id device_id); | |
+}; | |
+ | |
+/* Thread safe cache for contexts and programs. | |
+ */ | |
+class OpenCLCache { | |
+ struct Slot { | |
+ struct ProgramEntry { | |
+ ProgramEntry(); | |
+ ProgramEntry(const ProgramEntry &rhs); | |
+ ~ProgramEntry(); | |
+ cl_program program; | |
+ thread_mutex *mutex; | |
+ }; | |
+ | |
+ Slot(); | |
+ Slot(const Slot &rhs); | |
+ ~Slot(); | |
+ | |
+ thread_mutex *context_mutex; | |
+ cl_context context; | |
+ typedef map<ustring, ProgramEntry> EntryMap; | |
+ EntryMap programs; | |
+ }; | |
+ | |
+ /* key is combination of platform ID and device ID */ | |
+ typedef pair<cl_platform_id, cl_device_id> PlatformDevicePair; | |
+ | |
+ /* map of Slot objects */ | |
+ typedef map<PlatformDevicePair, Slot> CacheMap; | |
+ CacheMap cache; | |
+ | |
+ /* MD5 hash of the kernel source. */ | |
+ string kernel_md5; | |
+ | |
+ thread_mutex cache_lock; | |
+ thread_mutex kernel_md5_lock; | |
+ | |
+ /* lazy instantiate */ | |
+ static OpenCLCache &global_instance(); | |
+ | |
+ public: | |
+ enum ProgramName { | |
+ OCL_DEV_BASE_PROGRAM, | |
+ OCL_DEV_MEGAKERNEL_PROGRAM, | |
+ }; | |
+ | |
+ /* Lookup context in the cache. If this returns NULL, slot_locker | |
+ * will be holding a lock for the cache. slot_locker should refer to a | |
+ * default constructed thread_scoped_lock. */ | |
+ static cl_context get_context(cl_platform_id platform, | |
+ cl_device_id device, | |
+ thread_scoped_lock &slot_locker); | |
+ /* Same as above. */ | |
+ static cl_program get_program(cl_platform_id platform, | |
+ cl_device_id device, | |
+ ustring key, | |
+ thread_scoped_lock &slot_locker); | |
+ | |
+ /* Store context in the cache. You MUST have tried to get the item before storing to it. */ | |
+ static void store_context(cl_platform_id platform, | |
+ cl_device_id device, | |
+ cl_context context, | |
+ thread_scoped_lock &slot_locker); | |
+ /* Same as above. */ | |
+ static void store_program(cl_platform_id platform, | |
+ cl_device_id device, | |
+ cl_program program, | |
+ ustring key, | |
+ thread_scoped_lock &slot_locker); | |
+ | |
+ static string get_kernel_md5(); | |
+}; | |
+ | |
+# define opencl_device_assert(device, stmt) \ | |
+ { \ | |
+ cl_int err = stmt; \ | |
+\ | |
+ if (err != CL_SUCCESS) { \ | |
+ string message = string_printf( \ | |
+ "OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \ | |
+ if ((device)->error_message() == "") \ | |
+ (device)->set_error(message); \ | |
+ fprintf(stderr, "%s\n", message.c_str()); \ | |
+ } \ | |
+ } \ | |
+ (void)0 | |
+ | |
+# define opencl_assert(stmt) \ | |
+ { \ | |
+ cl_int err = stmt; \ | |
+\ | |
+ if (err != CL_SUCCESS) { \ | |
+ string message = string_printf( \ | |
+ "OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \ | |
+ if (error_msg == "") \ | |
+ error_msg = message; \ | |
+ fprintf(stderr, "%s\n", message.c_str()); \ | |
+ } \ | |
+ } \ | |
+ (void)0 | |
+ | |
+class OpenCLDevice : public Device { | |
+ public: | |
+ DedicatedTaskPool task_pool; | |
+ | |
+ /* Task pool for required kernels (base, AO kernels during foreground rendering) */ | |
+ TaskPool load_required_kernel_task_pool; | |
+ /* Task pool for optional kernels (feature kernels during foreground rendering) */ | |
+ TaskPool load_kernel_task_pool; | |
+ cl_context cxContext; | |
+ cl_command_queue cqCommandQueue; | |
+ cl_platform_id cpPlatform; | |
+ cl_device_id cdDevice; | |
+ cl_int ciErr; | |
+ int device_num; | |
+ bool use_preview_kernels; | |
+ | |
+ class OpenCLProgram { | |
+ public: | |
+ OpenCLProgram() : loaded(false), needs_compiling(true), program(NULL), device(NULL) | |
+ { | |
+ } | |
+ OpenCLProgram(OpenCLDevice *device, | |
+ const string &program_name, | |
+ const string &kernel_name, | |
+ const string &kernel_build_options, | |
+ bool use_stdout = true); | |
+ ~OpenCLProgram(); | |
+ | |
+ void add_kernel(ustring name); | |
+ | |
+ /* Try to load the program from device cache or disk */ | |
+ bool load(); | |
+    /* Compile the kernel (first in a separate process, falling back to local) */ | 
+ void compile(); | |
+ /* Create the OpenCL kernels after loading or compiling */ | |
+ void create_kernels(); | |
+ | |
+ bool is_loaded() const | |
+ { | |
+ return loaded; | |
+ } | |
+ const string &get_log() const | |
+ { | |
+ return log; | |
+ } | |
+ void report_error(); | |
+ | |
+    /* Wait until this kernel is available to be used. | 
+ * It will return true when the kernel is available. | |
+ * It will return false when the kernel is not available | |
+ * or could not be loaded. */ | |
+ bool wait_for_availability(); | |
+ | |
+ cl_kernel operator()(); | |
+ cl_kernel operator()(ustring name); | |
+ | |
+ void release(); | |
+ | |
+ private: | |
+ bool build_kernel(const string *debug_src); | |
+    /* Build the program by re-invoking our own process. | 
+     * This is required for multithreaded OpenCL compilation, since most frameworks serialize | 
+ * build calls internally if they come from the same process. | |
+ * If that is not supported, this function just returns false. | |
+ */ | |
+ bool compile_separate(const string &clbin); | |
+ /* Build the program by calling OpenCL directly. */ | |
+ bool compile_kernel(const string *debug_src); | |
+ /* Loading and saving the program from/to disk. */ | |
+ bool load_binary(const string &clbin, const string *debug_src = NULL); | |
+ bool save_binary(const string &clbin); | |
+ | |
+ void add_log(const string &msg, bool is_debug); | |
+ void add_error(const string &msg); | |
+ | |
+ bool loaded; | |
+ bool needs_compiling; | |
+ | |
+ cl_program program; | |
+ OpenCLDevice *device; | |
+ | |
+ /* Used for the OpenCLCache key. */ | |
+ string program_name; | |
+ | |
+ string kernel_file, kernel_build_options, device_md5; | |
+ | |
+ bool use_stdout; | |
+ string log, error_msg; | |
+ string compile_output; | |
+ | |
+ map<ustring, cl_kernel> kernels; | |
+ }; | |
+ | |
+ /* Container for all types of split programs. */ | |
+ class OpenCLSplitPrograms { | |
+ public: | |
+ OpenCLDevice *device; | |
+ OpenCLProgram program_split; | |
+ OpenCLProgram program_lamp_emission; | |
+ OpenCLProgram program_do_volume; | |
+ OpenCLProgram program_indirect_background; | |
+ OpenCLProgram program_shader_eval; | |
+ OpenCLProgram program_holdout_emission_blurring_pathtermination_ao; | |
+ OpenCLProgram program_subsurface_scatter; | |
+ OpenCLProgram program_direct_lighting; | |
+ OpenCLProgram program_shadow_blocked_ao; | |
+ OpenCLProgram program_shadow_blocked_dl; | |
+ | |
+ OpenCLSplitPrograms(OpenCLDevice *device); | |
+ ~OpenCLSplitPrograms(); | |
+ | |
+ /* Load the kernels and put the created kernels in the given | |
+ * `programs` parameter. */ | |
+ void load_kernels(vector<OpenCLProgram *> &programs, | |
+ const DeviceRequestedFeatures &requested_features, | |
+ bool is_preview = false); | |
+ }; | |
+ | |
+ DeviceSplitKernel *split_kernel; | |
+ | |
+ OpenCLProgram base_program; | |
+ OpenCLProgram bake_program; | |
+ OpenCLProgram displace_program; | |
+ OpenCLProgram background_program; | |
+ OpenCLProgram denoising_program; | |
+ | |
+ OpenCLSplitPrograms kernel_programs; | |
+ OpenCLSplitPrograms preview_programs; | |
+ | |
+ typedef map<string, device_vector<uchar> *> ConstMemMap; | |
+ typedef map<string, device_ptr> MemMap; | |
+ | |
+ ConstMemMap const_mem_map; | |
+ MemMap mem_map; | |
+ | |
+ bool device_initialized; | |
+ string platform_name; | |
+ string device_name; | |
+ | |
+ bool opencl_error(cl_int err); | |
+ void opencl_error(const string &message); | |
+ void opencl_assert_err(cl_int err, const char *where); | |
+ | |
+ OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background); | |
+ ~OpenCLDevice(); | |
+ | |
+ static void CL_CALLBACK context_notify_callback(const char *err_info, | |
+ const void * /*private_info*/, | |
+ size_t /*cb*/, | |
+ void *user_data); | |
+ | |
+ bool opencl_version_check(); | |
+ OpenCLSplitPrograms *get_split_programs(); | |
+ | |
+ string device_md5_hash(string kernel_custom_build_options = ""); | |
+ bool load_kernels(const DeviceRequestedFeatures &requested_features); | |
+ void load_required_kernels(const DeviceRequestedFeatures &requested_features); | |
+ void load_preview_kernels(); | |
+ | |
+ bool wait_for_availability(const DeviceRequestedFeatures &requested_features); | |
+ DeviceKernelStatus get_active_kernel_switch_state(); | |
+ | |
+  /* Get the name of the OpenCL program for the given kernel. */ | |
+  const string get_opencl_program_name(const string &kernel_name); | |
+  /* Get the program file name to compile (*.cl) for the given kernel. */ | |
+ const string get_opencl_program_filename(const string &kernel_name); | |
+ string get_build_options(const DeviceRequestedFeatures &requested_features, | |
+ const string &opencl_program_name, | |
+ bool preview_kernel = false); | |
+  /* Enable the default features to reduce recompilation events. */ | |
+ void enable_default_features(DeviceRequestedFeatures &features); | |
+ | |
+ void mem_alloc(device_memory &mem); | |
+ void mem_copy_to(device_memory &mem); | |
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem); | |
+ void mem_zero(device_memory &mem); | |
+ void mem_free(device_memory &mem); | |
+ | |
+ int mem_sub_ptr_alignment(); | |
+ | |
+ void const_copy_to(const char *name, void *host, size_t size); | |
+ void tex_alloc(device_memory &mem); | |
+ void tex_free(device_memory &mem); | |
+ | |
+ size_t global_size_round_up(int group_size, int global_size); | |
+ void enqueue_kernel(cl_kernel kernel, | |
+ size_t w, | |
+ size_t h, | |
+ bool x_workgroups = false, | |
+ size_t max_workgroup_size = -1); | |
+ void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name); | |
+ void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg); | |
+ | |
+ void film_convert(DeviceTask &task, | |
+ device_ptr buffer, | |
+ device_ptr rgba_byte, | |
+ device_ptr rgba_half); | |
+ void shader(DeviceTask &task); | |
+ | |
+ void denoise(RenderTile &tile, DenoisingTask &denoising); | |
+ | |
+ class OpenCLDeviceTask : public DeviceTask { | |
+ public: | |
+ OpenCLDeviceTask(OpenCLDevice *device, DeviceTask &task) : DeviceTask(task) | |
+ { | |
+ run = function_bind(&OpenCLDevice::thread_run, device, this); | |
+ } | |
+ }; | |
+ | |
+ int get_split_task_count(DeviceTask & /*task*/) | |
+ { | |
+ return 1; | |
+ } | |
+ | |
+ void task_add(DeviceTask &task) | |
+ { | |
+ task_pool.push(new OpenCLDeviceTask(this, task)); | |
+ } | |
+ | |
+ void task_wait() | |
+ { | |
+ task_pool.wait(); | |
+ } | |
+ | |
+ void task_cancel() | |
+ { | |
+ task_pool.cancel(); | |
+ } | |
+ | |
+ void thread_run(DeviceTask *task); | |
+ | |
+ virtual BVHLayoutMask get_bvh_layout_mask() const | |
+ { | |
+ return BVH_LAYOUT_BVH2; | |
+ } | |
+ | |
+ virtual bool show_samples() const | |
+ { | |
+ return true; | |
+ } | |
+ | |
+ protected: | |
+ string kernel_build_options(const string *debug_src = NULL); | |
+ | |
+ void mem_zero_kernel(device_ptr ptr, size_t size); | |
+ | |
+ bool denoising_non_local_means(device_ptr image_ptr, | |
+ device_ptr guide_ptr, | |
+ device_ptr variance_ptr, | |
+ device_ptr out_ptr, | |
+ DenoisingTask *task); | |
+ bool denoising_construct_transform(DenoisingTask *task); | |
+ bool denoising_accumulate(device_ptr color_ptr, | |
+ device_ptr color_variance_ptr, | |
+ device_ptr scale_ptr, | |
+ int frame, | |
+ DenoisingTask *task); | |
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task); | |
+ bool denoising_combine_halves(device_ptr a_ptr, | |
+ device_ptr b_ptr, | |
+ device_ptr mean_ptr, | |
+ device_ptr variance_ptr, | |
+ int r, | |
+ int4 rect, | |
+ DenoisingTask *task); | |
+ bool denoising_divide_shadow(device_ptr a_ptr, | |
+ device_ptr b_ptr, | |
+ device_ptr sample_variance_ptr, | |
+ device_ptr sv_variance_ptr, | |
+ device_ptr buffer_variance_ptr, | |
+ DenoisingTask *task); | |
+ bool denoising_get_feature(int mean_offset, | |
+ int variance_offset, | |
+ device_ptr mean_ptr, | |
+ device_ptr variance_ptr, | |
+ float scale, | |
+ DenoisingTask *task); | |
+ bool denoising_write_feature(int to_offset, | |
+ device_ptr from_ptr, | |
+ device_ptr buffer_ptr, | |
+ DenoisingTask *task); | |
+ bool denoising_detect_outliers(device_ptr image_ptr, | |
+ device_ptr variance_ptr, | |
+ device_ptr depth_ptr, | |
+ device_ptr output_ptr, | |
+ DenoisingTask *task); | |
+ | |
+ device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int size); | |
+ void mem_free_sub_ptr(device_ptr ptr); | |
+ | |
+ class ArgumentWrapper { | |
+ public: | |
+ ArgumentWrapper() : size(0), pointer(NULL) | |
+ { | |
+ } | |
+ | |
+ ArgumentWrapper(device_memory &argument) | |
+ : size(sizeof(void *)), pointer((void *)(&argument.device_pointer)) | |
+ { | |
+ } | |
+ | |
+ template<typename T> | |
+ ArgumentWrapper(device_vector<T> &argument) | |
+ : size(sizeof(void *)), pointer((void *)(&argument.device_pointer)) | |
+ { | |
+ } | |
+ | |
+ template<typename T> | |
+ ArgumentWrapper(device_only_memory<T> &argument) | |
+ : size(sizeof(void *)), pointer((void *)(&argument.device_pointer)) | |
+ { | |
+ } | |
+ template<typename T> ArgumentWrapper(T &argument) : size(sizeof(argument)), pointer(&argument) | |
+ { | |
+ } | |
+ | |
+ ArgumentWrapper(int argument) : size(sizeof(int)), int_value(argument), pointer(&int_value) | |
+ { | |
+ } | |
+ | |
+ ArgumentWrapper(float argument) | |
+ : size(sizeof(float)), float_value(argument), pointer(&float_value) | |
+ { | |
+ } | |
+ | |
+ size_t size; | |
+ int int_value; | |
+ float float_value; | |
+ void *pointer; | |
+ }; | |
+ | |
+  /* TODO(sergey): In the future we can use variadic templates, once | |
+   * C++0x is allowed. That should allow us to clean this up a bit | |
+   * (see the hypothetical sketch after this header's diff). | |
+   */ | |
+ int kernel_set_args(cl_kernel kernel, | |
+ int start_argument_index, | |
+ const ArgumentWrapper &arg1 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg2 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg3 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg4 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg5 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg6 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg7 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg8 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg9 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg10 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg11 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg12 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg13 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg14 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg15 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg16 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg17 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg18 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg19 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg20 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg21 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg22 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg23 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg24 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg25 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg26 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg27 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg28 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg29 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg30 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg31 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg32 = ArgumentWrapper(), | |
+ const ArgumentWrapper &arg33 = ArgumentWrapper()); | |
+ | |
+ void release_kernel_safe(cl_kernel kernel); | |
+ void release_mem_object_safe(cl_mem mem); | |
+ void release_program_safe(cl_program program); | |
+ | |
+  /* ** These functions are for working around some compiler-specific bugs ** */ | |
+ | |
+ cl_program load_cached_kernel(ustring key, thread_scoped_lock &cache_locker); | |
+ | |
+ void store_cached_kernel(cl_program program, ustring key, thread_scoped_lock &cache_locker); | |
+ | |
+ private: | |
+ MemoryManager memory_manager; | |
+ friend class MemoryManager; | |
+ | |
+ static_assert_align(TextureInfo, 16); | |
+ device_vector<TextureInfo> texture_info; | |
+ | |
+ typedef map<string, device_memory *> TexturesMap; | |
+ TexturesMap textures; | |
+ | |
+ bool textures_need_update; | |
+ | |
+ protected: | |
+ void flush_texture_buffers(); | |
+ | |
+ friend class OpenCLSplitKernel; | |
+ friend class OpenCLSplitKernelFunction; | |
+}; | |
+ | |
+Device *opencl_create_split_device(DeviceInfo &info, | |
+ Stats &stats, | |
+ Profiler &profiler, | |
+ bool background); | |
+ | |
+CCL_NAMESPACE_END | |
+ | |
+#endif | |
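The TODO above the 33-argument kernel_set_args() declaration anticipates replacing the long
overload list with variadic templates. A minimal C++17 sketch of that cleanup, reusing the
ArgumentWrapper type from this header; kernel_set_args_sketch is a hypothetical name and this
is not part of the patch:

/* Sketch only: assumes <CL/cl.h> and the ArgumentWrapper class declared above. */
template<typename... Args>
int kernel_set_args_sketch(cl_kernel kernel, cl_uint start_index, Args &... args)
{
  cl_uint index = start_index;
  auto set_one = [&](const ArgumentWrapper &arg) {
    clSetKernelArg(kernel, index++, arg.size, arg.pointer);
  };
  /* C++17 fold expression: wrap and bind each argument in declaration order. */
  (set_one(ArgumentWrapper(args)), ...);
  return (int)(index - start_index);
}

Each argument would be funneled through the same ArgumentWrapper conversions as the existing
overloads, so call sites would not need to change.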
diff -Naur a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp | |
--- a/intern/cycles/device/opencl/opencl_split.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/device/opencl/opencl_split.cpp 2020-01-10 20:42:43.460923388 +0300 | |
@@ -56,7 +56,11 @@ | |
"enqueue_inactive " | |
"next_iteration_setup " | |
"indirect_subsurface " | |
- "buffer_update"; | |
+ "buffer_update " | |
+ "adaptive_stopping " | |
+ "adaptive_filter_x " | |
+ "adaptive_filter_y " | |
+ "adaptive_adjust_samples"; | |
const string OpenCLDevice::get_opencl_program_name(const string &kernel_name) | |
{ | |
@@ -283,6 +287,10 @@ | |
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup); | |
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface); | |
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update); | |
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_stopping); | |
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_x); | |
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_y); | |
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_adjust_samples); | |
programs.push_back(&program_split); | |
# undef ADD_SPLIT_KERNEL_PROGRAM | |
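For context on the hunk above: program_split bundles its kernels as one space-separated name
string, so registering a new kernel amounts to appending a token. A minimal standalone
illustration of consuming such a list (the name list here is abridged, and the real parsing
in the device code may differ):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
  /* Abridged stand-in for the bundled kernel name list above. */
  const std::string split_kernel_names =
      "buffer_update adaptive_stopping adaptive_filter_x "
      "adaptive_filter_y adaptive_adjust_samples";

  std::vector<std::string> kernels;
  std::istringstream tokens(split_kernel_names);
  for (std::string name; tokens >> name;) {
    kernels.push_back(name); /* one bundled kernel per whitespace-separated token */
  }
  std::cout << kernels.size() << " kernels\n"; /* prints "5 kernels" */
  return 0;
}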
diff -Naur a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt | |
--- a/intern/cycles/kernel/CMakeLists.txt 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/CMakeLists.txt 2020-01-10 20:42:43.460923388 +0300 | |
@@ -36,6 +36,10 @@ | |
) | |
set(SRC_OPENCL_KERNELS | |
+ kernels/opencl/kernel_adaptive_stopping.cl | |
+ kernels/opencl/kernel_adaptive_filter_x.cl | |
+ kernels/opencl/kernel_adaptive_filter_y.cl | |
+ kernels/opencl/kernel_adaptive_adjust_samples.cl | |
kernels/opencl/kernel_bake.cl | |
kernels/opencl/kernel_base.cl | |
kernels/opencl/kernel_displace.cl | |
@@ -94,6 +98,7 @@ | |
set(SRC_HEADERS | |
kernel_accumulate.h | |
+ kernel_adaptive_sampling.h | |
kernel_bake.h | |
kernel_camera.h | |
kernel_color.h | |
@@ -323,6 +328,10 @@ | |
) | |
set(SRC_SPLIT_HEADERS | |
+ split/kernel_adaptive_adjust_samples.h | |
+ split/kernel_adaptive_filter_x.h | |
+ split/kernel_adaptive_filter_y.h | |
+ split/kernel_adaptive_stopping.h | |
split/kernel_branched.h | |
split/kernel_buffer_update.h | |
split/kernel_data_init.h | |
diff -Naur a/intern/cycles/kernel/CMakeLists.txt.orig b/intern/cycles/kernel/CMakeLists.txt.orig | |
--- a/intern/cycles/kernel/CMakeLists.txt.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/CMakeLists.txt.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,662 @@ | |
+remove_extra_strict_flags() | |
+ | |
+set(INC | |
+ .. | |
+) | |
+ | |
+set(INC_SYS | |
+ | |
+) | |
+ | |
+set(SRC_CPU_KERNELS | |
+ kernels/cpu/kernel.cpp | |
+ kernels/cpu/kernel_sse2.cpp | |
+ kernels/cpu/kernel_sse3.cpp | |
+ kernels/cpu/kernel_sse41.cpp | |
+ kernels/cpu/kernel_avx.cpp | |
+ kernels/cpu/kernel_avx2.cpp | |
+ kernels/cpu/kernel_split.cpp | |
+ kernels/cpu/kernel_split_sse2.cpp | |
+ kernels/cpu/kernel_split_sse3.cpp | |
+ kernels/cpu/kernel_split_sse41.cpp | |
+ kernels/cpu/kernel_split_avx.cpp | |
+ kernels/cpu/kernel_split_avx2.cpp | |
+ kernels/cpu/filter.cpp | |
+ kernels/cpu/filter_sse2.cpp | |
+ kernels/cpu/filter_sse3.cpp | |
+ kernels/cpu/filter_sse41.cpp | |
+ kernels/cpu/filter_avx.cpp | |
+ kernels/cpu/filter_avx2.cpp | |
+) | |
+ | |
+set(SRC_CUDA_KERNELS | |
+ kernels/cuda/kernel.cu | |
+ kernels/cuda/kernel_split.cu | |
+ kernels/cuda/filter.cu | |
+) | |
+ | |
+set(SRC_OPENCL_KERNELS | |
+ kernels/opencl/kernel_bake.cl | |
+ kernels/opencl/kernel_base.cl | |
+ kernels/opencl/kernel_displace.cl | |
+ kernels/opencl/kernel_background.cl | |
+ kernels/opencl/kernel_state_buffer_size.cl | |
+ kernels/opencl/kernel_split_bundle.cl | |
+ kernels/opencl/kernel_data_init.cl | |
+ kernels/opencl/kernel_path_init.cl | |
+ kernels/opencl/kernel_queue_enqueue.cl | |
+ kernels/opencl/kernel_scene_intersect.cl | |
+ kernels/opencl/kernel_lamp_emission.cl | |
+ kernels/opencl/kernel_do_volume.cl | |
+ kernels/opencl/kernel_indirect_background.cl | |
+ kernels/opencl/kernel_shader_setup.cl | |
+ kernels/opencl/kernel_shader_sort.cl | |
+ kernels/opencl/kernel_shader_eval.cl | |
+ kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl | |
+ kernels/opencl/kernel_subsurface_scatter.cl | |
+ kernels/opencl/kernel_direct_lighting.cl | |
+ kernels/opencl/kernel_shadow_blocked_ao.cl | |
+ kernels/opencl/kernel_shadow_blocked_dl.cl | |
+ kernels/opencl/kernel_enqueue_inactive.cl | |
+ kernels/opencl/kernel_next_iteration_setup.cl | |
+ kernels/opencl/kernel_indirect_subsurface.cl | |
+ kernels/opencl/kernel_buffer_update.cl | |
+ kernels/opencl/filter.cl | |
+) | |
+ | |
+set(SRC_OPTIX_KERNELS | |
+ kernels/optix/kernel_optix.cu | |
+) | |
+ | |
+set(SRC_BVH_HEADERS | |
+ bvh/bvh.h | |
+ bvh/bvh_nodes.h | |
+ bvh/bvh_shadow_all.h | |
+ bvh/bvh_local.h | |
+ bvh/bvh_traversal.h | |
+ bvh/bvh_types.h | |
+ bvh/bvh_volume.h | |
+ bvh/bvh_volume_all.h | |
+ bvh/qbvh_nodes.h | |
+ bvh/qbvh_shadow_all.h | |
+ bvh/qbvh_local.h | |
+ bvh/qbvh_traversal.h | |
+ bvh/qbvh_volume.h | |
+ bvh/qbvh_volume_all.h | |
+ bvh/obvh_nodes.h | |
+ bvh/obvh_shadow_all.h | |
+ bvh/obvh_local.h | |
+ bvh/obvh_traversal.h | |
+ bvh/obvh_volume.h | |
+ bvh/obvh_volume_all.h | |
+ bvh/bvh_embree.h | |
+) | |
+ | |
+set(SRC_HEADERS | |
+ kernel_accumulate.h | |
+ kernel_bake.h | |
+ kernel_camera.h | |
+ kernel_color.h | |
+ kernel_compat_cpu.h | |
+ kernel_compat_cuda.h | |
+ kernel_compat_optix.h | |
+ kernel_compat_opencl.h | |
+ kernel_differential.h | |
+ kernel_emission.h | |
+ kernel_film.h | |
+ kernel_globals.h | |
+ kernel_id_passes.h | |
+ kernel_jitter.h | |
+ kernel_light.h | |
+ kernel_math.h | |
+ kernel_montecarlo.h | |
+ kernel_passes.h | |
+ kernel_path.h | |
+ kernel_path_branched.h | |
+ kernel_path_common.h | |
+ kernel_path_state.h | |
+ kernel_path_surface.h | |
+ kernel_path_subsurface.h | |
+ kernel_path_volume.h | |
+ kernel_profiling.h | |
+ kernel_projection.h | |
+ kernel_queues.h | |
+ kernel_random.h | |
+ kernel_shader.h | |
+ kernel_shadow.h | |
+ kernel_subsurface.h | |
+ kernel_textures.h | |
+ kernel_types.h | |
+ kernel_volume.h | |
+ kernel_work_stealing.h | |
+ kernel_write_passes.h | |
+) | |
+ | |
+set(SRC_KERNELS_CPU_HEADERS | |
+ kernel.h | |
+ kernels/cpu/kernel_cpu.h | |
+ kernels/cpu/kernel_cpu_impl.h | |
+ kernels/cpu/kernel_cpu_image.h | |
+ kernels/cpu/filter_cpu.h | |
+ kernels/cpu/filter_cpu_impl.h | |
+) | |
+ | |
+set(SRC_KERNELS_CUDA_HEADERS | |
+ kernels/cuda/kernel_config.h | |
+ kernels/cuda/kernel_cuda_image.h | |
+) | |
+ | |
+set(SRC_KERNELS_OPTIX_HEADERS | |
+) | |
+ | |
+set(SRC_KERNELS_OPENCL_HEADERS | |
+ kernels/opencl/kernel_split_function.h | |
+ kernels/opencl/kernel_opencl_image.h | |
+) | |
+ | |
+set(SRC_CLOSURE_HEADERS | |
+ closure/alloc.h | |
+ closure/bsdf.h | |
+ closure/bsdf_ashikhmin_velvet.h | |
+ closure/bsdf_diffuse.h | |
+ closure/bsdf_diffuse_ramp.h | |
+ closure/bsdf_microfacet.h | |
+ closure/bsdf_microfacet_multi.h | |
+ closure/bsdf_microfacet_multi_impl.h | |
+ closure/bsdf_oren_nayar.h | |
+ closure/bsdf_phong_ramp.h | |
+ closure/bsdf_reflection.h | |
+ closure/bsdf_refraction.h | |
+ closure/bsdf_toon.h | |
+ closure/bsdf_transparent.h | |
+ closure/bsdf_util.h | |
+ closure/bsdf_ashikhmin_shirley.h | |
+ closure/bsdf_hair.h | |
+ closure/bssrdf.h | |
+ closure/emissive.h | |
+ closure/volume.h | |
+ closure/bsdf_principled_diffuse.h | |
+ closure/bsdf_principled_sheen.h | |
+ closure/bsdf_hair_principled.h | |
+) | |
+ | |
+set(SRC_SVM_HEADERS | |
+ svm/svm.h | |
+ svm/svm_ao.h | |
+ svm/svm_aov.h | |
+ svm/svm_attribute.h | |
+ svm/svm_bevel.h | |
+ svm/svm_blackbody.h | |
+ svm/svm_bump.h | |
+ svm/svm_camera.h | |
+ svm/svm_clamp.h | |
+ svm/svm_closure.h | |
+ svm/svm_convert.h | |
+ svm/svm_checker.h | |
+ svm/svm_color_util.h | |
+ svm/svm_brick.h | |
+ svm/svm_displace.h | |
+ svm/svm_fresnel.h | |
+ svm/svm_wireframe.h | |
+ svm/svm_wavelength.h | |
+ svm/svm_gamma.h | |
+ svm/svm_brightness.h | |
+ svm/svm_geometry.h | |
+ svm/svm_gradient.h | |
+ svm/svm_hsv.h | |
+ svm/svm_ies.h | |
+ svm/svm_image.h | |
+ svm/svm_invert.h | |
+ svm/svm_light_path.h | |
+ svm/svm_magic.h | |
+ svm/svm_map_range.h | |
+ svm/svm_mapping.h | |
+ svm/svm_mapping_util.h | |
+ svm/svm_math.h | |
+ svm/svm_math_util.h | |
+ svm/svm_mix.h | |
+ svm/svm_musgrave.h | |
+ svm/svm_noise.h | |
+ svm/svm_noisetex.h | |
+ svm/svm_normal.h | |
+ svm/svm_ramp.h | |
+ svm/svm_ramp_util.h | |
+ svm/svm_sepcomb_hsv.h | |
+ svm/svm_sepcomb_vector.h | |
+ svm/svm_sky.h | |
+ svm/svm_tex_coord.h | |
+ svm/svm_fractal_noise.h | |
+ svm/svm_types.h | |
+ svm/svm_value.h | |
+ svm/svm_vector_transform.h | |
+ svm/svm_voronoi.h | |
+ svm/svm_voxel.h | |
+ svm/svm_wave.h | |
+ svm/svm_white_noise.h | |
+ svm/svm_vertex_color.h | |
+) | |
+ | |
+set(SRC_GEOM_HEADERS | |
+ geom/geom.h | |
+ geom/geom_attribute.h | |
+ geom/geom_curve.h | |
+ geom/geom_curve_intersect.h | |
+ geom/geom_motion_curve.h | |
+ geom/geom_motion_triangle.h | |
+ geom/geom_motion_triangle_intersect.h | |
+ geom/geom_motion_triangle_shader.h | |
+ geom/geom_object.h | |
+ geom/geom_patch.h | |
+ geom/geom_primitive.h | |
+ geom/geom_subd_triangle.h | |
+ geom/geom_triangle.h | |
+ geom/geom_triangle_intersect.h | |
+ geom/geom_volume.h | |
+) | |
+ | |
+set(SRC_FILTER_HEADERS | |
+ filter/filter.h | |
+ filter/filter_defines.h | |
+ filter/filter_features.h | |
+ filter/filter_features_sse.h | |
+ filter/filter_kernel.h | |
+ filter/filter_nlm_cpu.h | |
+ filter/filter_nlm_gpu.h | |
+ filter/filter_prefilter.h | |
+ filter/filter_reconstruction.h | |
+ filter/filter_transform.h | |
+ filter/filter_transform_gpu.h | |
+ filter/filter_transform_sse.h | |
+) | |
+ | |
+set(SRC_UTIL_HEADERS | |
+ ../util/util_atomic.h | |
+ ../util/util_color.h | |
+ ../util/util_defines.h | |
+ ../util/util_half.h | |
+ ../util/util_hash.h | |
+ ../util/util_math.h | |
+ ../util/util_math_fast.h | |
+ ../util/util_math_intersect.h | |
+ ../util/util_math_float2.h | |
+ ../util/util_math_float3.h | |
+ ../util/util_math_float4.h | |
+ ../util/util_math_int2.h | |
+ ../util/util_math_int3.h | |
+ ../util/util_math_int4.h | |
+ ../util/util_math_matrix.h | |
+ ../util/util_projection.h | |
+ ../util/util_rect.h | |
+ ../util/util_static_assert.h | |
+ ../util/util_transform.h | |
+ ../util/util_texture.h | |
+ ../util/util_types.h | |
+ ../util/util_types_float2.h | |
+ ../util/util_types_float2_impl.h | |
+ ../util/util_types_float3.h | |
+ ../util/util_types_float3_impl.h | |
+ ../util/util_types_float4.h | |
+ ../util/util_types_float4_impl.h | |
+ ../util/util_types_float8.h | |
+ ../util/util_types_float8_impl.h | |
+ ../util/util_types_int2.h | |
+ ../util/util_types_int2_impl.h | |
+ ../util/util_types_int3.h | |
+ ../util/util_types_int3_impl.h | |
+ ../util/util_types_int4.h | |
+ ../util/util_types_int4_impl.h | |
+ ../util/util_types_uchar2.h | |
+ ../util/util_types_uchar2_impl.h | |
+ ../util/util_types_uchar3.h | |
+ ../util/util_types_uchar3_impl.h | |
+ ../util/util_types_uchar4.h | |
+ ../util/util_types_uchar4_impl.h | |
+ ../util/util_types_uint2.h | |
+ ../util/util_types_uint2_impl.h | |
+ ../util/util_types_uint3.h | |
+ ../util/util_types_uint3_impl.h | |
+ ../util/util_types_uint4.h | |
+ ../util/util_types_uint4_impl.h | |
+ ../util/util_types_ushort4.h | |
+ ../util/util_types_vector3.h | |
+ ../util/util_types_vector3_impl.h | |
+) | |
+ | |
+set(SRC_SPLIT_HEADERS | |
+ split/kernel_branched.h | |
+ split/kernel_buffer_update.h | |
+ split/kernel_data_init.h | |
+ split/kernel_direct_lighting.h | |
+ split/kernel_do_volume.h | |
+ split/kernel_enqueue_inactive.h | |
+ split/kernel_holdout_emission_blurring_pathtermination_ao.h | |
+ split/kernel_indirect_background.h | |
+ split/kernel_indirect_subsurface.h | |
+ split/kernel_lamp_emission.h | |
+ split/kernel_next_iteration_setup.h | |
+ split/kernel_path_init.h | |
+ split/kernel_queue_enqueue.h | |
+ split/kernel_scene_intersect.h | |
+ split/kernel_shader_setup.h | |
+ split/kernel_shader_sort.h | |
+ split/kernel_shader_eval.h | |
+ split/kernel_shadow_blocked_ao.h | |
+ split/kernel_shadow_blocked_dl.h | |
+ split/kernel_split_common.h | |
+ split/kernel_split_data.h | |
+ split/kernel_split_data_types.h | |
+ split/kernel_subsurface_scatter.h | |
+) | |
+ | |
+set(LIB | |
+ | |
+) | |
+ | |
+# CUDA module | |
+ | |
+if(WITH_CYCLES_CUDA_BINARIES) | |
+ # 64 bit only | |
+ set(CUDA_BITS 64) | |
+ | |
+ # CUDA version | |
+ execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) | |
+ string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}") | |
+ string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}") | |
+ set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") | |
+ | |
+ # warn for other versions | |
+ if(CUDA_VERSION MATCHES "101") | |
+ else() | |
+ message(WARNING | |
+ "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " | |
+ "build may succeed but only CUDA 10.1 is officially supported") | |
+ endif() | |
+ | |
+ # build for each arch | |
+ set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu | |
+ ${SRC_HEADERS} | |
+ ${SRC_KERNELS_CUDA_HEADERS} | |
+ ${SRC_BVH_HEADERS} | |
+ ${SRC_SVM_HEADERS} | |
+ ${SRC_GEOM_HEADERS} | |
+ ${SRC_CLOSURE_HEADERS} | |
+ ${SRC_UTIL_HEADERS} | |
+ ) | |
+ set(cuda_filter_sources kernels/cuda/filter.cu | |
+ ${SRC_HEADERS} | |
+ ${SRC_KERNELS_CUDA_HEADERS} | |
+ ${SRC_FILTER_HEADERS} | |
+ ${SRC_UTIL_HEADERS} | |
+ ) | |
+ set(cuda_cubins) | |
+ | |
+ macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental) | |
+ if(${arch} MATCHES "compute_.*") | |
+ set(format "ptx") | |
+ else() | |
+ set(format "cubin") | |
+ endif() | |
+ set(cuda_file ${name}_${arch}.${format}) | |
+ | |
+ set(kernel_sources ${sources}) | |
+ if(NOT ${prev_arch} STREQUAL "none") | |
+ if(${prev_arch} MATCHES "compute_.*") | |
+ set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx) | |
+ else() | |
+ set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) | |
+ endif() | |
+ endif() | |
+ | |
+ set(cuda_kernel_src "/kernels/cuda/${name}.cu") | |
+ | |
+ set(cuda_flags | |
+ -D CCL_NAMESPACE_BEGIN= | |
+ -D CCL_NAMESPACE_END= | |
+ -D NVCC | |
+ -m ${CUDA_BITS} | |
+ -I ${CMAKE_CURRENT_SOURCE_DIR}/.. | |
+ -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda | |
+ --use_fast_math | |
+ -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}) | |
+ | |
+ if(${experimental}) | |
+ set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) | |
+ set(name ${name}_experimental) | |
+ endif() | |
+ | |
+ if(WITH_CYCLES_DEBUG) | |
+ set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) | |
+ endif() | |
+ | |
+ if(WITH_CYCLES_CUBIN_COMPILER) | |
+ string(SUBSTRING ${arch} 3 -1 CUDA_ARCH) | |
+ | |
+ # Needed to find libnvrtc-builtins.so. Can't do it from inside | |
+ # cycles_cubin_cc since the env variable is read before main() | |
+ if(APPLE) | |
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND} | |
+ -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") | |
+ elseif(UNIX) | |
+ set(CUBIN_CC_ENV ${CMAKE_COMMAND} | |
+ -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") | |
+ endif() | |
+ | |
+ add_custom_command( | |
+ OUTPUT ${cuda_cubin} | |
+ COMMAND ${CUBIN_CC_ENV} | |
+ "$<TARGET_FILE:cycles_cubin_cc>" | |
+ -target ${CUDA_ARCH} | |
+ -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} | |
+ ${cuda_flags} | |
+ -v | |
+ -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" | |
+ DEPENDS ${kernel_sources} cycles_cubin_cc) | |
+ set(cuda_file ${cuda_cubin}) | |
+ else() | |
+ add_custom_command( | |
+ OUTPUT ${cuda_file} | |
+ COMMAND ${CUDA_NVCC_EXECUTABLE} | |
+ -arch=${arch} | |
+ ${CUDA_NVCC_FLAGS} | |
+ --${format} | |
+ ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} | |
+ --ptxas-options="-v" | |
+ ${cuda_flags} | |
+ DEPENDS ${kernel_sources}) | |
+ endif() | |
+ delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib) | |
+ list(APPEND cuda_cubins ${cuda_file}) | |
+ | |
+ unset(cuda_debug_flags) | |
+ endmacro() | |
+ | |
+ set(prev_arch "none") | |
+ foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) | |
+ if(${arch} MATCHES "sm_2.") | |
+ message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.") | |
+ elseif(${arch} MATCHES "sm_7." AND ${CUDA_VERSION} LESS 100) | |
+ message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.") | |
+ else() | |
+ # Compile regular kernel | |
+ CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE) | |
+ CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE) | |
+ | |
+ if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) | |
+ # Compile split kernel | |
+ CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE) | |
+ endif() | |
+ | |
+ if(WITH_CYCLES_CUDA_BUILD_SERIAL) | |
+ set(prev_arch ${arch}) | |
+ endif() | |
+ endif() | |
+ endforeach() | |
+ | |
+ add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) | |
+ cycles_set_solution_folder(cycles_kernel_cuda) | |
+endif() | |
+ | |
+# OptiX PTX modules | |
+ | |
+if(WITH_CYCLES_DEVICE_OPTIX) | |
+ foreach(input ${SRC_OPTIX_KERNELS}) | |
+ get_filename_component(input_we ${input} NAME_WE) | |
+ | |
+ set(output "${CMAKE_CURRENT_BINARY_DIR}/${input_we}.ptx") | |
+ set(cuda_flags | |
+ -I "${OPTIX_INCLUDE_DIR}" | |
+ -I "${CMAKE_CURRENT_SOURCE_DIR}/.." | |
+ -I "${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda" | |
+ -arch=sm_30 | |
+ --use_fast_math | |
+ -o ${output}) | |
+ | |
+ if(WITH_CYCLES_DEBUG) | |
+ set(cuda_flags ${cuda_flags} | |
+ -D __KERNEL_DEBUG__) | |
+ endif() | |
+ | |
+ add_custom_command( | |
+ OUTPUT | |
+ ${output} | |
+ DEPENDS | |
+ ${input} | |
+ ${SRC_HEADERS} | |
+ ${SRC_KERNELS_CUDA_HEADERS} | |
+ ${SRC_KERNELS_OPTIX_HEADERS} | |
+ ${SRC_BVH_HEADERS} | |
+ ${SRC_SVM_HEADERS} | |
+ ${SRC_GEOM_HEADERS} | |
+ ${SRC_CLOSURE_HEADERS} | |
+ ${SRC_UTIL_HEADERS} | |
+ COMMAND | |
+ ${CUDA_NVCC_EXECUTABLE} --ptx ${cuda_flags} ${input} | |
+ WORKING_DIRECTORY | |
+ "${CMAKE_CURRENT_SOURCE_DIR}") | |
+ | |
+ list(APPEND optix_ptx ${output}) | |
+ | |
+ delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib) | |
+ endforeach() | |
+ | |
+ add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx}) | |
+ cycles_set_solution_folder(cycles_kernel_optix) | |
+endif() | |
+ | |
+# OSL module | |
+ | |
+if(WITH_CYCLES_OSL) | |
+ list(APPEND LIB | |
+ cycles_kernel_osl | |
+ ) | |
+ add_subdirectory(osl) | |
+ add_subdirectory(shaders) | |
+endif() | |
+ | |
+# CPU module | |
+ | |
+include_directories(${INC}) | |
+include_directories(SYSTEM ${INC_SYS}) | |
+ | |
+if(WITH_COMPILER_ASAN) | |
+ if(CMAKE_COMPILER_IS_GNUCC AND (NOT WITH_CYCLES_KERNEL_ASAN)) | |
+ # GCC hangs compiling the big kernel files with asan and release, so disable by default. | |
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-sanitize=all") | |
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-sanitize=vptr") | |
+ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang") | |
+    # With OSL, Cycles disables RTTI in some modules, which then breaks at link time | |
+    # when trying to use the vptr sanitizer (included in the 'undefined' general option). | |
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-sanitize=vptr") | |
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-sanitize=vptr") | |
+ endif() | |
+endif() | |
+ | |
+set_source_files_properties(kernels/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") | |
+set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") | |
+set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") | |
+ | |
+if(CXX_HAS_SSE) | |
+ set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") | |
+endif() | |
+ | |
+if(CXX_HAS_AVX) | |
+ set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") | |
+endif() | |
+ | |
+if(CXX_HAS_AVX2) | |
+ set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") | |
+ set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") | |
+endif() | |
+ | |
+cycles_add_library(cycles_kernel "${LIB}" | |
+ ${SRC_CPU_KERNELS} | |
+ ${SRC_CUDA_KERNELS} | |
+ ${SRC_OPTIX_KERNELS} | |
+ ${SRC_OPENCL_KERNELS} | |
+ ${SRC_HEADERS} | |
+ ${SRC_KERNELS_CPU_HEADERS} | |
+ ${SRC_KERNELS_CUDA_HEADERS} | |
+ ${SRC_KERNELS_OPTIX_HEADERS} | |
+ ${SRC_KERNELS_OPENCL_HEADERS} | |
+ ${SRC_BVH_HEADERS} | |
+ ${SRC_CLOSURE_HEADERS} | |
+ ${SRC_FILTER_HEADERS} | |
+ ${SRC_SVM_HEADERS} | |
+ ${SRC_GEOM_HEADERS} | |
+ ${SRC_SPLIT_HEADERS} | |
+) | |
+ | |
+source_group("bvh" FILES ${SRC_BVH_HEADERS}) | |
+source_group("closure" FILES ${SRC_CLOSURE_HEADERS}) | |
+source_group("filter" FILES ${SRC_FILTER_HEADERS}) | |
+source_group("geom" FILES ${SRC_GEOM_HEADERS}) | |
+source_group("kernel" FILES ${SRC_HEADERS}) | |
+source_group("kernel\\split" FILES ${SRC_SPLIT_HEADERS}) | |
+source_group("kernels\\cpu" FILES ${SRC_CPU_KERNELS} ${SRC_KERNELS_CPU_HEADERS}) | |
+source_group("kernels\\cuda" FILES ${SRC_CUDA_KERNELS} ${SRC_KERNELS_CUDA_HEADERS}) | |
+source_group("kernels\\opencl" FILES ${SRC_OPENCL_KERNELS} ${SRC_KERNELS_OPENCL_HEADERS}) | |
+source_group("kernels\\optix" FILES ${SRC_OPTIX_KERNELS} ${SRC_KERNELS_OPTIX_HEADERS}) | |
+source_group("svm" FILES ${SRC_SVM_HEADERS}) | |
+ | |
+if(WITH_CYCLES_CUDA) | |
+ add_dependencies(cycles_kernel cycles_kernel_cuda) | |
+endif() | |
+if(WITH_CYCLES_DEVICE_OPTIX) | |
+ add_dependencies(cycles_kernel cycles_kernel_optix) | |
+endif() | |
+ | |
+# OpenCL kernel | |
+ | |
+# set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl) | |
+# add_custom_command( | |
+# OUTPUT ${KERNEL_PREPROCESSED} | |
+# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED} | |
+# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS}) | |
+# add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) | |
+# delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) | |
+ | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_OPENCL_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CUDA_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_OPTIX_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/optix) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_OPENCL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/optix) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_FILTER_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/filter) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/svm) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/util) | |
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SPLIT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/split) | |
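A side note on the CUDA block in the listing above: CUDA_VERSION is derived by regex-matching
the `nvcc --version` banner and concatenating major and minor, so "release 10.1" becomes "101",
which the supported-version check then matches. A rough standalone C++ analogue, illustrative
only, since the build does this in CMake; the banner string below is a made-up sample:

#include <iostream>
#include <regex>
#include <string>

int main()
{
  const std::string nvcc_out = "Cuda compilation tools, release 10.1, V10.1.243";
  std::smatch m;
  if (std::regex_search(nvcc_out, m, std::regex("release ([0-9]+)\\.([0-9]+)"))) {
    const std::string cuda_version = m[1].str() + m[2].str(); /* "101" */
    std::cout << "CUDA_VERSION=" << cuda_version << "\n";
  }
  return 0;
}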
diff -Naur a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h | |
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h 2020-01-10 20:42:43.464256721 +0300 | |
@@ -0,0 +1,239 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#ifndef __KERNEL_ADAPTIVE_SAMPLING_H__ | |
+#define __KERNEL_ADAPTIVE_SAMPLING_H__ | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+/* Determine whether to continue sampling a given pixel or whether it has sufficiently converged. */ | |
+ | |
+ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg, | |
+ ccl_global float *buffer, | |
+ int sample) | |
+{ | |
+  /* TODO(Stefan): Is this better in linear, sRGB or something else? */ | |
+ float4 I = *((ccl_global float4 *)buffer); | |
+ float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer); | |
+  /* The per-pixel error as seen in section 2.1 of | |
+   * "A hierarchical automatic stopping condition for Monte Carlo global illumination". | |
+   * A small epsilon is added to the divisor to prevent division by zero. */ | |
+ float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) / | |
+ (sample * 0.0001f + sqrtf(I.x + I.y + I.z)); | |
+ if (error < kernel_data.integrator.adaptive_threshold * (float)sample) { | |
+    /* Set the fourth component to a non-zero value to indicate that this pixel has converged. */ | |
+ buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] += 1.0f; | |
+ } | |
+} | |
+ | |
+/* Adjust the values of an adaptively sampled pixel. */ | |
+ | |
+ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg, | |
+ ccl_global float *buffer, | |
+ float sample_multiplier) | |
+{ | |
+ *(ccl_global float4 *)(buffer) *= sample_multiplier; | |
+ | |
+  /* Scale the aux pass too; this is necessary for progressive rendering to work properly. */ | |
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer); | |
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer) *= sample_multiplier; | |
+ | |
+#ifdef __PASSES__ | |
+ int flag = kernel_data.film.pass_flag; | |
+ | |
+ if (flag & PASSMASK(SHADOW)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_shadow) *= sample_multiplier; | |
+ | |
+ if (flag & PASSMASK(MIST)) | |
+ *(ccl_global float *)(buffer + kernel_data.film.pass_mist) *= sample_multiplier; | |
+ | |
+ if (flag & PASSMASK(NORMAL)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_normal) *= sample_multiplier; | |
+ | |
+ if (flag & PASSMASK(UV)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_uv) *= sample_multiplier; | |
+ | |
+ if (flag & PASSMASK(MOTION)) { | |
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_motion) *= sample_multiplier; | |
+ *(ccl_global float *)(buffer + kernel_data.film.pass_motion_weight) *= sample_multiplier; | |
+ } | |
+ | |
+ if (kernel_data.film.use_light_pass) { | |
+ int light_flag = kernel_data.film.light_pass_flag; | |
+ | |
+ if (light_flag & PASSMASK(DIFFUSE_INDIRECT)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_indirect) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(GLOSSY_INDIRECT)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_indirect) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(TRANSMISSION_INDIRECT)) | |
+ *(ccl_global float3 *)(buffer + | |
+ kernel_data.film.pass_transmission_indirect) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(SUBSURFACE_INDIRECT)) | |
+ *(ccl_global float3 *)(buffer + | |
+ kernel_data.film.pass_subsurface_indirect) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(VOLUME_INDIRECT)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_indirect) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(DIFFUSE_DIRECT)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_direct) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(GLOSSY_DIRECT)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_direct) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(TRANSMISSION_DIRECT)) | |
+ *(ccl_global float3 *)(buffer + | |
+ kernel_data.film.pass_transmission_direct) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(SUBSURFACE_DIRECT)) | |
+ *(ccl_global float3 *)(buffer + | |
+ kernel_data.film.pass_subsurface_direct) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(VOLUME_DIRECT)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_direct) *= sample_multiplier; | |
+ | |
+ if (light_flag & PASSMASK(EMISSION)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_emission) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(BACKGROUND)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_background) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(AO)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_ao) *= sample_multiplier; | |
+ | |
+ if (light_flag & PASSMASK(DIFFUSE_COLOR)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_color) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(GLOSSY_COLOR)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_color) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(TRANSMISSION_COLOR)) | |
+ *(ccl_global float3 *)(buffer + | |
+ kernel_data.film.pass_transmission_color) *= sample_multiplier; | |
+ if (light_flag & PASSMASK(SUBSURFACE_COLOR)) | |
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_subsurface_color) *= sample_multiplier; | |
+ } | |
+#endif | |
+ | |
+#ifdef __DENOISING_FEATURES__ | |
+ | |
+# define scale_float3_variance(buffer, offset, scale) \ | |
+ *(buffer + offset) *= scale; \ | |
+ *(buffer + offset + 1) *= scale; \ | |
+ *(buffer + offset + 2) *= scale; \ | |
+ *(buffer + offset + 3) *= scale * scale; \ | |
+ *(buffer + offset + 4) *= scale * scale; \ | |
+ *(buffer + offset + 5) *= scale * scale; | |
+ | |
+# define scale_shadow_variance(buffer, offset, scale) \ | |
+ *(buffer + offset) *= scale; \ | |
+ *(buffer + offset + 1) *= scale; \ | |
+ *(buffer + offset + 2) *= scale * scale; | |
+ | |
+ if (kernel_data.film.pass_denoising_data) { | |
+ scale_shadow_variance( | |
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_A, sample_multiplier); | |
+ scale_shadow_variance( | |
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_B, sample_multiplier); | |
+ if (kernel_data.film.pass_denoising_clean) { | |
+ scale_float3_variance( | |
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier); | |
+ *(buffer + kernel_data.film.pass_denoising_clean) *= sample_multiplier; | |
+ *(buffer + kernel_data.film.pass_denoising_clean + 1) *= sample_multiplier; | |
+ *(buffer + kernel_data.film.pass_denoising_clean + 2) *= sample_multiplier; | |
+ } | |
+ else { | |
+ scale_float3_variance( | |
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier); | |
+ } | |
+ scale_float3_variance( | |
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample_multiplier); | |
+ scale_float3_variance( | |
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample_multiplier); | |
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH) *= sample_multiplier; | |
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH + | |
+ 1) *= sample_multiplier * sample_multiplier; | |
+ } | |
+#endif /* __DENOISING_FEATURES__ */ | |
+ | |
+ if (kernel_data.film.cryptomatte_passes) { | |
+ int num_slots = 0; | |
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 1 : 0; | |
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) ? 1 : 0; | |
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) ? 1 : 0; | |
+ num_slots = num_slots * 2 * kernel_data.film.cryptomatte_depth; | |
+ ccl_global float2 *id_buffer = (ccl_global float2 *)(buffer + | |
+ kernel_data.film.pass_cryptomatte); | |
+ for (int slot = 0; slot < num_slots; slot++) { | |
+ id_buffer[slot].y *= sample_multiplier; | |
+ } | |
+ } | |
+} | |
+ | |
+/* This is a simple box filter in two passes. | |
+ * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */ | |
+ | |
+ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile) | |
+{ | |
+ bool any = false; | |
+ bool prev = false; | |
+ for (int x = tile->x; x < tile->x + tile->w; ++x) { | |
+ int index = tile->offset + x + y * tile->stride; | |
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; | |
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer + | |
+ kernel_data.film.pass_adaptive_aux_buffer); | |
+ if (aux->w == 0.0f) { | |
+ any = true; | |
+ if (x > tile->x && !prev) { | |
+ index = index - 1; | |
+ buffer = tile->buffer + index * kernel_data.film.pass_stride; | |
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer); | |
+ aux->w = 0.0f; | |
+ } | |
+ prev = true; | |
+ } | |
+ else { | |
+ if (prev) { | |
+ aux->w = 0.0f; | |
+ } | |
+ prev = false; | |
+ } | |
+ } | |
+ return any; | |
+} | |
+ | |
+ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile) | |
+{ | |
+ bool prev = false; | |
+ bool any = false; | |
+ for (int y = tile->y; y < tile->y + tile->h; ++y) { | |
+ int index = tile->offset + x + y * tile->stride; | |
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; | |
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer + | |
+ kernel_data.film.pass_adaptive_aux_buffer); | |
+ if (aux->w == 0.0f) { | |
+ any = true; | |
+ if (y > tile->y && !prev) { | |
+ index = index - tile->stride; | |
+ buffer = tile->buffer + index * kernel_data.film.pass_stride; | |
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer); | |
+ aux->w = 0.0f; | |
+ } | |
+ prev = true; | |
+ } | |
+ else { | |
+ if (prev) { | |
+ aux->w = 0.0f; | |
+ } | |
+ prev = false; | |
+ } | |
+ } | |
+ return any; | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
+ | |
+#endif /* __KERNEL_ADAPTIVE_SAMPLING_H__ */ | |
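To make the stopping criterion in kernel_do_adaptive_stopping() concrete: I holds the radiance
accumulated over all samples and A the doubled accumulation over every other sample, so both
grow linearly with the sample count and the error is compared against threshold * sample. A
minimal scalar restatement of the same test, with made-up pixel values:

#include <cmath>
#include <cstdio>

struct float4 {
  float x, y, z, w;
};

/* Standalone restatement of the convergence test above; illustrative only. */
static bool pixel_converged(const float4 &I, const float4 &A, int sample, float threshold)
{
  const float error = (std::fabs(I.x - A.x) + std::fabs(I.y - A.y) + std::fabs(I.z - A.z)) /
                      (sample * 0.0001f + std::sqrt(I.x + I.y + I.z));
  return error < threshold * (float)sample;
}

int main()
{
  const float4 I = {40.2f, 39.8f, 40.0f, 0.0f}; /* sum over all 128 samples */
  const float4 A = {40.0f, 40.1f, 39.9f, 0.0f}; /* doubled sum over the even samples */
  std::printf("converged: %d\n", pixel_converged(I, A, 128, 0.01f) ? 1 : 0);
  return 0;
}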
diff -Naur a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h | |
--- a/intern/cycles/kernel/kernel_passes.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_passes.h 2020-01-10 20:42:43.464256721 +0300 | |
@@ -29,7 +29,9 @@ | |
if (kernel_data.film.pass_denoising_data == 0) | |
return; | |
- buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A; | |
+ buffer += sample_is_even(kernel_data.integrator.sampling_pattern, sample) ? | |
+ DENOISING_PASS_SHADOW_B : | |
+ DENOISING_PASS_SHADOW_A; | |
path_total = ensure_finite(path_total); | |
path_total_shaded = ensure_finite(path_total_shaded); | |
@@ -383,6 +385,38 @@ | |
#ifdef __KERNEL_DEBUG__ | |
kernel_write_debug_passes(kg, buffer, L); | |
#endif | |
+ | |
+  /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our | |
+   * stopping criteria. This is the heuristic from "A hierarchical automatic stopping condition | |
+   * for Monte Carlo global illumination", except that here it is applied per pixel and not in | |
+   * hierarchical tiles. */ | |
+ if (kernel_data.film.pass_adaptive_aux_buffer && | |
+ kernel_data.integrator.adaptive_threshold > 0.0f) { | |
+ if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { | |
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer, | |
+ make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f)); | |
+ } | |
+#ifdef __KERNEL_CPU__ | |
+ if (sample >= kernel_data.integrator.adaptive_min_samples - 1 && (sample & 0x3) == 3) { | |
+ kernel_do_adaptive_stopping(kg, buffer, sample); | |
+ } | |
+#endif | |
+ } | |
+ | |
+ /* Write the sample count as negative numbers initially to mark the samples as in progress. | |
+ * Once the tile has finished rendering, the sign gets flipped and all the pixel values | |
+ * are scaled as if they were taken at a uniform sample count. */ | |
+ if (kernel_data.film.pass_sample_count) { | |
+ /* Make sure it's a negative number. In progressive refine mode, this bit gets flipped between passes. */ | |
+#ifdef __ATOMIC_PASS_WRITE__ | |
+ atomic_fetch_and_or_uint32((ccl_global uint *)(buffer + kernel_data.film.pass_sample_count), | |
+ 0x80000000); | |
+#else | |
+ if (buffer[kernel_data.film.pass_sample_count] > 0) { | |
+ buffer[kernel_data.film.pass_sample_count] *= -1.0f; | |
+ } | |
+#endif | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_sample_count, -1.0f); | |
+ } | |
} | |
CCL_NAMESPACE_END | |
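A note on the sign trick in the hunk above: a sample count stored as a positive IEEE-754 float
is marked "in progress" by OR-ing 0x80000000 into its bit pattern, which flips only the sign
bit and leaves the magnitude intact. A tiny standalone demonstration, using memcpy in place of
the device-side atomic:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
  float sample_count = 12.0f;
  uint32_t bits;
  std::memcpy(&bits, &sample_count, sizeof(bits));
  bits |= 0x80000000u; /* same effect as atomic_fetch_and_or_uint32(..., 0x80000000) */
  std::memcpy(&sample_count, &bits, sizeof(bits));
  std::printf("%f\n", sample_count); /* prints -12.000000 */
  return 0;
}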
diff -Naur a/intern/cycles/kernel/kernel_passes.h.orig b/intern/cycles/kernel/kernel_passes.h.orig | |
--- a/intern/cycles/kernel/kernel_passes.h.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_passes.h.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,388 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include "kernel/kernel_id_passes.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+#ifdef __DENOISING_FEATURES__ | |
+ | |
+ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, | |
+ ccl_global float *buffer, | |
+ int sample, | |
+ float path_total, | |
+ float path_total_shaded) | |
+{ | |
+ if (kernel_data.film.pass_denoising_data == 0) | |
+ return; | |
+ | |
+ buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A; | |
+ | |
+ path_total = ensure_finite(path_total); | |
+ path_total_shaded = ensure_finite(path_total_shaded); | |
+ | |
+ kernel_write_pass_float(buffer, path_total); | |
+ kernel_write_pass_float(buffer + 1, path_total_shaded); | |
+ | |
+ float value = path_total_shaded / max(path_total, 1e-7f); | |
+ kernel_write_pass_float(buffer + 2, value * value); | |
+} | |
+ | |
+ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ ccl_addr_space PathState *state, | |
+ PathRadiance *L) | |
+{ | |
+ if (state->denoising_feature_weight == 0.0f) { | |
+ return; | |
+ } | |
+ | |
+ L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length); | |
+ | |
+ /* Skip implicitly transparent surfaces. */ | |
+ if (sd->flag & SD_HAS_ONLY_VOLUME) { | |
+ return; | |
+ } | |
+ | |
+ float3 normal = make_float3(0.0f, 0.0f, 0.0f); | |
+ float3 albedo = make_float3(0.0f, 0.0f, 0.0f); | |
+ float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; | |
+ | |
+ for (int i = 0; i < sd->num_closure; i++) { | |
+ ShaderClosure *sc = &sd->closure[i]; | |
+ | |
+ if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) | |
+ continue; | |
+ | |
+ /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ | |
+ normal += sc->N * sc->sample_weight; | |
+ sum_weight += sc->sample_weight; | |
+ if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { | |
+ float3 closure_albedo = sc->weight; | |
+ /* Closures that include a Fresnel term typically have weights close to 1 even though their | |
+ * actual contribution is significantly lower. | |
+ * To account for this, we scale their weight by the average fresnel factor (the same is also | |
+ * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ | |
+ if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { | |
+ MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc; | |
+ closure_albedo *= bsdf->extra->fresnel_color; | |
+ } | |
+ | |
+ albedo += closure_albedo; | |
+ sum_nonspecular_weight += sc->sample_weight; | |
+ } | |
+ } | |
+ | |
+ /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ | |
+ if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { | |
+ if (sum_weight != 0.0f) { | |
+ normal /= sum_weight; | |
+ } | |
+ | |
+ /* Transform normal into camera space. */ | |
+ const Transform worldtocamera = kernel_data.cam.worldtocamera; | |
+ normal = transform_direction(&worldtocamera, normal); | |
+ | |
+ L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal); | |
+ L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo); | |
+ | |
+ state->denoising_feature_weight = 0.0f; | |
+ } | |
+} | |
+#endif /* __DENOISING_FEATURES__ */ | |
+ | |
+#ifdef __KERNEL_DEBUG__ | |
+ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, | |
+ ccl_global float *buffer, | |
+ PathRadiance *L) | |
+{ | |
+ int flag = kernel_data.film.pass_flag; | |
+ if (flag & PASSMASK(BVH_TRAVERSED_NODES)) { | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes, | |
+ L->debug_data.num_bvh_traversed_nodes); | |
+ } | |
+ if (flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) { | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances, | |
+ L->debug_data.num_bvh_traversed_instances); | |
+ } | |
+ if (flag & PASSMASK(BVH_INTERSECTIONS)) { | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections, | |
+ L->debug_data.num_bvh_intersections); | |
+ } | |
+ if (flag & PASSMASK(RAY_BOUNCES)) { | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces, | |
+ L->debug_data.num_ray_bounces); | |
+ } | |
+} | |
+#endif /* __KERNEL_DEBUG__ */ | |
+ | |
+#ifdef __KERNEL_CPU__ | |
+# define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) \ | |
+ kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name) | |
+ccl_device_inline size_t kernel_write_id_pass_cpu( | |
+ float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map) | |
+{ | |
+ if (map) { | |
+ (*map)[id] += matte_weight; | |
+ return 0; | |
+ } | |
+#else /* __KERNEL_CPU__ */ | |
+# define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) \ | |
+ kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight) | |
+ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, | |
+ size_t depth, | |
+ float id, | |
+ float matte_weight) | |
+{ | |
+#endif /* __KERNEL_CPU__ */ | |
+ kernel_write_id_slots(buffer, depth, id, matte_weight); | |
+ return depth * 2; | |
+} | |
+ | |
+ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, | |
+ ccl_global float *buffer, | |
+ PathRadiance *L, | |
+ ShaderData *sd, | |
+ ccl_addr_space PathState *state, | |
+ float3 throughput) | |
+{ | |
+#ifdef __PASSES__ | |
+ int path_flag = state->flag; | |
+ | |
+ if (!(path_flag & PATH_RAY_CAMERA)) | |
+ return; | |
+ | |
+ int flag = kernel_data.film.pass_flag; | |
+ int light_flag = kernel_data.film.light_pass_flag; | |
+ | |
+ if (!((flag | light_flag) & PASS_ANY)) | |
+ return; | |
+ | |
+ if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { | |
+ if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || | |
+ average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { | |
+ if (state->sample == 0) { | |
+ if (flag & PASSMASK(DEPTH)) { | |
+ float depth = camera_distance(kg, sd->P); | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); | |
+ } | |
+ if (flag & PASSMASK(OBJECT_ID)) { | |
+ float id = object_pass_id(kg, sd->object); | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); | |
+ } | |
+ if (flag & PASSMASK(MATERIAL_ID)) { | |
+ float id = shader_pass_id(kg, sd); | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); | |
+ } | |
+ } | |
+ | |
+ if (flag & PASSMASK(NORMAL)) { | |
+ float3 normal = shader_bsdf_average_normal(kg, sd); | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); | |
+ } | |
+ if (flag & PASSMASK(UV)) { | |
+ float3 uv = primitive_uv(kg, sd); | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); | |
+ } | |
+ if (flag & PASSMASK(MOTION)) { | |
+ float4 speed = primitive_motion_vector(kg, sd); | |
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); | |
+ } | |
+ | |
+ state->flag |= PATH_RAY_SINGLE_PASS_DONE; | |
+ } | |
+ } | |
+ | |
+ if (kernel_data.film.cryptomatte_passes) { | |
+ const float matte_weight = average(throughput) * | |
+ (1.0f - average(shader_bsdf_transparency(kg, sd))); | |
+ if (matte_weight > 0.0f) { | |
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; | |
+ if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { | |
+ float id = object_cryptomatte_id(kg, sd->object); | |
+ cryptomatte_buffer += WRITE_ID_SLOT( | |
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object); | |
+ } | |
+ if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { | |
+ float id = shader_cryptomatte_id(kg, sd->shader); | |
+ cryptomatte_buffer += WRITE_ID_SLOT( | |
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material); | |
+ } | |
+ if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { | |
+ float id = object_cryptomatte_asset_id(kg, sd->object); | |
+ cryptomatte_buffer += WRITE_ID_SLOT( | |
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset); | |
+ } | |
+ } | |
+ } | |
+ | |
+ if (light_flag & PASSMASK_COMPONENT(DIFFUSE)) | |
+ L->color_diffuse += shader_bsdf_diffuse(kg, sd) * throughput; | |
+ if (light_flag & PASSMASK_COMPONENT(GLOSSY)) | |
+ L->color_glossy += shader_bsdf_glossy(kg, sd) * throughput; | |
+ if (light_flag & PASSMASK_COMPONENT(TRANSMISSION)) | |
+ L->color_transmission += shader_bsdf_transmission(kg, sd) * throughput; | |
+ if (light_flag & PASSMASK_COMPONENT(SUBSURFACE)) | |
+ L->color_subsurface += shader_bsdf_subsurface(kg, sd) * throughput; | |
+ | |
+ if (light_flag & PASSMASK(MIST)) { | |
+ /* bring depth into 0..1 range */ | |
+ float mist_start = kernel_data.film.mist_start; | |
+ float mist_inv_depth = kernel_data.film.mist_inv_depth; | |
+ | |
+ float depth = camera_distance(kg, sd->P); | |
+ float mist = saturate((depth - mist_start) * mist_inv_depth); | |
+ | |
+ /* falloff */ | |
+ float mist_falloff = kernel_data.film.mist_falloff; | |
+ | |
+ if (mist_falloff == 1.0f)
+ ; /* linear falloff, no remap needed */
+ else if (mist_falloff == 2.0f) | |
+ mist = mist * mist; | |
+ else if (mist_falloff == 0.5f) | |
+ mist = sqrtf(mist); | |
+ else | |
+ mist = powf(mist, mist_falloff); | |
+ | |
+ /* modulate by transparency */ | |
+ float3 alpha = shader_bsdf_alpha(kg, sd); | |
+ L->mist += (1.0f - mist) * average(throughput * alpha); | |
+ } | |
+#endif | |
+} | |
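+
+/* Worked example of the mist remap above: with mist_start = 10.0f,
+ * mist_inv_depth = 1.0f / 90.0f and mist_falloff = 2.0f, a surface at depth 55
+ * gives mist = saturate((55 - 10) / 90) = 0.5, remapped to 0.25 by the falloff.
+ * With full throughput and alpha, L->mist accumulates 1 - 0.25 = 0.75 per
+ * sample and the Mist pass (written as 1 - L->mist below) reads 0.25, so the
+ * pass runs from 0 at mist_start up to 1 at the far end of the mist range. */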
+ | |
+ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, | |
+ ccl_global float *buffer, | |
+ PathRadiance *L) | |
+{ | |
+#ifdef __PASSES__ | |
+ int light_flag = kernel_data.film.light_pass_flag; | |
+ | |
+ if (!kernel_data.film.use_light_pass) | |
+ return; | |
+ | |
+ if (light_flag & PASSMASK(DIFFUSE_INDIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse); | |
+ if (light_flag & PASSMASK(GLOSSY_INDIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy); | |
+ if (light_flag & PASSMASK(TRANSMISSION_INDIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, | |
+ L->indirect_transmission); | |
+ if (light_flag & PASSMASK(SUBSURFACE_INDIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, | |
+ L->indirect_subsurface); | |
+ if (light_flag & PASSMASK(VOLUME_INDIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter); | |
+ if (light_flag & PASSMASK(DIFFUSE_DIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse); | |
+ if (light_flag & PASSMASK(GLOSSY_DIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy); | |
+ if (light_flag & PASSMASK(TRANSMISSION_DIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, | |
+ L->direct_transmission); | |
+ if (light_flag & PASSMASK(SUBSURFACE_DIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, | |
+ L->direct_subsurface); | |
+ if (light_flag & PASSMASK(VOLUME_DIRECT)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter); | |
+ | |
+ if (light_flag & PASSMASK(EMISSION)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission); | |
+ if (light_flag & PASSMASK(BACKGROUND)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background); | |
+ if (light_flag & PASSMASK(AO)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao); | |
+ | |
+ if (light_flag & PASSMASK(DIFFUSE_COLOR)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse); | |
+ if (light_flag & PASSMASK(GLOSSY_COLOR)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy); | |
+ if (light_flag & PASSMASK(TRANSMISSION_COLOR)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, | |
+ L->color_transmission); | |
+ if (light_flag & PASSMASK(SUBSURFACE_COLOR)) | |
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface); | |
+ if (light_flag & PASSMASK(SHADOW)) { | |
+ float4 shadow = L->shadow; | |
+ shadow.w = kernel_data.film.pass_shadow_scale; | |
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow); | |
+ } | |
+ if (light_flag & PASSMASK(MIST)) | |
+ kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist); | |
+#endif | |
+} | |
+ | |
+ccl_device_inline void kernel_write_result(KernelGlobals *kg, | |
+ ccl_global float *buffer, | |
+ int sample, | |
+ PathRadiance *L) | |
+{ | |
+ PROFILING_INIT(kg, PROFILING_WRITE_RESULT); | |
+ PROFILING_OBJECT(PRIM_NONE); | |
+ | |
+ float alpha; | |
+ float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha); | |
+ | |
+ if (kernel_data.film.pass_flag & PASSMASK(COMBINED)) { | |
+ kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha)); | |
+ } | |
+ | |
+ kernel_write_light_passes(kg, buffer, L); | |
+ | |
+#ifdef __DENOISING_FEATURES__ | |
+ if (kernel_data.film.pass_denoising_data) { | |
+# ifdef __SHADOW_TRICKS__ | |
+ kernel_write_denoising_shadow(kg, | |
+ buffer + kernel_data.film.pass_denoising_data, | |
+ sample, | |
+ average(L->path_total), | |
+ average(L->path_total_shaded)); | |
+# else | |
+ kernel_write_denoising_shadow( | |
+ kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f); | |
+# endif | |
+ if (kernel_data.film.pass_denoising_clean) { | |
+ float3 noisy, clean; | |
+ path_radiance_split_denoising(kg, L, &noisy, &clean); | |
+ kernel_write_pass_float3_variance( | |
+ buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, noisy); | |
+ kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, clean); | |
+ } | |
+ else { | |
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + | |
+ DENOISING_PASS_COLOR, | |
+ ensure_finite3(L_sum)); | |
+ } | |
+ | |
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + | |
+ DENOISING_PASS_NORMAL, | |
+ L->denoising_normal); | |
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + | |
+ DENOISING_PASS_ALBEDO, | |
+ L->denoising_albedo); | |
+ kernel_write_pass_float_variance( | |
+ buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, L->denoising_depth); | |
+ } | |
+#endif /* __DENOISING_FEATURES__ */ | |
+ | |
+#ifdef __KERNEL_DEBUG__ | |
+ kernel_write_debug_passes(kg, buffer, L); | |
+#endif | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h | |
--- a/intern/cycles/kernel/kernel_path_branched.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_path_branched.h 2020-01-10 20:42:43.464256721 +0300 | |
@@ -523,6 +523,14 @@ | |
buffer += index * pass_stride; | |
+ if (kernel_data.film.pass_adaptive_aux_buffer) { | |
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer + | |
+ kernel_data.film.pass_adaptive_aux_buffer); | |
+ if (aux->w > 0.0f) { | |
+ return; | |
+ } | |
+ } | |
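+ /* The early out above skips pixels that adaptive sampling has already marked
+ * as converged: pass_adaptive_aux_buffer holds one float4 per pixel, and
+ * kernel_do_adaptive_stopping (in the new kernel_adaptive_sampling.h) writes a
+ * positive sample number into its w component once the per-pixel error
+ * estimate drops below the threshold, roughly:
+ *
+ * if (error < threshold) aux->w = sample; // w > 0: stop sampling this pixel
+ */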
+ | |
/* initialize random numbers and ray */ | |
uint rng_hash; | |
Ray ray; | |
diff -Naur a/intern/cycles/kernel/kernel_path_branched.h.orig b/intern/cycles/kernel/kernel_path_branched.h.orig | |
--- a/intern/cycles/kernel/kernel_path_branched.h.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_path_branched.h.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,545 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+#ifdef __BRANCHED_PATH__ | |
+ | |
+ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ ShaderData *emission_sd, | |
+ PathRadiance *L, | |
+ ccl_addr_space PathState *state, | |
+ float3 throughput) | |
+{ | |
+ int num_samples = kernel_data.integrator.ao_samples; | |
+ float num_samples_inv = 1.0f / num_samples; | |
+ float ao_factor = kernel_data.background.ao_factor; | |
+ float3 ao_N; | |
+ float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); | |
+ float3 ao_alpha = shader_bsdf_alpha(kg, sd); | |
+ | |
+ for (int j = 0; j < num_samples; j++) { | |
+ float bsdf_u, bsdf_v; | |
+ path_branched_rng_2D( | |
+ kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); | |
+ | |
+ float3 ao_D; | |
+ float ao_pdf; | |
+ | |
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); | |
+ | |
+ if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { | |
+ Ray light_ray; | |
+ float3 ao_shadow; | |
+ | |
+ light_ray.P = ray_offset(sd->P, sd->Ng); | |
+ light_ray.D = ao_D; | |
+ light_ray.t = kernel_data.background.ao_distance; | |
+ light_ray.time = sd->time; | |
+ light_ray.dP = sd->dP; | |
+ light_ray.dD = differential3_zero(); | |
+ | |
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { | |
+ path_radiance_accum_ao( | |
+ kg, L, state, throughput * num_samples_inv, ao_alpha, ao_bsdf, ao_shadow); | |
+ } | |
+ else { | |
+ path_radiance_accum_total_ao(L, state, throughput * num_samples_inv, ao_bsdf); | |
+ } | |
+ } | |
+ } | |
+} | |
+ | |
+# ifndef __SPLIT_KERNEL__ | |
+ | |
+# ifdef __VOLUME__ | |
+ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ PathState *state, | |
+ Ray *ray, | |
+ float3 *throughput, | |
+ ccl_addr_space Intersection *isect, | |
+ bool hit, | |
+ ShaderData *indirect_sd, | |
+ ShaderData *emission_sd, | |
+ PathRadiance *L) | |
+{ | |
+ /* Sanitize volume stack. */ | |
+ if (!hit) { | |
+ kernel_volume_clean_stack(kg, state->volume_stack); | |
+ } | |
+ | |
+ if (state->volume_stack[0].shader == SHADER_NONE) { | |
+ return; | |
+ } | |
+ | |
+ /* volume attenuation, emission, scatter */ | |
+ Ray volume_ray = *ray; | |
+ volume_ray.t = (hit) ? isect->t : FLT_MAX; | |
+ | |
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); | |
+ | |
+# ifdef __VOLUME_DECOUPLED__ | |
+ /* decoupled ray marching only supported on CPU */ | |
+ if (kernel_data.integrator.volume_decoupled) { | |
+ /* cache steps along volume for repeated sampling */ | |
+ VolumeSegment volume_segment; | |
+ | |
+ shader_setup_from_volume(kg, sd, &volume_ray); | |
+ kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous); | |
+ | |
+ /* direct light sampling */ | |
+ if (volume_segment.closure_flag & SD_SCATTER) { | |
+ volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack); | |
+ | |
+ int all = kernel_data.integrator.sample_all_lights_direct; | |
+ | |
+ kernel_branched_path_volume_connect_light( | |
+ kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment); | |
+ | |
+ /* indirect light sampling */ | |
+ int num_samples = kernel_data.integrator.volume_samples; | |
+ float num_samples_inv = 1.0f / num_samples; | |
+ | |
+ for (int j = 0; j < num_samples; j++) { | |
+ PathState ps = *state; | |
+ Ray pray = *ray; | |
+ float3 tp = *throughput; | |
+ | |
+ /* branch RNG state */ | |
+ path_state_branch(&ps, j, num_samples); | |
+ | |
+ /* Scatter sample. If we use distance sampling and take just one
+ * sample for direct and indirect light, we could share this
+ * computation, but it makes the code a bit complex. */
+ float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL); | |
+ float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE); | |
+ | |
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter( | |
+ kg, &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false); | |
+ | |
+ if (result == VOLUME_PATH_SCATTERED && | |
+ kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) { | |
+ kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp * num_samples_inv, &ps, L); | |
+ | |
+ /* for render passes, sum and reset indirect light pass variables | |
+ * for the next samples */ | |
+ path_radiance_sum_indirect(L); | |
+ path_radiance_reset_indirect(L); | |
+ } | |
+ } | |
+ } | |
+ | |
+ /* emission and transmittance */ | |
+ if (volume_segment.closure_flag & SD_EMISSION) | |
+ path_radiance_accum_emission(kg, L, state, *throughput, volume_segment.accum_emission); | |
+ *throughput *= volume_segment.accum_transmittance; | |
+ | |
+ /* free cached steps */ | |
+ kernel_volume_decoupled_free(kg, &volume_segment); | |
+ } | |
+ else | |
+# endif /* __VOLUME_DECOUPLED__ */ | |
+ { | |
+ /* GPU: no decoupled ray marching, scatter probabilistically */
+ int num_samples = kernel_data.integrator.volume_samples; | |
+ float num_samples_inv = 1.0f / num_samples; | |
+ | |
+ /* todo: we should cache the shader evaluations from stepping | |
+ * through the volume, for now we redo them multiple times */ | |
+ | |
+ for (int j = 0; j < num_samples; j++) { | |
+ PathState ps = *state; | |
+ Ray pray = *ray; | |
+ float3 tp = (*throughput) * num_samples_inv; | |
+ | |
+ /* branch RNG state */ | |
+ path_state_branch(&ps, j, num_samples); | |
+ | |
+ VolumeIntegrateResult result = kernel_volume_integrate( | |
+ kg, &ps, sd, &volume_ray, L, &tp, heterogeneous); | |
+ | |
+# ifdef __VOLUME_SCATTER__ | |
+ if (result == VOLUME_PATH_SCATTERED) { | |
+ /* todo: support equiangular, MIS and all light sampling. | |
+ * alternatively get decoupled ray marching working on the GPU */ | |
+ kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L); | |
+ | |
+ if (kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) { | |
+ kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp, &ps, L); | |
+ | |
+ /* for render passes, sum and reset indirect light pass variables | |
+ * for the next samples */ | |
+ path_radiance_sum_indirect(L); | |
+ path_radiance_reset_indirect(L); | |
+ } | |
+ } | |
+# endif /* __VOLUME_SCATTER__ */ | |
+ } | |
+ | |
+ /* todo: avoid this calculation using decoupled ray marching */ | |
+ kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput); | |
+ } | |
+} | |
+# endif /* __VOLUME__ */ | |
+ | |
+/* bounce off surface and integrate indirect light */ | |
+ccl_device_noinline_cpu void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ ShaderData *indirect_sd, | |
+ ShaderData *emission_sd, | |
+ float3 throughput, | |
+ float num_samples_adjust, | |
+ PathState *state, | |
+ PathRadiance *L) | |
+{ | |
+ float sum_sample_weight = 0.0f; | |
+# ifdef __DENOISING_FEATURES__ | |
+ if (state->denoising_feature_weight > 0.0f) { | |
+ for (int i = 0; i < sd->num_closure; i++) { | |
+ const ShaderClosure *sc = &sd->closure[i]; | |
+ | |
+ /* transparency is not handled here, but in outer loop */ | |
+ if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { | |
+ continue; | |
+ } | |
+ | |
+ sum_sample_weight += sc->sample_weight; | |
+ } | |
+ } | |
+ else { | |
+ sum_sample_weight = 1.0f; | |
+ } | |
+# endif /* __DENOISING_FEATURES__ */ | |
+ | |
+ for (int i = 0; i < sd->num_closure; i++) { | |
+ const ShaderClosure *sc = &sd->closure[i]; | |
+ | |
+ /* transparency is not handled here, but in outer loop */ | |
+ if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { | |
+ continue; | |
+ } | |
+ | |
+ int num_samples; | |
+ | |
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) | |
+ num_samples = kernel_data.integrator.diffuse_samples; | |
+ else if (CLOSURE_IS_BSDF_BSSRDF(sc->type)) | |
+ num_samples = 1; | |
+ else if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) | |
+ num_samples = kernel_data.integrator.glossy_samples; | |
+ else | |
+ num_samples = kernel_data.integrator.transmission_samples; | |
+ | |
+ num_samples = ceil_to_int(num_samples_adjust * num_samples); | |
+ | |
+ float num_samples_inv = num_samples_adjust / num_samples; | |
+ | |
+ for (int j = 0; j < num_samples; j++) { | |
+ PathState ps = *state; | |
+ float3 tp = throughput; | |
+ Ray bsdf_ray; | |
+# ifdef __SHADOW_TRICKS__ | |
+ float shadow_transparency = L->shadow_transparency; | |
+# endif | |
+ | |
+ ps.rng_hash = cmj_hash(state->rng_hash, i); | |
+ | |
+ if (!kernel_branched_path_surface_bounce( | |
+ kg, sd, sc, j, num_samples, &tp, &ps, &L->state, &bsdf_ray, sum_sample_weight)) { | |
+ continue; | |
+ } | |
+ | |
+ ps.rng_hash = state->rng_hash; | |
+ | |
+ kernel_path_indirect(kg, indirect_sd, emission_sd, &bsdf_ray, tp * num_samples_inv, &ps, L); | |
+ | |
+ /* for render passes, sum and reset indirect light pass variables | |
+ * for the next samples */ | |
+ path_radiance_sum_indirect(L); | |
+ path_radiance_reset_indirect(L); | |
+ | |
+# ifdef __SHADOW_TRICKS__ | |
+ L->shadow_transparency = shadow_transparency; | |
+# endif | |
+ } | |
+ } | |
+} | |
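+
+/* Example of the sample-count adjustment above: with diffuse_samples = 4 and
+ * num_samples_adjust = 0.5, the loop takes ceil(0.5 * 4) = 2 diffuse samples,
+ * each weighted by num_samples_inv = 0.5 / 2 = 0.25, so the branch still
+ * integrates to the intended total weight of 0.5. */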
+ | |
+# ifdef __SUBSURFACE__ | |
+ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ ShaderData *indirect_sd, | |
+ ShaderData *emission_sd, | |
+ PathRadiance *L, | |
+ PathState *state, | |
+ Ray *ray, | |
+ float3 throughput) | |
+{ | |
+ for (int i = 0; i < sd->num_closure; i++) { | |
+ ShaderClosure *sc = &sd->closure[i]; | |
+ | |
+ if (!CLOSURE_IS_BSSRDF(sc->type)) | |
+ continue; | |
+ | |
+ /* set up random number generator */ | |
+ uint lcg_state = lcg_state_init(state, 0x68bc21eb); | |
+ int num_samples = kernel_data.integrator.subsurface_samples * 3; | |
+ float num_samples_inv = 1.0f / num_samples; | |
+ uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i); | |
+ | |
+ /* do subsurface scatter step with copy of shader data, this will | |
+ * replace the BSSRDF with a diffuse BSDF closure */ | |
+ for (int j = 0; j < num_samples; j++) { | |
+ PathState hit_state = *state; | |
+ path_state_branch(&hit_state, j, num_samples); | |
+ hit_state.rng_hash = bssrdf_rng_hash; | |
+ | |
+ LocalIntersection ss_isect; | |
+ float bssrdf_u, bssrdf_v; | |
+ path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); | |
+ int num_hits = subsurface_scatter_multi_intersect( | |
+ kg, &ss_isect, sd, &hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true); | |
+ | |
+ hit_state.rng_offset += PRNG_BOUNCE_NUM; | |
+ | |
+# ifdef __VOLUME__ | |
+ Ray volume_ray = *ray; | |
+ bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
+ (sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME);
+# endif /* __VOLUME__ */ | |
+ | |
+ /* compute lighting with the BSDF closure */ | |
+ for (int hit = 0; hit < num_hits; hit++) { | |
+ ShaderData bssrdf_sd = *sd; | |
+ Bssrdf *bssrdf = (Bssrdf *)sc; | |
+ ClosureType bssrdf_type = sc->type; | |
+ float bssrdf_roughness = bssrdf->roughness; | |
+ subsurface_scatter_multi_setup( | |
+ kg, &ss_isect, hit, &bssrdf_sd, &hit_state, bssrdf_type, bssrdf_roughness); | |
+ | |
+# ifdef __VOLUME__ | |
+ if (need_update_volume_stack) { | |
+ /* Setup ray from previous surface point to the new one. */ | |
+ float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng); | |
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t); | |
+ | |
+ for (int k = 0; k < VOLUME_STACK_SIZE; k++) { | |
+ hit_state.volume_stack[k] = state->volume_stack[k]; | |
+ } | |
+ | |
+ kernel_volume_stack_update_for_subsurface( | |
+ kg, emission_sd, &volume_ray, hit_state.volume_stack); | |
+ } | |
+# endif /* __VOLUME__ */ | |
+ | |
+# ifdef __EMISSION__ | |
+ /* direct light */ | |
+ if (kernel_data.integrator.use_direct_light) { | |
+ int all = (kernel_data.integrator.sample_all_lights_direct) || | |
+ (hit_state.flag & PATH_RAY_SHADOW_CATCHER); | |
+ kernel_branched_path_surface_connect_light( | |
+ kg, &bssrdf_sd, emission_sd, &hit_state, throughput, num_samples_inv, L, all); | |
+ } | |
+# endif /* __EMISSION__ */ | |
+ | |
+ /* indirect light */ | |
+ kernel_branched_path_surface_indirect_light( | |
+ kg, &bssrdf_sd, indirect_sd, emission_sd, throughput, num_samples_inv, &hit_state, L); | |
+ } | |
+ } | |
+ } | |
+} | |
+# endif /* __SUBSURFACE__ */ | |
+ | |
+ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, | |
+ uint rng_hash, | |
+ int sample, | |
+ Ray ray, | |
+ ccl_global float *buffer, | |
+ PathRadiance *L) | |
+{ | |
+ /* initialize */ | |
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f); | |
+ | |
+ path_radiance_init(kg, L); | |
+ | |
+ /* shader data memory used for both volumes and surfaces, saves stack space */ | |
+ ShaderData sd; | |
+ /* shader data used by emission, shadows, volume stacks, indirect path */ | |
+ ShaderDataTinyStorage emission_sd_storage; | |
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); | |
+ ShaderData indirect_sd; | |
+ | |
+ PathState state; | |
+ path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); | |
+ | |
+ /* Main Loop | |
+ * Here we only handle transparency intersections from the camera ray. | |
+ * Indirect bounces are handled in kernel_branched_path_surface_indirect_light(). | |
+ */ | |
+ for (;;) { | |
+ /* Find intersection with objects in scene. */ | |
+ Intersection isect; | |
+ bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L); | |
+ | |
+# ifdef __VOLUME__ | |
+ /* Volume integration. */ | |
+ kernel_branched_path_volume( | |
+ kg, &sd, &state, &ray, &throughput, &isect, hit, &indirect_sd, emission_sd, L); | |
+# endif /* __VOLUME__ */ | |
+ | |
+ /* Shade background. */ | |
+ if (!hit) { | |
+ kernel_path_background(kg, &state, &ray, throughput, &sd, buffer, L); | |
+ break; | |
+ } | |
+ | |
+ /* Setup and evaluate shader. */ | |
+ shader_setup_from_ray(kg, &sd, &isect, &ray); | |
+ | |
+ /* Skip most work for volume bounding surface. */ | |
+# ifdef __VOLUME__ | |
+ if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { | |
+# endif | |
+ | |
+ shader_eval_surface(kg, &sd, &state, buffer, state.flag); | |
+ shader_merge_closures(&sd); | |
+ | |
+ /* Apply shadow catcher, holdout, emission. */ | |
+ if (!kernel_path_shader_apply(kg, &sd, &state, &ray, throughput, emission_sd, L, buffer)) { | |
+ break; | |
+ } | |
+ | |
+ /* transparency termination */ | |
+ if (state.flag & PATH_RAY_TRANSPARENT) { | |
+ /* Path termination. This is a strange place to put the termination; it is
+ * mainly due to the mixed-in MIS that we use. It causes too many unneeded
+ * shader evaluations, since we only need emission if we are going to
+ * terminate. */
+ float probability = path_state_continuation_probability(kg, &state, throughput); | |
+ | |
+ if (probability == 0.0f) { | |
+ break; | |
+ } | |
+ else if (probability != 1.0f) { | |
+ float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE); | |
+ | |
+ if (terminate >= probability) | |
+ break; | |
+ | |
+ throughput /= probability; | |
+ } | |
+ } | |
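+
+ /* Dividing throughput by the continuation probability above keeps the
+ * estimator unbiased: E = p * (T / p) + (1 - p) * 0 = T. */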
+ | |
+# ifdef __DENOISING_FEATURES__ | |
+ kernel_update_denoising_features(kg, &sd, &state, L); | |
+# endif | |
+ | |
+# ifdef __AO__ | |
+ /* ambient occlusion */ | |
+ if (kernel_data.integrator.use_ambient_occlusion) { | |
+ kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput); | |
+ } | |
+# endif /* __AO__ */ | |
+ | |
+# ifdef __SUBSURFACE__ | |
+ /* bssrdf scatter to a different location on the same object */ | |
+ if (sd.flag & SD_BSSRDF) { | |
+ kernel_branched_path_subsurface_scatter( | |
+ kg, &sd, &indirect_sd, emission_sd, L, &state, &ray, throughput); | |
+ } | |
+# endif /* __SUBSURFACE__ */ | |
+ | |
+ PathState hit_state = state; | |
+ | |
+# ifdef __EMISSION__ | |
+ /* direct light */ | |
+ if (kernel_data.integrator.use_direct_light) { | |
+ int all = (kernel_data.integrator.sample_all_lights_direct) || | |
+ (state.flag & PATH_RAY_SHADOW_CATCHER); | |
+ kernel_branched_path_surface_connect_light( | |
+ kg, &sd, emission_sd, &hit_state, throughput, 1.0f, L, all); | |
+ } | |
+# endif /* __EMISSION__ */ | |
+ | |
+ /* indirect light */ | |
+ kernel_branched_path_surface_indirect_light( | |
+ kg, &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L); | |
+ | |
+ /* continue in case of transparency */ | |
+ throughput *= shader_bsdf_transparency(kg, &sd); | |
+ | |
+ if (is_zero(throughput)) | |
+ break; | |
+ | |
+ /* Update Path State */ | |
+ path_state_next(kg, &state, LABEL_TRANSPARENT); | |
+ | |
+# ifdef __VOLUME__ | |
+ } | |
+ else { | |
+ if (!path_state_volume_next(kg, &state)) { | |
+ break; | |
+ } | |
+ } | |
+# endif | |
+ | |
+ ray.P = ray_offset(sd.P, -sd.Ng); | |
+ ray.t -= sd.ray_length; /* clipping works through transparent */ | |
+ | |
+# ifdef __RAY_DIFFERENTIALS__ | |
+ ray.dP = sd.dP; | |
+ ray.dD.dx = -sd.dI.dx; | |
+ ray.dD.dy = -sd.dI.dy; | |
+# endif /* __RAY_DIFFERENTIALS__ */ | |
+ | |
+# ifdef __VOLUME__ | |
+ /* enter/exit volume */ | |
+ kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack); | |
+# endif /* __VOLUME__ */ | |
+ } | |
+} | |
+ | |
+ccl_device void kernel_branched_path_trace( | |
+ KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride) | |
+{ | |
+ /* buffer offset */ | |
+ int index = offset + x + y * stride; | |
+ int pass_stride = kernel_data.film.pass_stride; | |
+ | |
+ buffer += index * pass_stride; | |
+ | |
+ /* initialize random numbers and ray */ | |
+ uint rng_hash; | |
+ Ray ray; | |
+ | |
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); | |
+ | |
+ /* integrate */ | |
+ PathRadiance L; | |
+ | |
+ if (ray.t != 0.0f) { | |
+ kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L); | |
+ kernel_write_result(kg, buffer, sample, &L); | |
+ } | |
+} | |
+ | |
+# endif /* __SPLIT_KERNEL__ */ | |
+ | |
+#endif /* __BRANCHED_PATH__ */ | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h | |
--- a/intern/cycles/kernel/kernel_path.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_path.h 2020-01-10 20:42:43.464256721 +0300 | |
@@ -31,6 +31,7 @@ | |
#include "kernel/kernel_accumulate.h" | |
#include "kernel/kernel_shader.h" | |
#include "kernel/kernel_light.h" | |
+#include "kernel/kernel_adaptive_sampling.h" | |
#include "kernel/kernel_passes.h" | |
#if defined(__VOLUME__) || defined(__SUBSURFACE__) | |
@@ -656,6 +657,14 @@ | |
buffer += index * pass_stride; | |
+ if (kernel_data.film.pass_adaptive_aux_buffer) { | |
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer + | |
+ kernel_data.film.pass_adaptive_aux_buffer); | |
+ if (aux->w > 0.0f) { | |
+ return; | |
+ } | |
+ } | |
+ | |
/* Initialize random numbers and sample ray. */ | |
uint rng_hash; | |
Ray ray; | |
diff -Naur a/intern/cycles/kernel/kernel_path.h.orig b/intern/cycles/kernel/kernel_path.h.orig | |
--- a/intern/cycles/kernel/kernel_path.h.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_path.h.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,698 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#ifdef __OSL__ | |
+# include "kernel/osl/osl_shader.h" | |
+#endif | |
+ | |
+#include "kernel/kernel_random.h" | |
+#include "kernel/kernel_projection.h" | |
+#include "kernel/kernel_montecarlo.h" | |
+#include "kernel/kernel_differential.h" | |
+#include "kernel/kernel_camera.h" | |
+ | |
+#include "kernel/geom/geom.h" | |
+#include "kernel/bvh/bvh.h" | |
+ | |
+#include "kernel/kernel_write_passes.h" | |
+#include "kernel/kernel_accumulate.h" | |
+#include "kernel/kernel_shader.h" | |
+#include "kernel/kernel_light.h" | |
+#include "kernel/kernel_passes.h" | |
+ | |
+#if defined(__VOLUME__) || defined(__SUBSURFACE__) | |
+# include "kernel/kernel_volume.h" | |
+#endif | |
+ | |
+#ifdef __SUBSURFACE__ | |
+# include "kernel/kernel_subsurface.h" | |
+#endif | |
+ | |
+#include "kernel/kernel_path_state.h" | |
+#include "kernel/kernel_shadow.h" | |
+#include "kernel/kernel_emission.h" | |
+#include "kernel/kernel_path_common.h" | |
+#include "kernel/kernel_path_surface.h" | |
+#include "kernel/kernel_path_volume.h" | |
+#include "kernel/kernel_path_subsurface.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg, | |
+ ccl_addr_space PathState *state, | |
+ Ray *ray, | |
+ Intersection *isect, | |
+ PathRadiance *L) | |
+{ | |
+ PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT); | |
+ | |
+ uint visibility = path_state_ray_visibility(kg, state); | |
+ | |
+ if (path_state_ao_bounce(kg, state)) { | |
+ visibility = PATH_RAY_SHADOW; | |
+ ray->t = kernel_data.background.ao_distance; | |
+ } | |
+ | |
+ bool hit = scene_intersect(kg, ray, visibility, isect); | |
+ | |
+#ifdef __KERNEL_DEBUG__ | |
+ if (state->flag & PATH_RAY_CAMERA) { | |
+ L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes; | |
+ L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances; | |
+ L->debug_data.num_bvh_intersections += isect->num_intersections; | |
+ } | |
+ L->debug_data.num_ray_bounces++; | |
+#endif /* __KERNEL_DEBUG__ */ | |
+ | |
+ return hit; | |
+} | |
+ | |
+ccl_device_forceinline void kernel_path_lamp_emission(KernelGlobals *kg, | |
+ ccl_addr_space PathState *state, | |
+ Ray *ray, | |
+ float3 throughput, | |
+ ccl_addr_space Intersection *isect, | |
+ ShaderData *emission_sd, | |
+ PathRadiance *L) | |
+{ | |
+ PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION); | |
+ | |
+#ifdef __LAMP_MIS__ | |
+ if (kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) { | |
+ /* ray starting from previous non-transparent bounce */ | |
+ Ray light_ray ccl_optional_struct_init; | |
+ | |
+ light_ray.P = ray->P - state->ray_t * ray->D; | |
+ state->ray_t += isect->t; | |
+ light_ray.D = ray->D; | |
+ light_ray.t = state->ray_t; | |
+ light_ray.time = ray->time; | |
+ light_ray.dD = ray->dD; | |
+ light_ray.dP = ray->dP; | |
+ | |
+ /* intersect with lamp */ | |
+ indirect_lamp_emission(kg, emission_sd, state, L, &light_ray, throughput); | |
+ } | |
+#endif /* __LAMP_MIS__ */ | |
+} | |
+ | |
+ccl_device_forceinline void kernel_path_background(KernelGlobals *kg, | |
+ ccl_addr_space PathState *state, | |
+ ccl_addr_space Ray *ray, | |
+ float3 throughput, | |
+ ShaderData *sd, | |
+ ccl_global float *buffer, | |
+ PathRadiance *L) | |
+{ | |
+ /* eval background shader if nothing hit */ | |
+ if (kernel_data.background.transparent && (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { | |
+ L->transparent += average(throughput); | |
+ | |
+#ifdef __PASSES__ | |
+ if (!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND))) | |
+#endif /* __PASSES__ */ | |
+ return; | |
+ } | |
+ | |
+ /* When using the ao bounces approximation, adjust background | |
+ * shader intensity with ao factor. */ | |
+ if (path_state_ao_bounce(kg, state)) { | |
+ throughput *= kernel_data.background.ao_bounces_factor; | |
+ } | |
+ | |
+#ifdef __BACKGROUND__ | |
+ /* sample background shader */ | |
+ float3 L_background = indirect_background(kg, sd, state, buffer, ray); | |
+ path_radiance_accum_background(kg, L, state, throughput, L_background); | |
+#endif /* __BACKGROUND__ */ | |
+} | |
+ | |
+#ifndef __SPLIT_KERNEL__ | |
+ | |
+# ifdef __VOLUME__ | |
+ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ PathState *state, | |
+ Ray *ray, | |
+ float3 *throughput, | |
+ ccl_addr_space Intersection *isect, | |
+ bool hit, | |
+ ShaderData *emission_sd, | |
+ PathRadiance *L) | |
+{ | |
+ PROFILING_INIT(kg, PROFILING_VOLUME); | |
+ | |
+ /* Sanitize volume stack. */ | |
+ if (!hit) { | |
+ kernel_volume_clean_stack(kg, state->volume_stack); | |
+ } | |
+ | |
+ if (state->volume_stack[0].shader == SHADER_NONE) { | |
+ return VOLUME_PATH_ATTENUATED; | |
+ } | |
+ | |
+ /* volume attenuation, emission, scatter */ | |
+ Ray volume_ray = *ray; | |
+ volume_ray.t = (hit) ? isect->t : FLT_MAX; | |
+ | |
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); | |
+ | |
+# ifdef __VOLUME_DECOUPLED__ | |
+ int sampling_method = volume_stack_sampling_method(kg, state->volume_stack); | |
+ bool direct = (state->flag & PATH_RAY_CAMERA) != 0; | |
+ bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method); | |
+ | |
+ if (decoupled) { | |
+ /* cache steps along volume for repeated sampling */ | |
+ VolumeSegment volume_segment; | |
+ | |
+ shader_setup_from_volume(kg, sd, &volume_ray); | |
+ kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous); | |
+ | |
+ volume_segment.sampling_method = sampling_method; | |
+ | |
+ /* emission */ | |
+ if (volume_segment.closure_flag & SD_EMISSION) | |
+ path_radiance_accum_emission(kg, L, state, *throughput, volume_segment.accum_emission); | |
+ | |
+ /* scattering */ | |
+ VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED; | |
+ | |
+ if (volume_segment.closure_flag & SD_SCATTER) { | |
+ int all = kernel_data.integrator.sample_all_lights_indirect; | |
+ | |
+ /* direct light sampling */ | |
+ kernel_branched_path_volume_connect_light( | |
+ kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment); | |
+ | |
+ /* Indirect sample. If we use distance sampling and take just
+ * one sample for direct and indirect light, we could share
+ * this computation, but it makes the code a bit complex. */
+ float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); | |
+ float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); | |
+ | |
+ result = kernel_volume_decoupled_scatter( | |
+ kg, state, &volume_ray, sd, throughput, rphase, rscatter, &volume_segment, NULL, true); | |
+ } | |
+ | |
+ /* free cached steps */ | |
+ kernel_volume_decoupled_free(kg, &volume_segment); | |
+ | |
+ if (result == VOLUME_PATH_SCATTERED) { | |
+ if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) | |
+ return VOLUME_PATH_SCATTERED; | |
+ else | |
+ return VOLUME_PATH_MISSED; | |
+ } | |
+ else { | |
+ *throughput *= volume_segment.accum_transmittance; | |
+ } | |
+ } | |
+ else | |
+# endif /* __VOLUME_DECOUPLED__ */ | |
+ { | |
+ /* integrate along volume segment with distance sampling */ | |
+ VolumeIntegrateResult result = kernel_volume_integrate( | |
+ kg, state, sd, &volume_ray, L, throughput, heterogeneous); | |
+ | |
+# ifdef __VOLUME_SCATTER__ | |
+ if (result == VOLUME_PATH_SCATTERED) { | |
+ /* direct lighting */ | |
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); | |
+ | |
+ /* indirect light bounce */ | |
+ if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) | |
+ return VOLUME_PATH_SCATTERED; | |
+ else | |
+ return VOLUME_PATH_MISSED; | |
+ } | |
+# endif /* __VOLUME_SCATTER__ */ | |
+ } | |
+ | |
+ return VOLUME_PATH_ATTENUATED; | |
+} | |
+# endif /* __VOLUME__ */ | |
+ | |
+#endif /* __SPLIT_KERNEL__ */ | |
+ | |
+ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ ccl_addr_space PathState *state, | |
+ ccl_addr_space Ray *ray, | |
+ float3 throughput, | |
+ ShaderData *emission_sd, | |
+ PathRadiance *L, | |
+ ccl_global float *buffer) | |
+{ | |
+ PROFILING_INIT(kg, PROFILING_SHADER_APPLY); | |
+ | |
+#ifdef __SHADOW_TRICKS__ | |
+ if ((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) { | |
+ if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) { | |
+ state->flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_STORE_SHADOW_INFO); | |
+ | |
+ float3 bg = make_float3(0.0f, 0.0f, 0.0f); | |
+ if (!kernel_data.background.transparent) { | |
+ bg = indirect_background(kg, emission_sd, state, NULL, ray); | |
+ } | |
+ path_radiance_accum_shadowcatcher(L, throughput, bg); | |
+ } | |
+ } | |
+ else if (state->flag & PATH_RAY_SHADOW_CATCHER) { | |
+ /* Only update transparency after shadow catcher bounce. */ | |
+ L->shadow_transparency *= average(shader_bsdf_transparency(kg, sd)); | |
+ } | |
+#endif /* __SHADOW_TRICKS__ */ | |
+ | |
+ /* holdout */ | |
+#ifdef __HOLDOUT__ | |
+ if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && | |
+ (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { | |
+ if (kernel_data.background.transparent) { | |
+ float3 holdout_weight; | |
+ if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { | |
+ holdout_weight = make_float3(1.0f, 1.0f, 1.0f); | |
+ } | |
+ else { | |
+ holdout_weight = shader_holdout_eval(kg, sd); | |
+ } | |
+ /* any throughput is ok, should all be identical here */ | |
+ L->transparent += average(holdout_weight * throughput); | |
+ } | |
+ | |
+ if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { | |
+ return false; | |
+ } | |
+ } | |
+#endif /* __HOLDOUT__ */ | |
+ | |
+ /* holdout mask objects do not write data passes */ | |
+ kernel_write_data_passes(kg, buffer, L, sd, state, throughput); | |
+ | |
+ /* blurring of bsdf after bounces, for rays that have a small likelihood | |
+ * of following this particular path (diffuse, rough glossy) */ | |
+ if (kernel_data.integrator.filter_glossy != FLT_MAX) { | |
+ float blur_pdf = kernel_data.integrator.filter_glossy * state->min_ray_pdf; | |
+ | |
+ if (blur_pdf < 1.0f) { | |
+ float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; | |
+ shader_bsdf_blur(kg, sd, blur_roughness); | |
+ } | |
+ } | |
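+
+ /* Example: with filter_glossy = 1.0 and min_ray_pdf = 0.25 (an unlikely
+ * path), blur_pdf = 0.25 and the BSDF is blurred with roughness
+ * sqrtf(1.0f - 0.25f) * 0.5f, about 0.43; likely paths with blur_pdf >= 1
+ * are left sharp. */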
+ | |
+#ifdef __EMISSION__ | |
+ /* emission */ | |
+ if (sd->flag & SD_EMISSION) { | |
+ float3 emission = indirect_primitive_emission( | |
+ kg, sd, sd->ray_length, state->flag, state->ray_pdf); | |
+ path_radiance_accum_emission(kg, L, state, throughput, emission); | |
+ } | |
+#endif /* __EMISSION__ */ | |
+ | |
+ return true; | |
+} | |
+ | |
+#ifdef __KERNEL_OPTIX__ | |
+ccl_device_inline /* inline trace calls */ | |
+#else | |
+ccl_device_noinline | |
+#endif | |
+ void | |
+ kernel_path_ao(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ ShaderData *emission_sd, | |
+ PathRadiance *L, | |
+ ccl_addr_space PathState *state, | |
+ float3 throughput, | |
+ float3 ao_alpha) | |
+{ | |
+ PROFILING_INIT(kg, PROFILING_AO); | |
+ | |
+ /* todo: solve correlation */ | |
+ float bsdf_u, bsdf_v; | |
+ | |
+ path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); | |
+ | |
+ float ao_factor = kernel_data.background.ao_factor; | |
+ float3 ao_N; | |
+ float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); | |
+ float3 ao_D; | |
+ float ao_pdf; | |
+ | |
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); | |
+ | |
+ if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { | |
+ Ray light_ray; | |
+ float3 ao_shadow; | |
+ | |
+ light_ray.P = ray_offset(sd->P, sd->Ng); | |
+ light_ray.D = ao_D; | |
+ light_ray.t = kernel_data.background.ao_distance; | |
+ light_ray.time = sd->time; | |
+ light_ray.dP = sd->dP; | |
+ light_ray.dD = differential3_zero(); | |
+ | |
+ if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { | |
+ path_radiance_accum_ao(kg, L, state, throughput, ao_alpha, ao_bsdf, ao_shadow); | |
+ } | |
+ else { | |
+ path_radiance_accum_total_ao(L, state, throughput, ao_bsdf); | |
+ } | |
+ } | |
+} | |
+ | |
+#ifndef __SPLIT_KERNEL__ | |
+ | |
+# if defined(__BRANCHED_PATH__) || defined(__BAKING__) | |
+ | |
+ccl_device void kernel_path_indirect(KernelGlobals *kg, | |
+ ShaderData *sd, | |
+ ShaderData *emission_sd, | |
+ Ray *ray, | |
+ float3 throughput, | |
+ PathState *state, | |
+ PathRadiance *L) | |
+{ | |
+# ifdef __SUBSURFACE__ | |
+ SubsurfaceIndirectRays ss_indirect; | |
+ kernel_path_subsurface_init_indirect(&ss_indirect); | |
+ | |
+ for (;;) { | |
+# endif /* __SUBSURFACE__ */ | |
+ | |
+ /* path iteration */ | |
+ for (;;) { | |
+ /* Find intersection with objects in scene. */ | |
+ Intersection isect; | |
+ bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L); | |
+ | |
+ /* Find intersection with lamps and compute emission for MIS. */ | |
+ kernel_path_lamp_emission(kg, state, ray, throughput, &isect, sd, L); | |
+ | |
+# ifdef __VOLUME__ | |
+ /* Volume integration. */ | |
+ VolumeIntegrateResult result = kernel_path_volume( | |
+ kg, sd, state, ray, &throughput, &isect, hit, emission_sd, L); | |
+ | |
+ if (result == VOLUME_PATH_SCATTERED) { | |
+ continue; | |
+ } | |
+ else if (result == VOLUME_PATH_MISSED) { | |
+ break; | |
+ } | |
+# endif /* __VOLUME__*/ | |
+ | |
+ /* Shade background. */ | |
+ if (!hit) { | |
+ kernel_path_background(kg, state, ray, throughput, sd, NULL, L); | |
+ break; | |
+ } | |
+ else if (path_state_ao_bounce(kg, state)) { | |
+ break; | |
+ } | |
+ | |
+ /* Setup shader data. */ | |
+ shader_setup_from_ray(kg, sd, &isect, ray); | |
+ | |
+ /* Skip most work for volume bounding surface. */ | |
+# ifdef __VOLUME__ | |
+ if (!(sd->flag & SD_HAS_ONLY_VOLUME)) { | |
+# endif | |
+ | |
+ /* Evaluate shader. */ | |
+ shader_eval_surface(kg, sd, state, NULL, state->flag); | |
+ shader_prepare_closures(sd, state); | |
+ | |
+ /* Apply shadow catcher, holdout, emission. */ | |
+ if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, NULL)) { | |
+ break; | |
+ } | |
+ | |
+ /* Path termination. This is a strange place to put the termination; it is
+ * mainly due to the mixed-in MIS that we use. It causes too many unneeded
+ * shader evaluations, since we only need emission if we are going to
+ * terminate. */
+ float probability = path_state_continuation_probability(kg, state, throughput); | |
+ | |
+ if (probability == 0.0f) { | |
+ break; | |
+ } | |
+ else if (probability != 1.0f) { | |
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); | |
+ | |
+ if (terminate >= probability) | |
+ break; | |
+ | |
+ throughput /= probability; | |
+ } | |
+ | |
+# ifdef __DENOISING_FEATURES__ | |
+ kernel_update_denoising_features(kg, sd, state, L); | |
+# endif | |
+ | |
+# ifdef __AO__ | |
+ /* ambient occlusion */ | |
+ if (kernel_data.integrator.use_ambient_occlusion) { | |
+ kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f)); | |
+ } | |
+# endif /* __AO__ */ | |
+ | |
+# ifdef __SUBSURFACE__ | |
+ /* bssrdf scatter to a different location on the same object, replacing | |
+ * the closures with a diffuse BSDF */ | |
+ if (sd->flag & SD_BSSRDF) { | |
+ if (kernel_path_subsurface_scatter( | |
+ kg, sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) { | |
+ break; | |
+ } | |
+ } | |
+# endif /* __SUBSURFACE__ */ | |
+ | |
+# if defined(__EMISSION__) | |
+ int all = (kernel_data.integrator.sample_all_lights_indirect) || | |
+ (state->flag & PATH_RAY_SHADOW_CATCHER); | |
+ kernel_branched_path_surface_connect_light( | |
+ kg, sd, emission_sd, state, throughput, 1.0f, L, all); | |
+# endif /* defined(__EMISSION__) */ | |
+ | |
+# ifdef __VOLUME__ | |
+ } | |
+# endif | |
+ | |
+ if (!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray)) | |
+ break; | |
+ } | |
+ | |
+# ifdef __SUBSURFACE__ | |
+ /* Trace indirect subsurface rays by restarting the loop. This uses less
+ * stack memory than invoking kernel_path_indirect. */
+ if (ss_indirect.num_rays) { | |
+ kernel_path_subsurface_setup_indirect(kg, &ss_indirect, state, ray, L, &throughput); | |
+ } | |
+ else { | |
+ break; | |
+ } | |
+ } | |
+# endif /* __SUBSURFACE__ */ | |
+} | |
+ | |
+# endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */ | |
+ | |
+ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg, | |
+ PathState *state, | |
+ float3 throughput, | |
+ Ray *ray, | |
+ PathRadiance *L, | |
+ ccl_global float *buffer, | |
+ ShaderData *emission_sd) | |
+{ | |
+ PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE); | |
+ | |
+ /* Shader data memory used for both volumes and surfaces, saves stack space. */ | |
+ ShaderData sd; | |
+ | |
+# ifdef __SUBSURFACE__ | |
+ SubsurfaceIndirectRays ss_indirect; | |
+ kernel_path_subsurface_init_indirect(&ss_indirect); | |
+ | |
+ for (;;) { | |
+# endif /* __SUBSURFACE__ */ | |
+ | |
+ /* path iteration */ | |
+ for (;;) { | |
+ /* Find intersection with objects in scene. */ | |
+ Intersection isect; | |
+ bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L); | |
+ | |
+ /* Find intersection with lamps and compute emission for MIS. */ | |
+ kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L); | |
+ | |
+# ifdef __VOLUME__ | |
+ /* Volume integration. */ | |
+ VolumeIntegrateResult result = kernel_path_volume( | |
+ kg, &sd, state, ray, &throughput, &isect, hit, emission_sd, L); | |
+ | |
+ if (result == VOLUME_PATH_SCATTERED) { | |
+ continue; | |
+ } | |
+ else if (result == VOLUME_PATH_MISSED) { | |
+ break; | |
+ } | |
+# endif /* __VOLUME__*/ | |
+ | |
+ /* Shade background. */ | |
+ if (!hit) { | |
+ kernel_path_background(kg, state, ray, throughput, &sd, buffer, L); | |
+ break; | |
+ } | |
+ else if (path_state_ao_bounce(kg, state)) { | |
+ break; | |
+ } | |
+ | |
+ /* Setup shader data. */ | |
+ shader_setup_from_ray(kg, &sd, &isect, ray); | |
+ | |
+ /* Skip most work for volume bounding surface. */ | |
+# ifdef __VOLUME__ | |
+ if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { | |
+# endif | |
+ | |
+ /* Evaluate shader. */ | |
+ shader_eval_surface(kg, &sd, state, buffer, state->flag); | |
+ shader_prepare_closures(&sd, state); | |
+ | |
+ /* Apply shadow catcher, holdout, emission. */ | |
+ if (!kernel_path_shader_apply(kg, &sd, state, ray, throughput, emission_sd, L, buffer)) { | |
+ break; | |
+ } | |
+ | |
+ /* Path termination. This is a strange place to put the termination; it is
+ * mainly due to the mixed-in MIS that we use. It causes too many unneeded
+ * shader evaluations, since we only need emission if we are going to
+ * terminate. */
+ float probability = path_state_continuation_probability(kg, state, throughput); | |
+ | |
+ if (probability == 0.0f) { | |
+ break; | |
+ } | |
+ else if (probability != 1.0f) { | |
+ float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); | |
+ if (terminate >= probability) | |
+ break; | |
+ | |
+ throughput /= probability; | |
+ } | |
+ | |
+# ifdef __DENOISING_FEATURES__ | |
+ kernel_update_denoising_features(kg, &sd, state, L); | |
+# endif | |
+ | |
+# ifdef __AO__ | |
+ /* ambient occlusion */ | |
+ if (kernel_data.integrator.use_ambient_occlusion) { | |
+ kernel_path_ao(kg, &sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, &sd)); | |
+ } | |
+# endif /* __AO__ */ | |
+ | |
+# ifdef __SUBSURFACE__ | |
+ /* bssrdf scatter to a different location on the same object, replacing | |
+ * the closures with a diffuse BSDF */ | |
+ if (sd.flag & SD_BSSRDF) { | |
+ if (kernel_path_subsurface_scatter( | |
+ kg, &sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) { | |
+ break; | |
+ } | |
+ } | |
+# endif /* __SUBSURFACE__ */ | |
+ | |
+# ifdef __EMISSION__ | |
+ /* direct lighting */ | |
+ kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L); | |
+# endif /* __EMISSION__ */ | |
+ | |
+# ifdef __VOLUME__ | |
+ } | |
+# endif | |
+ | |
+ /* compute direct lighting and next bounce */ | |
+ if (!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray)) | |
+ break; | |
+ } | |
+ | |
+# ifdef __SUBSURFACE__ | |
+ /* Trace indirect subsurface rays by restarting the loop. This uses less
+ * stack memory than invoking kernel_path_indirect. */
+ if (ss_indirect.num_rays) { | |
+ kernel_path_subsurface_setup_indirect(kg, &ss_indirect, state, ray, L, &throughput); | |
+ } | |
+ else { | |
+ break; | |
+ } | |
+ } | |
+# endif /* __SUBSURFACE__ */ | |
+} | |
+ | |
+ccl_device void kernel_path_trace( | |
+ KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride) | |
+{ | |
+ PROFILING_INIT(kg, PROFILING_RAY_SETUP); | |
+ | |
+ /* buffer offset */ | |
+ int index = offset + x + y * stride; | |
+ int pass_stride = kernel_data.film.pass_stride; | |
+ | |
+ buffer += index * pass_stride; | |
+ | |
+ /* Initialize random numbers and sample ray. */ | |
+ uint rng_hash; | |
+ Ray ray; | |
+ | |
+ kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); | |
+ | |
+# ifndef __KERNEL_OPTIX__ | |
+ if (ray.t == 0.0f) { | |
+ return; | |
+ } | |
+# endif | |
+ | |
+ /* Initialize state. */ | |
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f); | |
+ | |
+ PathRadiance L; | |
+ path_radiance_init(kg, &L); | |
+ | |
+ ShaderDataTinyStorage emission_sd_storage; | |
+ ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); | |
+ | |
+ PathState state; | |
+ path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); | |
+ | |
+# ifdef __KERNEL_OPTIX__ | |
+ /* Force struct into local memory to avoid costly spilling on trace calls. */ | |
+ if (pass_stride < 0) /* This is never executed and just prevents the compiler from doing SROA. */ | |
+ for (int i = 0; i < sizeof(L); ++i) | |
+ reinterpret_cast<unsigned char *>(&L)[-pass_stride + i] = 0; | |
+# endif | |
+ | |
+ /* Integrate. */ | |
+ kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd); | |
+ | |
+ kernel_write_result(kg, buffer, sample, &L); | |
+} | |
+ | |
+#endif /* __SPLIT_KERNEL__ */ | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h | |
--- a/intern/cycles/kernel/kernel_random.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_random.h 2020-01-10 20:55:57.757604393 +0300 | |
@@ -43,20 +43,34 @@ | |
uint i = index + SOBOL_SKIP; | |
for (int j = 0, x; (x = find_first_set(i)); i >>= x) { | |
j += x; | |
- result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j - 1); | |
+ result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1);
} | |
return result; | |
} | |
#endif /* __SOBOL__ */ | |
+#define NUM_PJ_SAMPLES (64 * 64)
+#define NUM_PJ_PATTERNS 48
+ | |
ccl_device_forceinline float path_rng_1D( | |
KernelGlobals *kg, uint rng_hash, int sample, int num_samples, int dimension) | |
{ | |
#ifdef __DEBUG_CORRELATION__ | |
return (float)drand48(); | |
#endif | |
- | |
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) { | |
+ /* Fall back to pseudo-random numbers once the precomputed table is exhausted. */
+ if (sample >= NUM_PJ_SAMPLES) {
+ int p = rng_hash + dimension; | |
+ return cmj_randfloat(sample, p); | |
+ } | |
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash); | |
+ int index = ((dimension % NUM_PJ_PATTERNS) * NUM_PJ_SAMPLES + sample) * 2; | |
+ return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ | |
+ (tmp_rng & 0x007fffff)) - | |
+ 1.0f; | |
+ } | |
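+ /* Bit trick used above: each table entry is a float in [1, 2) reinterpreted
+ * as a uint. XORing only its 23 mantissa bits (mask 0x007fffff) with
+ * hash-derived random bits scrambles the sample within [1, 2), and the final
+ * subtraction of 1.0f maps it to [0, 1) without an int-to-float conversion. */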
#ifdef __CMJ__ | |
# ifdef __SOBOL__ | |
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) | |
@@ -99,7 +113,22 @@ | |
*fy = (float)drand48(); | |
return; | |
#endif | |
- | |
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) { | |
+ /* Fall back to pseudo-random numbers once the precomputed table is exhausted. */
+ if (sample >= NUM_PJ_SAMPLES) {
+ int p = rng_hash + dimension;
+ *fx = cmj_randfloat(sample, p);
+ *fy = cmj_randfloat(sample, p + 1);
+ return;
+ }
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash); | |
+ int index = ((dimension % NUM_PJ_PATTERNS) * NUM_PJ_SAMPLES + sample) * 2; | |
+ *fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) - | |
+ 1.0f; | |
+ tmp_rng = cmj_hash_simple(dimension + 1, rng_hash); | |
+ *fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^ | |
+ (tmp_rng & 0x007fffff)) - | |
+ 1.0f; | |
+ return; | |
+ } | |
#ifdef __CMJ__ | |
# ifdef __SOBOL__ | |
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) | |
@@ -284,4 +313,23 @@ | |
return (float)*rng * (1.0f / (float)0xFFFFFFFF); | |
} | |
+ccl_device_inline bool sample_is_even(int pattern, int sample) | |
+{ | |
+ if (pattern == SAMPLING_PATTERN_PMJ) { | |
+ /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
+ * We can use this to divide the sample sequence into two classes for easier
+ * variance estimation. There is probably a more elegant way of writing this. */
+ return (bool)(sample & 2) ^ (bool)(sample & 8) ^ (bool)(sample & 0x20) ^ | |
+ (bool)(sample & 0x80) ^ (bool)(sample & 0x200) ^ (bool)(sample & 0x800) ^ | |
+ (bool)(sample & 0x2000) ^ (bool)(sample & 0x8000) ^ (bool)(sample & 0x20000) ^ | |
+ (bool)(sample & 0x80000) ^ (bool)(sample & 0x200000) ^ (bool)(sample & 0x800000) ^ | |
+ (bool)(sample & 0x2000000) ^ (bool)(sample & 0x8000000) ^ (bool)(sample & 0x20000000) ^ | |
+ (bool)(sample & 0x80000000); | |
+ } | |
+ else { | |
+ /* TODO: Are there reliable ways of dividing CMJ and Sobol into two classes? */ | |
+ return sample & 0x1; | |
+ } | |
+} | |
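+
+/* An equivalent, more compact formulation (a sketch, assuming GCC/Clang-style
+ * builtins are available) takes the parity of the odd-indexed bits:
+ *
+ * return __builtin_popcount(sample & 0xaaaaaaaa) & 1;
+ */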
+ | |
CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h | |
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h 2020-01-10 20:42:43.467590055 +0300 | |
@@ -89,5 +89,9 @@ | |
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) | |
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) | |
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update) | |
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) | |
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) | |
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) | |
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) | |
#undef KERNEL_ARCH | |
diff -Naur a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | |
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 2020-01-10 20:42:43.467590055 +0300 | |
@@ -58,6 +58,10 @@ | |
# include "kernel/split/kernel_next_iteration_setup.h" | |
# include "kernel/split/kernel_indirect_subsurface.h" | |
# include "kernel/split/kernel_buffer_update.h" | |
+# include "kernel/split/kernel_adaptive_stopping.h" | |
+# include "kernel/split/kernel_adaptive_filter_x.h" | |
+# include "kernel/split/kernel_adaptive_filter_y.h" | |
+# include "kernel/split/kernel_adaptive_adjust_samples.h" | |
# endif /* __SPLIT_KERNEL__ */ | |
#else | |
# define STUB_ASSERT(arch, name) \ | |
@@ -204,6 +208,10 @@ | |
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) | |
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) | |
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) | |
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) | |
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) | |
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) | |
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) | |
#endif /* __SPLIT_KERNEL__ */ | |
#undef KERNEL_STUB | |
diff -Naur a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu | |
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu 2020-01-10 20:42:43.467590055 +0300 | |
@@ -33,6 +33,7 @@ | |
#include "kernel/kernel_path_branched.h" | |
#include "kernel/kernel_bake.h" | |
#include "kernel/kernel_work_stealing.h" | |
+#include "kernel/kernel_adaptive_sampling.h" | |
/* kernels */ | |
extern "C" __global__ void | |
@@ -83,6 +84,75 @@ | |
extern "C" __global__ void | |
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | |
+kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size) | |
+{ | |
+ int work_index = ccl_global_id(0); | |
+ bool thread_is_active = work_index < total_work_size; | |
+ KernelGlobals kg; | |
+ if(thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) { | |
+ uint x = tile->x + work_index % tile->w; | |
+ uint y = tile->y + work_index / tile->w; | |
+ int index = tile->offset + x + y * tile->stride; | |
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; | |
+ kernel_do_adaptive_stopping(&kg, buffer, sample); | |
+ } | |
+} | |
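/* A minimal host-side sketch of how the stopping kernel above might be
 * launched, one thread per tile pixel. The names used here
 * (cuAdaptiveStopping, d_work_tile, num_threads_per_block) are assumptions
 * for illustration, modeled on the existing CUDA device code, and not part
 * of this patch. */
#include <cuda.h> /* CUDA driver API: CUfunction, CUdeviceptr, cuLaunchKernel */
static void launch_adaptive_stopping_sketch(CUfunction cuAdaptiveStopping,
                                            CUdeviceptr d_work_tile,
                                            int tile_w, int tile_h,
                                            int sample,
                                            uint num_threads_per_block)
{
  uint total_work_size = tile_w * tile_h;
  /* Round the 1-D grid up so every pixel gets a thread; surplus threads exit
   * early via the work_index < total_work_size guard in the kernel. */
  uint num_blocks = (total_work_size + num_threads_per_block - 1) / num_threads_per_block;
  void *args[] = {&d_work_tile, &sample, &total_work_size};
  cuLaunchKernel(cuAdaptiveStopping,
                 num_blocks, 1, 1,
                 num_threads_per_block, 1, 1,
                 0, 0, args, 0);
}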
+ | |
+extern "C" __global__ void | |
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | |
+kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint) | |
+{ | |
+ KernelGlobals kg; | |
+ if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) { | |
+ if(ccl_global_id(0) < tile->h) { | |
+ int y = tile->y + ccl_global_id(0); | |
+ kernel_do_adaptive_filter_x(&kg, y, tile); | |
+ } | |
+ } | |
+} | |
+ | |
+extern "C" __global__ void | |
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | |
+kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint) | |
+{ | |
+ KernelGlobals kg; | |
+  if(kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) { | |
+ if(ccl_global_id(0) < tile->w) { | |
+ int x = tile->x + ccl_global_id(0); | |
+ kernel_do_adaptive_filter_y(&kg, x, tile); | |
+ } | |
+ } | |
+} | |
+ | |
+extern "C" __global__ void | |
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | |
+kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size) | |
+{ | |
+ if(kernel_data.film.pass_adaptive_aux_buffer) { | |
+ int work_index = ccl_global_id(0); | |
+ bool thread_is_active = work_index < total_work_size; | |
+ KernelGlobals kg; | |
+ if(thread_is_active) { | |
+ uint x = tile->x + work_index % tile->w; | |
+ uint y = tile->y + work_index / tile->w; | |
+ int index = tile->offset + x + y * tile->stride; | |
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride; | |
+ if(buffer[kernel_data.film.pass_sample_count] < 0.0f) { | |
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count]; | |
+ float sample_multiplier = sample / max((float)start_sample + 1.0f, buffer[kernel_data.film.pass_sample_count]); | |
+ if(sample_multiplier != 1.0f) { | |
+ kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier); | |
+ } | |
+ } | |
+ else { | |
+ kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f)); | |
+ } | |
+ } | |
+ } | |
+} | |
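/* Worked example for the scaling above: early-stopped pixels store their
 * sample count negated in pass_sample_count. With start_sample = 0 and a
 * final sample count of 64, a pixel that converged after 20 samples holds
 * -20.0f; the kernel flips it to 20.0f and applies
 * sample_multiplier = 64 / max(0 + 1, 20) = 3.2, scaling the accumulated
 * passes up so the uniform 1/64 division applied at display time still
 * yields the correct per-pixel mean. */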
+ | |
+extern "C" __global__ void | |
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | |
kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) | |
{ | |
int x = sx + blockDim.x*blockIdx.x + threadIdx.x; | |
diff -Naur a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu | |
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu 2020-01-10 20:42:43.467590055 +0300 | |
@@ -43,6 +43,10 @@ | |
#include "kernel/split/kernel_next_iteration_setup.h" | |
#include "kernel/split/kernel_indirect_subsurface.h" | |
#include "kernel/split/kernel_buffer_update.h" | |
+#include "kernel/split/kernel_adaptive_stopping.h" | |
+#include "kernel/split/kernel_adaptive_filter_x.h" | |
+#include "kernel/split/kernel_adaptive_filter_y.h" | |
+#include "kernel/split/kernel_adaptive_adjust_samples.h" | |
#include "kernel/kernel_film.h" | |
@@ -121,6 +125,10 @@ | |
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) | |
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) | |
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) | |
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping) | |
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x) | |
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y) | |
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples) | |
extern "C" __global__ void | |
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) | |
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl | |
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl 2020-01-10 20:42:43.467590055 +0300 | |
@@ -0,0 +1,23 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include "kernel/kernel_compat_opencl.h" | |
+#include "kernel/split/kernel_split_common.h" | |
+#include "kernel/split/kernel_adaptive_adjust_samples.h" | |
+ | |
+#define KERNEL_NAME adaptive_adjust_samples | |
+#include "kernel/kernels/opencl/kernel_split_function.h" | |
+#undef KERNEL_NAME | |
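/* Each of the four kernel_adaptive_*.cl files in this patch stamps out one
 * split-kernel entry point by defining KERNEL_NAME before including the
 * shared template. A simplified sketch of the naming scheme only (the real
 * kernel_split_function.h also declares the full argument list): */
#if 0 /* illustration, not part of the patch */
#  define KERNEL_NAME_JOIN(a, b) a##_##b
#  define KERNEL_NAME_EVAL(name) KERNEL_NAME_JOIN(kernel_ocl_path_trace, name)
/* With KERNEL_NAME == adaptive_adjust_samples this yields an entry point
 * named kernel_ocl_path_trace_adaptive_adjust_samples. */
#endif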
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl | |
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl 2020-01-10 20:42:43.467590055 +0300 | |
@@ -0,0 +1,23 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include "kernel/kernel_compat_opencl.h" | |
+#include "kernel/split/kernel_split_common.h" | |
+#include "kernel/split/kernel_adaptive_filter_x.h" | |
+ | |
+#define KERNEL_NAME adaptive_filter_x | |
+#include "kernel/kernels/opencl/kernel_split_function.h" | |
+#undef KERNEL_NAME | |
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl | |
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl 2020-01-10 20:42:43.467590055 +0300 | |
@@ -0,0 +1,23 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include "kernel/kernel_compat_opencl.h" | |
+#include "kernel/split/kernel_split_common.h" | |
+#include "kernel/split/kernel_adaptive_filter_y.h" | |
+ | |
+#define KERNEL_NAME adaptive_filter_y | |
+#include "kernel/kernels/opencl/kernel_split_function.h" | |
+#undef KERNEL_NAME | |
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl | |
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl 2020-01-10 20:42:43.467590055 +0300 | |
@@ -0,0 +1,23 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include "kernel/kernel_compat_opencl.h" | |
+#include "kernel/split/kernel_split_common.h" | |
+#include "kernel/split/kernel_adaptive_stopping.h" | |
+ | |
+#define KERNEL_NAME adaptive_stopping | |
+#include "kernel/kernels/opencl/kernel_split_function.h" | |
+#undef KERNEL_NAME | |
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl | |
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl 2020-01-10 20:42:43.467590055 +0300 | |
@@ -28,3 +28,7 @@ | |
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl" | |
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl" | |
#include "kernel/kernels/opencl/kernel_buffer_update.cl" | |
+#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl" | |
+#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl" | |
+#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl" | |
+#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl" | |
diff -Naur a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h | |
--- a/intern/cycles/kernel/kernel_textures.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_textures.h 2020-01-10 20:42:43.464256721 +0300 | |
@@ -77,7 +77,7 @@ | |
KERNEL_TEX(float, __lookup_table) | |
/* sobol */ | |
-KERNEL_TEX(uint, __sobol_directions) | |
+KERNEL_TEX(uint, __sample_pattern_lut) | |
/* image textures */ | |
KERNEL_TEX(TextureInfo, __texture_info) | |
diff -Naur a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h | |
--- a/intern/cycles/kernel/kernel_types.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_types.h 2020-01-10 20:49:57.107597884 +0300 | |
@@ -269,6 +269,7 @@ | |
enum SamplingPattern { | |
SAMPLING_PATTERN_SOBOL = 0, | |
SAMPLING_PATTERN_CMJ = 1, | |
+ SAMPLING_PATTERN_PMJ = 2, | |
SAMPLING_NUM_PATTERNS, | |
}; | |
@@ -373,6 +374,8 @@ | |
#endif | |
PASS_RENDER_TIME, | |
PASS_CRYPTOMATTE, | |
+ PASS_ADAPTIVE_AUX_BUFFER, | |
+ PASS_SAMPLE_COUNT, | |
PASS_AOV_COLOR, | |
PASS_AOV_VALUE, | |
PASS_CATEGORY_MAIN_END = 31, | |
@@ -1239,6 +1242,9 @@ | |
int cryptomatte_depth; | |
int pass_cryptomatte; | |
+ int pass_adaptive_aux_buffer; | |
+ int pass_sample_count; | |
+ | |
int pass_mist; | |
float mist_start; | |
float mist_inv_depth; | |
@@ -1273,6 +1279,7 @@ | |
int display_divide_pass_stride; | |
int use_display_exposure; | |
int use_display_pass_alpha; | |
+ int pad1, pad2; | |
} KernelFilm; | |
static_assert_align(KernelFilm, 16); | |
@@ -1354,6 +1361,8 @@ | |
/* sampler */ | |
int sampling_pattern; | |
int aa_samples; | |
+ int adaptive_min_samples; | |
+ float adaptive_threshold; | |
/* volume render */ | |
int use_volumes; | |
@@ -1429,7 +1438,7 @@ | |
typedef struct KernelTables { | |
int beckmann_offset; | |
- int pad1, pad2, pad3; | |
+ int pad1; | |
} KernelTables; | |
static_assert_align(KernelTables, 16); | |
@@ -1679,7 +1688,7 @@ | |
uint start_sample; | |
uint num_samples; | |
- uint offset; | |
+ int offset; | |
uint stride; | |
ccl_global float *buffer; | |
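/* Note on the uint -> int change to `offset` above: the tile offset is
 * computed so that offset + x + y * stride maps a full-frame pixel coordinate
 * into the tile buffer, which makes it negative whenever the tile does not
 * start at the frame origin. Storing it unsigned only worked through
 * wrap-around, so a signed int is the honest type here. */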
diff -Naur a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h | |
--- a/intern/cycles/kernel/kernel_work_stealing.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/kernel/kernel_work_stealing.h 2020-01-10 20:42:43.467590055 +0300 | |
@@ -23,17 +23,41 @@ | |
* Utility functions for work stealing | |
*/ | |
+/* Map global work index to tile, pixel X/Y and sample. */ | |
+ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile, | |
+ uint global_work_index, | |
+ ccl_private uint *x, | |
+ ccl_private uint *y, | |
+ ccl_private uint *sample) | |
+{ | |
+#ifdef __KERNEL_CUDA__ | |
+ /* Keeping threads for the same pixel together improves performance on CUDA. */ | |
+ uint sample_offset = global_work_index % tile->num_samples; | |
+ uint pixel_offset = global_work_index / tile->num_samples; | |
+#else /* __KERNEL_CUDA__ */ | |
+ uint tile_pixels = tile->w * tile->h; | |
+ uint sample_offset = global_work_index / tile_pixels; | |
+ uint pixel_offset = global_work_index - sample_offset * tile_pixels; | |
+#endif /* __KERNEL_CUDA__ */ | |
+ uint y_offset = pixel_offset / tile->w; | |
+ uint x_offset = pixel_offset - y_offset * tile->w; | |
+ | |
+ *x = tile->x + x_offset; | |
+ *y = tile->y + y_offset; | |
+ *sample = tile->start_sample + sample_offset; | |
+} | |
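/* Worked example for get_work_pixel(): take a 4x4 tile with num_samples = 8
 * and global_work_index = 13.
 *   CUDA layout (samples innermost):  sample_offset = 13 % 8 = 5,
 *     pixel_offset = 13 / 8 = 1  ->  pixel (x+1, y+0), sample start+5.
 *   Other devices (pixels innermost): tile_pixels = 16,
 *     sample_offset = 13 / 16 = 0, pixel_offset = 13
 *     ->  pixel (x+1, y+3), sample start+0.
 * Same work item count, different traversal order; the CUDA order keeps
 * consecutive threads on the same pixel. */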
+ | |
#ifdef __KERNEL_OPENCL__ | |
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable | |
#endif | |
#ifdef __SPLIT_KERNEL__ | |
/* Returns true if there is work */ | |
-ccl_device bool get_next_work(KernelGlobals *kg, | |
- ccl_global uint *work_pools, | |
- uint total_work_size, | |
- uint ray_index, | |
- ccl_private uint *global_work_index) | |
+ccl_device bool get_next_work_item(KernelGlobals *kg, | |
+ ccl_global uint *work_pools, | |
+ uint total_work_size, | |
+ uint ray_index, | |
+ ccl_private uint *global_work_index) | |
{ | |
/* With a small amount of work there may be more threads than work due to | |
* rounding up of global size, stop such threads immediately. */ | |
@@ -56,31 +80,37 @@ | |
/* Test if all work for this pool is done. */ | |
return (*global_work_index < total_work_size); | |
} | |
-#endif | |
-/* Map global work index to tile, pixel X/Y and sample. */ | |
-ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile, | |
- uint global_work_index, | |
- ccl_private uint *x, | |
- ccl_private uint *y, | |
- ccl_private uint *sample) | |
+ccl_device bool get_next_work(KernelGlobals *kg, | |
+ ccl_global uint *work_pools, | |
+ uint total_work_size, | |
+ uint ray_index, | |
+ ccl_private uint *global_work_index) | |
{ | |
-#ifdef __KERNEL_CUDA__ | |
- /* Keeping threads for the same pixel together improves performance on CUDA. */ | |
- uint sample_offset = global_work_index % tile->num_samples; | |
- uint pixel_offset = global_work_index / tile->num_samples; | |
-#else /* __KERNEL_CUDA__ */ | |
- uint tile_pixels = tile->w * tile->h; | |
- uint sample_offset = global_work_index / tile_pixels; | |
- uint pixel_offset = global_work_index - sample_offset * tile_pixels; | |
-#endif /* __KERNEL_CUDA__ */ | |
- uint y_offset = pixel_offset / tile->w; | |
- uint x_offset = pixel_offset - y_offset * tile->w; | |
- | |
- *x = tile->x + x_offset; | |
- *y = tile->y + y_offset; | |
- *sample = tile->start_sample + sample_offset; | |
+ bool got_work = false; | |
+ if (kernel_data.film.pass_adaptive_aux_buffer) { | |
+ do { | |
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index); | |
+ if (got_work) { | |
+ ccl_global WorkTile *tile = &kernel_split_params.tile; | |
+ uint x, y, sample; | |
+ get_work_pixel(tile, *global_work_index, &x, &y, &sample); | |
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride; | |
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; | |
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer + | |
+ kernel_data.film.pass_adaptive_aux_buffer); | |
+ if (aux->w == 0.0f) { | |
+ break; | |
+ } | |
+ } | |
+ } while (got_work); | |
+ } | |
+ else { | |
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index); | |
+ } | |
+ return got_work; | |
} | |
+#endif | |
CCL_NAMESPACE_END | |
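/* Sketch of the convention get_next_work() relies on above: the w component
 * of the float4 adaptive-aux pass doubles as a per-pixel "converged" flag,
 * set non-zero by kernel_do_adaptive_stopping(). The helper name below is
 * hypothetical; it only restates the per-item test inside the loop. */
ccl_device_inline bool pixel_is_converged_sketch(ccl_global float *buffer,
                                                 int pass_adaptive_aux_buffer)
{
  ccl_global float4 *aux = (ccl_global float4 *)(buffer + pass_adaptive_aux_buffer);
  /* Non-zero w: adaptive stopping already finished this pixel, so its
   * remaining work items are skipped instead of being path traced. */
  return aux->w != 0.0f;
}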
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h | |
--- a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h 2020-01-10 20:42:43.467590055 +0300 | |
@@ -0,0 +1,44 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+ccl_device void kernel_adaptive_adjust_samples(KernelGlobals *kg) | |
+{ | |
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); | |
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h) { | |
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w; | |
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w; | |
+ int buffer_offset = (kernel_split_params.tile.offset + x + | |
+ y * kernel_split_params.tile.stride) * | |
+ kernel_data.film.pass_stride; | |
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; | |
+ int sample = kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples; | |
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) { | |
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count]; | |
+ float sample_multiplier = sample / max((float)kernel_split_params.tile.start_sample + 1.0f, | |
+ buffer[kernel_data.film.pass_sample_count]); | |
+ if (sample_multiplier != 1.0f) { | |
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier); | |
+ } | |
+ } | |
+ else { | |
+ kernel_adaptive_post_adjust(kg, buffer, sample / (sample - 1.0f)); | |
+ } | |
+ } | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h | |
--- a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h 2020-01-10 20:42:43.467590055 +0300 | |
@@ -0,0 +1,30 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+ccl_device void kernel_adaptive_filter_x(KernelGlobals *kg) | |
+{ | |
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); | |
+ if (pixel_index < kernel_split_params.tile.h && | |
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >= | |
+ kernel_data.integrator.adaptive_min_samples) { | |
+ int y = kernel_split_params.tile.y + pixel_index; | |
+ kernel_do_adaptive_filter_x(kg, y, &kernel_split_params.tile); | |
+ } | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h | |
--- a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h 2020-01-10 20:42:43.470923389 +0300 | |
@@ -0,0 +1,29 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+ccl_device void kernel_adaptive_filter_y(KernelGlobals *kg) | |
+{ | |
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); | |
+ if (pixel_index < kernel_split_params.tile.w && | |
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >= | |
+ kernel_data.integrator.adaptive_min_samples) { | |
+ int x = kernel_split_params.tile.x + pixel_index; | |
+ kernel_do_adaptive_filter_y(kg, x, &kernel_split_params.tile); | |
+ } | |
+} | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_stopping.h b/intern/cycles/kernel/split/kernel_adaptive_stopping.h | |
--- a/intern/cycles/kernel/split/kernel_adaptive_stopping.h 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/kernel/split/kernel_adaptive_stopping.h 2020-01-10 20:42:43.470923389 +0300 | |
@@ -0,0 +1,37 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+ccl_device void kernel_adaptive_stopping(KernelGlobals *kg) | |
+{ | |
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); | |
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h && | |
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >= | |
+ kernel_data.integrator.adaptive_min_samples) { | |
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w; | |
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w; | |
+ int buffer_offset = (kernel_split_params.tile.offset + x + | |
+ y * kernel_split_params.tile.stride) * | |
+ kernel_data.film.pass_stride; | |
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; | |
+ kernel_do_adaptive_stopping(kg, | |
+ buffer, | |
+ kernel_split_params.tile.start_sample + | |
+ kernel_split_params.tile.num_samples - 1); | |
+ } | |
+} | |
+CCL_NAMESPACE_END | |
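/* Note on the sample index above: the samples taken this round are
 * start_sample .. start_sample + num_samples - 1, so the stopping test is
 * evaluated at the last sample actually accumulated, while the filter
 * kernels in the two headers above gate on the shared
 * start_sample + num_samples >= adaptive_min_samples threshold. */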
diff -Naur a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp | |
--- a/intern/cycles/render/buffers.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/buffers.cpp 2020-01-10 20:42:43.470923389 +0300 | |
@@ -258,6 +258,22 @@ | |
return false; | |
} | |
+ float *sample_count = NULL; | |
+ if (type == PassType::PASS_COMBINED) { | |
+ int sample_offset = 0; | |
+ for (size_t j = 0; j < params.passes.size(); j++) { | |
+ Pass &pass = params.passes[j]; | |
+ if (pass.type != PASS_SAMPLE_COUNT) { | |
+ sample_offset += pass.components; | |
+ continue; | |
+ } | |
+ else { | |
+ sample_count = buffer.data() + sample_offset; | |
+ break; | |
+ } | |
+ } | |
+ } | |
+ | |
int pass_offset = 0; | |
for (size_t j = 0; j < params.passes.size(); j++) { | |
@@ -418,6 +434,11 @@ | |
} | |
else { | |
for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) { | |
+ if (sample_count && sample_count[i * pass_stride] < 0.0f) { | |
+ scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f; | |
+ scale_exposure = (pass.exposure) ? scale * exposure : scale; | |
+ } | |
+ | |
float4 f = make_float4(in[0], in[1], in[2], in[3]); | |
pixels[0] = f.x * scale_exposure; | |
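/* Worked example for the per-pixel override above: with pass stride P, the
 * sample-count value for pixel i sits at sample_count[i * P]. A pixel that
 * stopped after 25 samples stores -25.0f there, so for that pixel
 * scale = -1.0f / -25.0f = 1/25 replaces the uniform 1/sample factor, and
 * scale_exposure is rebuilt from it; only early-stopped pixels carry a
 * negative count and trigger the override. */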
diff -Naur a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt | |
--- a/intern/cycles/render/CMakeLists.txt 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/CMakeLists.txt 2020-01-10 20:42:43.470923389 +0300 | |
@@ -22,6 +22,7 @@ | |
graph.cpp | |
image.cpp | |
integrator.cpp | |
+ jitter.cpp | |
light.cpp | |
merge.cpp | |
mesh.cpp | |
@@ -58,6 +59,7 @@ | |
image.h | |
integrator.h | |
+  jitter.h | |
  light.h | |
merge.h | |
mesh.h | |
nodes.h | |
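/* jitter.cpp / jitter.h added above presumably implement the progressive
 * multi-jittered (PMJ) sample tables that the new SAMPLING_PATTERN_PMJ mode
 * reads through __sample_pattern_lut. As a much-simplified stand-in (plain
 * jittered stratification, not the pmj02 construction of Christensen et
 * al.), generating n stratified 2-D samples might look like the sketch
 * below; rand() stands in for Cycles' hash-based RNG. */
#include <cstdlib>
struct Float2Sketch {
  float x, y;
};
static void jittered_samples_sketch(Float2Sketch *out, int n)
{
  for (int i = 0; i < n; i++) {
    /* One uniform random point inside each of n strata along x. */
    out[i].x = (i + (float)rand() / (float)RAND_MAX) / (float)n;
    out[i].y = (float)rand() / (float)RAND_MAX;
  }
}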
diff -Naur a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp | |
--- a/intern/cycles/render/film.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/film.cpp 2020-01-10 20:42:43.470923389 +0300 | |
@@ -196,6 +196,13 @@ | |
case PASS_AOV_VALUE: | |
pass.components = 1; | |
break; | |
+ case PASS_ADAPTIVE_AUX_BUFFER: | |
+ pass.components = 4; | |
+ break; | |
+ case PASS_SAMPLE_COUNT: | |
+ pass.components = 1; | |
+ pass.exposure = false; | |
+ break; | |
default: | |
assert(false); | |
break; | |
@@ -318,6 +325,7 @@ | |
SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false); | |
SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false); | |
SOCKET_INT(denoising_flags, "Denoising Flags", 0); | |
+ SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false); | |
return type; | |
} | |
@@ -507,6 +515,12 @@ | |
have_aov_value = true; | |
} | |
break; | |
+ case PASS_ADAPTIVE_AUX_BUFFER: | |
+ kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride; | |
+ break; | |
+ case PASS_SAMPLE_COUNT: | |
+ kfilm->pass_sample_count = kfilm->pass_stride; | |
+ break; | |
default: | |
assert(false); | |
break; | |
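/* Worked example for the offset bookkeeping above: each case records the
 * running kfilm->pass_stride as the pass offset before pass.components is
 * added to the stride. For a layout of Combined (4), Adaptive Aux Buffer (4)
 * and Sample Count (1), the offsets come out as 0, 4 and 8, and the final
 * stride is align_up(9, 4) = 12 floats per pixel. */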
diff -Naur a/intern/cycles/render/film.cpp.orig b/intern/cycles/render/film.cpp.orig | |
--- a/intern/cycles/render/film.cpp.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/render/film.cpp.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,638 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include "render/camera.h" | |
+#include "device/device.h" | |
+#include "render/film.h" | |
+#include "render/integrator.h" | |
+#include "render/mesh.h" | |
+#include "render/scene.h" | |
+#include "render/tables.h" | |
+ | |
+#include "util/util_algorithm.h" | |
+#include "util/util_foreach.h" | |
+#include "util/util_math.h" | |
+#include "util/util_math_cdf.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+/* Pass */ | |
+ | |
+static bool compare_pass_order(const Pass &a, const Pass &b) | |
+{ | |
+ if (a.components == b.components) | |
+ return (a.type < b.type); | |
+ return (a.components > b.components); | |
+} | |
+ | |
+void Pass::add(PassType type, vector<Pass> &passes, const char *name) | |
+{ | |
+ for (size_t i = 0; i < passes.size(); i++) { | |
+ if (passes[i].type != type) { | |
+ continue; | |
+ } | |
+ | |
+ /* An empty name is used as a placeholder to signal that any pass of | |
+ * that type is fine (because the content always is the same). | |
+ * This is important to support divide_type: If the pass that has a | |
+ * divide_type is added first, a pass for divide_type with an empty | |
+ * name will be added. Then, if a matching pass with a name is later | |
+ * requested, the existing placeholder will be renamed to that. | |
+ * If the divide_type is explicitly allocated with a name first and | |
+ * then again as part of another pass, the second one will just be | |
+ * skipped because that type already exists. */ | |
+ | |
+ /* If no name is specified, any pass of the correct type will match. */ | |
+ if (name == NULL) { | |
+ return; | |
+ } | |
+ | |
+ /* If we already have a placeholder pass, rename that one. */ | |
+ if (passes[i].name.empty()) { | |
+ passes[i].name = name; | |
+ return; | |
+ } | |
+ | |
+ /* If neither existing nor requested pass have placeholder name, they | |
+ * must match. */ | |
+ if (name == passes[i].name) { | |
+ return; | |
+ } | |
+ } | |
+ | |
+ Pass pass; | |
+ | |
+ pass.type = type; | |
+ pass.filter = true; | |
+ pass.exposure = false; | |
+ pass.divide_type = PASS_NONE; | |
+ if (name) { | |
+ pass.name = name; | |
+ } | |
+ | |
+ switch (type) { | |
+ case PASS_NONE: | |
+ pass.components = 0; | |
+ break; | |
+ case PASS_COMBINED: | |
+ pass.components = 4; | |
+ pass.exposure = true; | |
+ break; | |
+ case PASS_DEPTH: | |
+ pass.components = 1; | |
+ pass.filter = false; | |
+ break; | |
+ case PASS_MIST: | |
+ pass.components = 1; | |
+ break; | |
+ case PASS_NORMAL: | |
+ pass.components = 4; | |
+ break; | |
+ case PASS_UV: | |
+ pass.components = 4; | |
+ break; | |
+ case PASS_MOTION: | |
+ pass.components = 4; | |
+ pass.divide_type = PASS_MOTION_WEIGHT; | |
+ break; | |
+ case PASS_MOTION_WEIGHT: | |
+ pass.components = 1; | |
+ break; | |
+ case PASS_OBJECT_ID: | |
+ case PASS_MATERIAL_ID: | |
+ pass.components = 1; | |
+ pass.filter = false; | |
+ break; | |
+ | |
+ case PASS_EMISSION: | |
+ case PASS_BACKGROUND: | |
+ pass.components = 4; | |
+ pass.exposure = true; | |
+ break; | |
+ case PASS_AO: | |
+ pass.components = 4; | |
+ break; | |
+ case PASS_SHADOW: | |
+ pass.components = 4; | |
+ pass.exposure = false; | |
+ break; | |
+ case PASS_LIGHT: | |
+ /* This isn't a real pass, used by baking to see whether | |
+ * light data is needed or not. | |
+ * | |
+ * Set components to 0 so pass sort below happens in a | |
+ * determined way. | |
+ */ | |
+ pass.components = 0; | |
+ break; | |
+#ifdef WITH_CYCLES_DEBUG | |
+ case PASS_BVH_TRAVERSED_NODES: | |
+ case PASS_BVH_TRAVERSED_INSTANCES: | |
+ case PASS_BVH_INTERSECTIONS: | |
+ case PASS_RAY_BOUNCES: | |
+ pass.components = 1; | |
+ pass.exposure = false; | |
+ break; | |
+#endif | |
+ case PASS_RENDER_TIME: | |
+ /* This pass is handled entirely on the host side. */ | |
+ pass.components = 0; | |
+ break; | |
+ | |
+ case PASS_DIFFUSE_COLOR: | |
+ case PASS_GLOSSY_COLOR: | |
+ case PASS_TRANSMISSION_COLOR: | |
+ case PASS_SUBSURFACE_COLOR: | |
+ pass.components = 4; | |
+ break; | |
+ case PASS_DIFFUSE_DIRECT: | |
+ case PASS_DIFFUSE_INDIRECT: | |
+ pass.components = 4; | |
+ pass.exposure = true; | |
+ pass.divide_type = PASS_DIFFUSE_COLOR; | |
+ break; | |
+ case PASS_GLOSSY_DIRECT: | |
+ case PASS_GLOSSY_INDIRECT: | |
+ pass.components = 4; | |
+ pass.exposure = true; | |
+ pass.divide_type = PASS_GLOSSY_COLOR; | |
+ break; | |
+ case PASS_TRANSMISSION_DIRECT: | |
+ case PASS_TRANSMISSION_INDIRECT: | |
+ pass.components = 4; | |
+ pass.exposure = true; | |
+ pass.divide_type = PASS_TRANSMISSION_COLOR; | |
+ break; | |
+ case PASS_SUBSURFACE_DIRECT: | |
+ case PASS_SUBSURFACE_INDIRECT: | |
+ pass.components = 4; | |
+ pass.exposure = true; | |
+ pass.divide_type = PASS_SUBSURFACE_COLOR; | |
+ break; | |
+ case PASS_VOLUME_DIRECT: | |
+ case PASS_VOLUME_INDIRECT: | |
+ pass.components = 4; | |
+ pass.exposure = true; | |
+ break; | |
+ case PASS_CRYPTOMATTE: | |
+ pass.components = 4; | |
+ break; | |
+ case PASS_AOV_COLOR: | |
+ pass.components = 4; | |
+ break; | |
+ case PASS_AOV_VALUE: | |
+ pass.components = 1; | |
+ break; | |
+ default: | |
+ assert(false); | |
+ break; | |
+ } | |
+ | |
+ passes.push_back(pass); | |
+ | |
+ /* order from by components, to ensure alignment so passes with size 4 | |
+ * come first and then passes with size 1 */ | |
+ sort(&passes[0], &passes[0] + passes.size(), compare_pass_order); | |
+ | |
+ if (pass.divide_type != PASS_NONE) | |
+ Pass::add(pass.divide_type, passes); | |
+} | |
+ | |
+bool Pass::equals(const vector<Pass> &A, const vector<Pass> &B) | |
+{ | |
+ if (A.size() != B.size()) | |
+ return false; | |
+ | |
+ for (int i = 0; i < A.size(); i++) | |
+ if (A[i].type != B[i].type || A[i].name != B[i].name) | |
+ return false; | |
+ | |
+ return true; | |
+} | |
+ | |
+bool Pass::contains(const vector<Pass> &passes, PassType type) | |
+{ | |
+ for (size_t i = 0; i < passes.size(); i++) | |
+ if (passes[i].type == type) | |
+ return true; | |
+ | |
+ return false; | |
+} | |
+ | |
+/* Pixel Filter */ | |
+ | |
+static float filter_func_box(float /*v*/, float /*width*/) | |
+{ | |
+ return 1.0f; | |
+} | |
+ | |
+static float filter_func_gaussian(float v, float width) | |
+{ | |
+ v *= 6.0f / width; | |
+ return expf(-2.0f * v * v); | |
+} | |
+ | |
+static float filter_func_blackman_harris(float v, float width) | |
+{ | |
+ v = M_2PI_F * (v / width + 0.5f); | |
+ return 0.35875f - 0.48829f * cosf(v) + 0.14128f * cosf(2.0f * v) - 0.01168f * cosf(3.0f * v); | |
+} | |
+ | |
+static vector<float> filter_table(FilterType type, float width) | |
+{ | |
+ vector<float> filter_table(FILTER_TABLE_SIZE); | |
+ float (*filter_func)(float, float) = NULL; | |
+ | |
+ switch (type) { | |
+ case FILTER_BOX: | |
+ filter_func = filter_func_box; | |
+ break; | |
+ case FILTER_GAUSSIAN: | |
+ filter_func = filter_func_gaussian; | |
+ width *= 3.0f; | |
+ break; | |
+ case FILTER_BLACKMAN_HARRIS: | |
+ filter_func = filter_func_blackman_harris; | |
+ width *= 2.0f; | |
+ break; | |
+ default: | |
+ assert(0); | |
+ } | |
+ | |
+ /* Create importance sampling table. */ | |
+ | |
+ /* TODO(sergey): With the even filter table size resolution we can not | |
+ * really make it nice symmetric importance map without sampling full range | |
+ * (meaning, we would need to sample full filter range and not use the | |
+ * make_symmetric argument). | |
+ * | |
+ * Current code matches exactly initial filter table code, but we should | |
+ * consider either making FILTER_TABLE_SIZE odd value or sample full filter. | |
+ */ | |
+ | |
+ util_cdf_inverted(FILTER_TABLE_SIZE, | |
+ 0.0f, | |
+ width * 0.5f, | |
+ function_bind(filter_func, _1, width), | |
+ true, | |
+ filter_table); | |
+ | |
+ return filter_table; | |
+} | |
+ | |
+/* Film */ | |
+ | |
+NODE_DEFINE(Film) | |
+{ | |
+ NodeType *type = NodeType::add("film", create); | |
+ | |
+ SOCKET_FLOAT(exposure, "Exposure", 0.8f); | |
+ SOCKET_FLOAT(pass_alpha_threshold, "Pass Alpha Threshold", 0.0f); | |
+ | |
+ static NodeEnum filter_enum; | |
+ filter_enum.insert("box", FILTER_BOX); | |
+ filter_enum.insert("gaussian", FILTER_GAUSSIAN); | |
+ filter_enum.insert("blackman_harris", FILTER_BLACKMAN_HARRIS); | |
+ | |
+ SOCKET_ENUM(filter_type, "Filter Type", filter_enum, FILTER_BOX); | |
+ SOCKET_FLOAT(filter_width, "Filter Width", 1.0f); | |
+ | |
+ SOCKET_FLOAT(mist_start, "Mist Start", 0.0f); | |
+ SOCKET_FLOAT(mist_depth, "Mist Depth", 100.0f); | |
+ SOCKET_FLOAT(mist_falloff, "Mist Falloff", 1.0f); | |
+ | |
+ SOCKET_BOOLEAN(denoising_data_pass, "Generate Denoising Data Pass", false); | |
+ SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false); | |
+ SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false); | |
+ SOCKET_INT(denoising_flags, "Denoising Flags", 0); | |
+ | |
+ return type; | |
+} | |
+ | |
+Film::Film() : Node(node_type) | |
+{ | |
+ use_light_visibility = false; | |
+ filter_table_offset = TABLE_OFFSET_INVALID; | |
+ cryptomatte_passes = CRYPT_NONE; | |
+ | |
+ need_update = true; | |
+} | |
+ | |
+Film::~Film() | |
+{ | |
+} | |
+ | |
+void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) | |
+{ | |
+ if (!need_update) | |
+ return; | |
+ | |
+ device_free(device, dscene, scene); | |
+ | |
+ KernelFilm *kfilm = &dscene->data.film; | |
+ | |
+ /* update __data */ | |
+ kfilm->exposure = exposure; | |
+ kfilm->pass_flag = 0; | |
+ | |
+ kfilm->display_pass_stride = -1; | |
+ kfilm->display_pass_components = 0; | |
+ kfilm->display_divide_pass_stride = -1; | |
+ kfilm->use_display_exposure = false; | |
+ kfilm->use_display_pass_alpha = (display_pass == PASS_COMBINED); | |
+ | |
+ kfilm->light_pass_flag = 0; | |
+ kfilm->pass_stride = 0; | |
+ kfilm->use_light_pass = use_light_visibility; | |
+ | |
+ bool have_cryptomatte = false, have_aov_color = false, have_aov_value = false; | |
+ | |
+ for (size_t i = 0; i < passes.size(); i++) { | |
+ Pass &pass = passes[i]; | |
+ | |
+ if (pass.type == PASS_NONE) { | |
+ continue; | |
+ } | |
+ | |
+ /* Can't do motion pass if no motion vectors are available. */ | |
+ if (pass.type == PASS_MOTION || pass.type == PASS_MOTION_WEIGHT) { | |
+ if (scene->need_motion() != Scene::MOTION_PASS) { | |
+ kfilm->pass_stride += pass.components; | |
+ continue; | |
+ } | |
+ } | |
+ | |
+ int pass_flag = (1 << (pass.type % 32)); | |
+ if (pass.type <= PASS_CATEGORY_MAIN_END) { | |
+ kfilm->pass_flag |= pass_flag; | |
+ } | |
+ else { | |
+ assert(pass.type <= PASS_CATEGORY_LIGHT_END); | |
+ kfilm->use_light_pass = 1; | |
+ kfilm->light_pass_flag |= pass_flag; | |
+ } | |
+ | |
+ switch (pass.type) { | |
+ case PASS_COMBINED: | |
+ kfilm->pass_combined = kfilm->pass_stride; | |
+ break; | |
+ case PASS_DEPTH: | |
+ kfilm->pass_depth = kfilm->pass_stride; | |
+ break; | |
+ case PASS_NORMAL: | |
+ kfilm->pass_normal = kfilm->pass_stride; | |
+ break; | |
+ case PASS_UV: | |
+ kfilm->pass_uv = kfilm->pass_stride; | |
+ break; | |
+ case PASS_MOTION: | |
+ kfilm->pass_motion = kfilm->pass_stride; | |
+ break; | |
+ case PASS_MOTION_WEIGHT: | |
+ kfilm->pass_motion_weight = kfilm->pass_stride; | |
+ break; | |
+ case PASS_OBJECT_ID: | |
+ kfilm->pass_object_id = kfilm->pass_stride; | |
+ break; | |
+ case PASS_MATERIAL_ID: | |
+ kfilm->pass_material_id = kfilm->pass_stride; | |
+ break; | |
+ | |
+ case PASS_MIST: | |
+ kfilm->pass_mist = kfilm->pass_stride; | |
+ break; | |
+ case PASS_EMISSION: | |
+ kfilm->pass_emission = kfilm->pass_stride; | |
+ break; | |
+ case PASS_BACKGROUND: | |
+ kfilm->pass_background = kfilm->pass_stride; | |
+ break; | |
+ case PASS_AO: | |
+ kfilm->pass_ao = kfilm->pass_stride; | |
+ break; | |
+ case PASS_SHADOW: | |
+ kfilm->pass_shadow = kfilm->pass_stride; | |
+ break; | |
+ | |
+ case PASS_LIGHT: | |
+ break; | |
+ | |
+ case PASS_DIFFUSE_COLOR: | |
+ kfilm->pass_diffuse_color = kfilm->pass_stride; | |
+ break; | |
+ case PASS_GLOSSY_COLOR: | |
+ kfilm->pass_glossy_color = kfilm->pass_stride; | |
+ break; | |
+ case PASS_TRANSMISSION_COLOR: | |
+ kfilm->pass_transmission_color = kfilm->pass_stride; | |
+ break; | |
+ case PASS_SUBSURFACE_COLOR: | |
+ kfilm->pass_subsurface_color = kfilm->pass_stride; | |
+ break; | |
+ case PASS_DIFFUSE_INDIRECT: | |
+ kfilm->pass_diffuse_indirect = kfilm->pass_stride; | |
+ break; | |
+ case PASS_GLOSSY_INDIRECT: | |
+ kfilm->pass_glossy_indirect = kfilm->pass_stride; | |
+ break; | |
+ case PASS_TRANSMISSION_INDIRECT: | |
+ kfilm->pass_transmission_indirect = kfilm->pass_stride; | |
+ break; | |
+ case PASS_SUBSURFACE_INDIRECT: | |
+ kfilm->pass_subsurface_indirect = kfilm->pass_stride; | |
+ break; | |
+ case PASS_VOLUME_INDIRECT: | |
+ kfilm->pass_volume_indirect = kfilm->pass_stride; | |
+ break; | |
+ case PASS_DIFFUSE_DIRECT: | |
+ kfilm->pass_diffuse_direct = kfilm->pass_stride; | |
+ break; | |
+ case PASS_GLOSSY_DIRECT: | |
+ kfilm->pass_glossy_direct = kfilm->pass_stride; | |
+ break; | |
+ case PASS_TRANSMISSION_DIRECT: | |
+ kfilm->pass_transmission_direct = kfilm->pass_stride; | |
+ break; | |
+ case PASS_SUBSURFACE_DIRECT: | |
+ kfilm->pass_subsurface_direct = kfilm->pass_stride; | |
+ break; | |
+ case PASS_VOLUME_DIRECT: | |
+ kfilm->pass_volume_direct = kfilm->pass_stride; | |
+ break; | |
+ | |
+#ifdef WITH_CYCLES_DEBUG | |
+ case PASS_BVH_TRAVERSED_NODES: | |
+ kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride; | |
+ break; | |
+ case PASS_BVH_TRAVERSED_INSTANCES: | |
+ kfilm->pass_bvh_traversed_instances = kfilm->pass_stride; | |
+ break; | |
+ case PASS_BVH_INTERSECTIONS: | |
+ kfilm->pass_bvh_intersections = kfilm->pass_stride; | |
+ break; | |
+ case PASS_RAY_BOUNCES: | |
+ kfilm->pass_ray_bounces = kfilm->pass_stride; | |
+ break; | |
+#endif | |
+ case PASS_RENDER_TIME: | |
+ break; | |
+ case PASS_CRYPTOMATTE: | |
+ kfilm->pass_cryptomatte = have_cryptomatte ? | |
+ min(kfilm->pass_cryptomatte, kfilm->pass_stride) : | |
+ kfilm->pass_stride; | |
+ have_cryptomatte = true; | |
+ break; | |
+ case PASS_AOV_COLOR: | |
+ if (!have_aov_color) { | |
+ kfilm->pass_aov_color = kfilm->pass_stride; | |
+ have_aov_color = true; | |
+ } | |
+ break; | |
+ case PASS_AOV_VALUE: | |
+ if (!have_aov_value) { | |
+ kfilm->pass_aov_value = kfilm->pass_stride; | |
+ have_aov_value = true; | |
+ } | |
+ break; | |
+ default: | |
+ assert(false); | |
+ break; | |
+ } | |
+ | |
+ if (pass.type == display_pass) { | |
+ kfilm->display_pass_stride = kfilm->pass_stride; | |
+ kfilm->display_pass_components = pass.components; | |
+ kfilm->use_display_exposure = pass.exposure && (kfilm->exposure != 1.0f); | |
+ } | |
+ else if (pass.type == PASS_DIFFUSE_COLOR || pass.type == PASS_TRANSMISSION_COLOR || | |
+ pass.type == PASS_GLOSSY_COLOR || pass.type == PASS_SUBSURFACE_COLOR) { | |
+ kfilm->display_divide_pass_stride = kfilm->pass_stride; | |
+ } | |
+ | |
+ kfilm->pass_stride += pass.components; | |
+ } | |
+ | |
+ kfilm->pass_denoising_data = 0; | |
+ kfilm->pass_denoising_clean = 0; | |
+ kfilm->denoising_flags = 0; | |
+ if (denoising_data_pass) { | |
+ kfilm->pass_denoising_data = kfilm->pass_stride; | |
+ kfilm->pass_stride += DENOISING_PASS_SIZE_BASE; | |
+ kfilm->denoising_flags = denoising_flags; | |
+ if (denoising_clean_pass) { | |
+ kfilm->pass_denoising_clean = kfilm->pass_stride; | |
+ kfilm->pass_stride += DENOISING_PASS_SIZE_CLEAN; | |
+ kfilm->use_light_pass = 1; | |
+ } | |
+ if (denoising_prefiltered_pass) { | |
+ kfilm->pass_stride += DENOISING_PASS_SIZE_PREFILTERED; | |
+ } | |
+ } | |
+ | |
+ kfilm->pass_stride = align_up(kfilm->pass_stride, 4); | |
+ | |
+ /* When displaying the normal/uv pass in the viewport we need to disable | |
+ * transparency. | |
+ * | |
+ * We also don't need to perform light accumulation; later we want to optimize | 
+ * this to suppress the light calculations entirely. */ | 
+ if (display_pass == PASS_NORMAL || display_pass == PASS_UV) { | |
+ kfilm->use_light_pass = 0; | |
+ } | |
+ else { | |
+ kfilm->pass_alpha_threshold = pass_alpha_threshold; | |
+ } | |
+ | |
+ /* update filter table */ | |
+ vector<float> table = filter_table(filter_type, filter_width); | |
+ scene->lookup_tables->remove_table(&filter_table_offset); | |
+ filter_table_offset = scene->lookup_tables->add_table(dscene, table); | |
+ kfilm->filter_table_offset = (int)filter_table_offset; | |
+ | |
+ /* mist pass parameters */ | |
+ kfilm->mist_start = mist_start; | |
+ kfilm->mist_inv_depth = (mist_depth > 0.0f) ? 1.0f / mist_depth : 0.0f; | |
+ kfilm->mist_falloff = mist_falloff; | |
+ | |
+ kfilm->cryptomatte_passes = cryptomatte_passes; | |
+ kfilm->cryptomatte_depth = cryptomatte_depth; | |
+ | |
+ pass_stride = kfilm->pass_stride; | |
+ denoising_data_offset = kfilm->pass_denoising_data; | |
+ denoising_clean_offset = kfilm->pass_denoising_clean; | |
+ | |
+ need_update = false; | |
+} | |
+ | |
+void Film::device_free(Device * /*device*/, DeviceScene * /*dscene*/, Scene *scene) | |
+{ | |
+ scene->lookup_tables->remove_table(&filter_table_offset); | |
+} | |
+ | |
+bool Film::modified(const Film &film) | |
+{ | |
+ return !Node::equals(film) || !Pass::equals(passes, film.passes); | |
+} | |
+ | |
+void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes) | |
+{ | |
+ if (Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) { | |
+ scene->mesh_manager->tag_update(scene); | |
+ | |
+ foreach (Shader *shader, scene->shaders) | |
+ shader->need_update_mesh = true; | |
+ } | |
+ else if (Pass::contains(passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION)) { | |
+ scene->mesh_manager->tag_update(scene); | |
+ } | |
+ else if (Pass::contains(passes, PASS_AO) != Pass::contains(passes_, PASS_AO)) { | |
+ scene->integrator->tag_update(scene); | |
+ } | |
+ | |
+ if (update_passes) { | |
+ passes = passes_; | |
+ } | |
+} | |
+ | |
+void Film::tag_update(Scene * /*scene*/) | |
+{ | |
+ need_update = true; | |
+} | |
+ | |
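+/* Returns the index of the named AOV among the passes of the same type | 
+ * (color or value), or -1 if no AOV pass with that name exists. */ | 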
+int Film::get_aov_offset(string name, bool &is_color) | |
+{ | |
+ int num_color = 0, num_value = 0; | |
+ foreach (const Pass &pass, passes) { | |
+ if (pass.type == PASS_AOV_COLOR) { | |
+ num_color++; | |
+ } | |
+ else if (pass.type == PASS_AOV_VALUE) { | |
+ num_value++; | |
+ } | |
+ else { | |
+ continue; | |
+ } | |
+ | |
+ if (pass.name == name) { | |
+ is_color = (pass.type == PASS_AOV_COLOR); | |
+ return (is_color ? num_color : num_value) - 1; | |
+ } | |
+ } | |
+ | |
+ return -1; | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/render/film.h b/intern/cycles/render/film.h | |
--- a/intern/cycles/render/film.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/film.h 2020-01-10 20:42:43.470923389 +0300 | |
@@ -81,6 +81,8 @@ | |
CryptomatteType cryptomatte_passes; | |
int cryptomatte_depth; | |
+ bool use_adaptive_sampling; | |
+ | |
bool need_update; | |
Film(); | |
diff -Naur a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp | |
--- a/intern/cycles/render/integrator.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/integrator.cpp 2020-01-10 20:42:43.470923389 +0300 | |
@@ -18,12 +18,14 @@ | |
#include "render/background.h" | |
#include "render/integrator.h" | |
#include "render/film.h" | |
+#include "render/jitter.h" | |
#include "render/light.h" | |
#include "render/scene.h" | |
#include "render/shader.h" | |
#include "render/sobol.h" | |
#include "util/util_foreach.h" | |
+#include "util/util_logging.h" | |
#include "util/util_hash.h" | |
CCL_NAMESPACE_BEGIN | |
@@ -66,6 +68,9 @@ | |
SOCKET_INT(volume_samples, "Volume Samples", 1); | |
SOCKET_INT(start_sample, "Start Sample", 0); | |
+ SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f); | |
+ SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0); | |
+ | |
SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true); | |
SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true); | |
SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f); | |
@@ -78,6 +83,7 @@ | |
static NodeEnum sampling_pattern_enum; | |
sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL); | |
sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ); | |
+ sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ); | |
SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL); | |
return type; | |
@@ -174,6 +180,22 @@ | |
kintegrator->sampling_pattern = sampling_pattern; | |
kintegrator->aa_samples = aa_samples; | |
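+ /* Heuristic defaults: with e.g. 64 AA samples the automatic settings give | 
+ * a minimum of max(4, sqrt(64)) = 8 samples per pixel and an adaptive | 
+ * threshold of max(0.001, 1/64) ~= 0.0156. */ | 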
+ if (aa_samples > 0 && adaptive_min_samples == 0) { | |
+ kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples)); | |
+ VLOG(1) << "Cycles adaptive sampling: automatic min samples = " | |
+ << kintegrator->adaptive_min_samples; | |
+ } | |
+ else { | |
+ kintegrator->adaptive_min_samples = max(4, adaptive_min_samples); | |
+ } | |
+ if (aa_samples > 0 && adaptive_threshold == 0.0f) { | |
+ kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples); | |
+ VLOG(1) << "Cycles adaptive sampling: automatic threshold = " | |
+ << kintegrator->adaptive_threshold; | |
+ } | |
+ else { | |
+ kintegrator->adaptive_threshold = adaptive_threshold; | |
+ } | |
if (light_sampling_threshold > 0.0f) { | |
kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold; | |
@@ -203,18 +225,34 @@ | |
int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM; | |
dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS); | |
- uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS * dimensions); | |
+ if (sampling_pattern == SAMPLING_PATTERN_SOBOL) { | |
+ uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions); | |
- sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); | |
+ sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); | |
- dscene->sobol_directions.copy_to_device(); | |
+ dscene->sample_pattern_lut.copy_to_device(); | |
+ } | |
+ else { | |
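+ /* PMJ02 lookup table: 48 independent sequences of 64*64 = 4096 2D points | 
+ * each, generated in parallel via the task pool and uploaded to the | 
+ * device as __sample_pattern_lut. */ | 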
+ constexpr int sequence_size = 64 * 64; | |
+ constexpr int num_sequences = 48; | |
+ float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences * | |
+ 2); | |
+ TaskPool pool; | |
+ for (int j = 0; j < num_sequences; ++j) { | |
+ float2 *sequence = directions + j * sequence_size; | |
+ pool.push( | |
+ function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j)); | |
+ } | |
+ pool.wait_work(); | |
+ dscene->sample_pattern_lut.copy_to_device(); | |
+ } | |
need_update = false; | |
} | |
void Integrator::device_free(Device *, DeviceScene *dscene) | |
{ | |
- dscene->sobol_directions.free(); | |
+ dscene->sample_pattern_lut.free(); | |
} | |
bool Integrator::modified(const Integrator &integrator) | |
diff -Naur a/intern/cycles/render/integrator.cpp.orig b/intern/cycles/render/integrator.cpp.orig | |
--- a/intern/cycles/render/integrator.cpp.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/render/integrator.cpp.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,236 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include "device/device.h" | |
+#include "render/background.h" | |
+#include "render/integrator.h" | |
+#include "render/film.h" | |
+#include "render/light.h" | |
+#include "render/scene.h" | |
+#include "render/shader.h" | |
+#include "render/sobol.h" | |
+ | |
+#include "util/util_foreach.h" | |
+#include "util/util_hash.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+NODE_DEFINE(Integrator) | |
+{ | |
+ NodeType *type = NodeType::add("integrator", create); | |
+ | |
+ SOCKET_INT(min_bounce, "Min Bounce", 0); | |
+ SOCKET_INT(max_bounce, "Max Bounce", 7); | |
+ | |
+ SOCKET_INT(max_diffuse_bounce, "Max Diffuse Bounce", 7); | |
+ SOCKET_INT(max_glossy_bounce, "Max Glossy Bounce", 7); | |
+ SOCKET_INT(max_transmission_bounce, "Max Transmission Bounce", 7); | |
+ SOCKET_INT(max_volume_bounce, "Max Volume Bounce", 7); | |
+ | |
+ SOCKET_INT(transparent_min_bounce, "Transparent Min Bounce", 0); | |
+ SOCKET_INT(transparent_max_bounce, "Transparent Max Bounce", 7); | |
+ | |
+ SOCKET_INT(ao_bounces, "AO Bounces", 0); | |
+ | |
+ SOCKET_INT(volume_max_steps, "Volume Max Steps", 1024); | |
+ SOCKET_FLOAT(volume_step_size, "Volume Step Size", 0.1f); | |
+ | |
+ SOCKET_BOOLEAN(caustics_reflective, "Reflective Caustics", true); | |
+ SOCKET_BOOLEAN(caustics_refractive, "Refractive Caustics", true); | |
+ SOCKET_FLOAT(filter_glossy, "Filter Glossy", 0.0f); | |
+ SOCKET_INT(seed, "Seed", 0); | |
+ SOCKET_FLOAT(sample_clamp_direct, "Sample Clamp Direct", 0.0f); | |
+ SOCKET_FLOAT(sample_clamp_indirect, "Sample Clamp Indirect", 0.0f); | |
+ SOCKET_BOOLEAN(motion_blur, "Motion Blur", false); | |
+ | |
+ SOCKET_INT(aa_samples, "AA Samples", 0); | |
+ SOCKET_INT(diffuse_samples, "Diffuse Samples", 1); | |
+ SOCKET_INT(glossy_samples, "Glossy Samples", 1); | |
+ SOCKET_INT(transmission_samples, "Transmission Samples", 1); | |
+ SOCKET_INT(ao_samples, "AO Samples", 1); | |
+ SOCKET_INT(mesh_light_samples, "Mesh Light Samples", 1); | |
+ SOCKET_INT(subsurface_samples, "Subsurface Samples", 1); | |
+ SOCKET_INT(volume_samples, "Volume Samples", 1); | |
+ SOCKET_INT(start_sample, "Start Sample", 0); | |
+ | |
+ SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true); | |
+ SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true); | |
+ SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f); | |
+ | |
+ static NodeEnum method_enum; | |
+ method_enum.insert("path", PATH); | |
+ method_enum.insert("branched_path", BRANCHED_PATH); | |
+ SOCKET_ENUM(method, "Method", method_enum, PATH); | |
+ | |
+ static NodeEnum sampling_pattern_enum; | |
+ sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL); | |
+ sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ); | |
+ SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL); | |
+ | |
+ return type; | |
+} | |
+ | |
+Integrator::Integrator() : Node(node_type) | |
+{ | |
+ need_update = true; | |
+} | |
+ | |
+Integrator::~Integrator() | |
+{ | |
+} | |
+ | |
+void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene) | |
+{ | |
+ if (!need_update) | |
+ return; | |
+ | |
+ device_free(device, dscene); | |
+ | |
+ KernelIntegrator *kintegrator = &dscene->data.integrator; | |
+ | |
+ /* integrator parameters */ | |
+ kintegrator->min_bounce = min_bounce + 1; | |
+ kintegrator->max_bounce = max_bounce + 1; | |
+ | |
+ kintegrator->max_diffuse_bounce = max_diffuse_bounce + 1; | |
+ kintegrator->max_glossy_bounce = max_glossy_bounce + 1; | |
+ kintegrator->max_transmission_bounce = max_transmission_bounce + 1; | |
+ kintegrator->max_volume_bounce = max_volume_bounce + 1; | |
+ | |
+ kintegrator->transparent_min_bounce = transparent_min_bounce + 1; | |
+ kintegrator->transparent_max_bounce = transparent_max_bounce + 1; | |
+ | |
+ if (ao_bounces == 0) { | |
+ kintegrator->ao_bounces = INT_MAX; | |
+ } | |
+ else { | |
+ kintegrator->ao_bounces = ao_bounces - 1; | |
+ } | |
+ | |
+ /* Transparent Shadows | |
+ * We only need to enable transparent shadows if we actually have | 
+ * transparent shaders in the scene. Otherwise we can disable them | 
+ * to improve performance a bit. */ | |
+ kintegrator->transparent_shadows = false; | |
+ foreach (Shader *shader, scene->shaders) { | |
+ /* keep this in sync with SD_HAS_TRANSPARENT_SHADOW in shader.cpp */ | |
+ if ((shader->has_surface_transparent && shader->use_transparent_shadow) || | |
+ shader->has_volume) { | |
+ kintegrator->transparent_shadows = true; | |
+ break; | |
+ } | |
+ } | |
+ | |
+ kintegrator->volume_max_steps = volume_max_steps; | |
+ kintegrator->volume_step_size = volume_step_size; | |
+ | |
+ kintegrator->caustics_reflective = caustics_reflective; | |
+ kintegrator->caustics_refractive = caustics_refractive; | |
+ kintegrator->filter_glossy = (filter_glossy == 0.0f) ? FLT_MAX : 1.0f / filter_glossy; | |
+ | |
+ kintegrator->seed = hash_uint2(seed, 0); | |
+ | |
+ kintegrator->use_ambient_occlusion = ((Pass::contains(scene->film->passes, PASS_AO)) || | |
+ dscene->data.background.ao_factor != 0.0f); | |
+ | |
+ kintegrator->sample_clamp_direct = (sample_clamp_direct == 0.0f) ? FLT_MAX : | |
+ sample_clamp_direct * 3.0f; | |
+ kintegrator->sample_clamp_indirect = (sample_clamp_indirect == 0.0f) ? | |
+ FLT_MAX : | |
+ sample_clamp_indirect * 3.0f; | |
+ | |
+ kintegrator->branched = (method == BRANCHED_PATH); | |
+ kintegrator->volume_decoupled = device->info.has_volume_decoupled; | |
+ kintegrator->diffuse_samples = diffuse_samples; | |
+ kintegrator->glossy_samples = glossy_samples; | |
+ kintegrator->transmission_samples = transmission_samples; | |
+ kintegrator->ao_samples = ao_samples; | |
+ kintegrator->mesh_light_samples = mesh_light_samples; | |
+ kintegrator->subsurface_samples = subsurface_samples; | |
+ kintegrator->volume_samples = volume_samples; | |
+ kintegrator->start_sample = start_sample; | |
+ | |
+ if (method == BRANCHED_PATH) { | |
+ kintegrator->sample_all_lights_direct = sample_all_lights_direct; | |
+ kintegrator->sample_all_lights_indirect = sample_all_lights_indirect; | |
+ } | |
+ else { | |
+ kintegrator->sample_all_lights_direct = false; | |
+ kintegrator->sample_all_lights_indirect = false; | |
+ } | |
+ | |
+ kintegrator->sampling_pattern = sampling_pattern; | |
+ kintegrator->aa_samples = aa_samples; | |
+ | |
+ if (light_sampling_threshold > 0.0f) { | |
+ kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold; | |
+ } | |
+ else { | |
+ kintegrator->light_inv_rr_threshold = 0.0f; | |
+ } | |
+ | |
+ /* sobol directions table */ | |
+ int max_samples = 1; | |
+ | |
+ if (method == BRANCHED_PATH) { | |
+ foreach (Light *light, scene->lights) | |
+ max_samples = max(max_samples, light->samples); | |
+ | |
+ max_samples = max(max_samples, | |
+ max(diffuse_samples, max(glossy_samples, transmission_samples))); | |
+ max_samples = max(max_samples, max(ao_samples, max(mesh_light_samples, subsurface_samples))); | |
+ max_samples = max(max_samples, volume_samples); | |
+ } | |
+ | |
+ uint total_bounces = max_bounce + transparent_max_bounce + 3 + VOLUME_BOUNDS_MAX + | |
+ max(BSSRDF_MAX_HITS, BSSRDF_MAX_BOUNCES); | |
+ | |
+ max_samples *= total_bounces; | |
+ | |
+ int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM; | |
+ dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS); | |
+ | |
+ uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS * dimensions); | |
+ | |
+ sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); | |
+ | |
+ dscene->sobol_directions.copy_to_device(); | |
+ | |
+ need_update = false; | |
+} | |
+ | |
+void Integrator::device_free(Device *, DeviceScene *dscene) | |
+{ | |
+ dscene->sobol_directions.free(); | |
+} | |
+ | |
+bool Integrator::modified(const Integrator &integrator) | |
+{ | |
+ return !Node::equals(integrator); | |
+} | |
+ | |
+void Integrator::tag_update(Scene *scene) | |
+{ | |
+ foreach (Shader *shader, scene->shaders) { | |
+ if (shader->has_integrator_dependency) { | |
+ scene->shader_manager->need_update = true; | |
+ break; | |
+ } | |
+ } | |
+ need_update = true; | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h | |
--- a/intern/cycles/render/integrator.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/integrator.h 2020-01-10 20:42:43.470923389 +0300 | |
@@ -75,6 +75,9 @@ | |
bool sample_all_lights_indirect; | |
float light_sampling_threshold; | |
+ int adaptive_min_samples; | |
+ float adaptive_threshold; | |
+ | |
enum Method { | |
BRANCHED_PATH = 0, | |
PATH = 1, | |
diff -Naur a/intern/cycles/render/integrator.h.orig b/intern/cycles/render/integrator.h.orig | |
--- a/intern/cycles/render/integrator.h.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/render/integrator.h.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,103 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#ifndef __INTEGRATOR_H__ | |
+#define __INTEGRATOR_H__ | |
+ | |
+#include "kernel/kernel_types.h" | |
+ | |
+#include "graph/node.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+class Device; | |
+class DeviceScene; | |
+class Scene; | |
+ | |
+class Integrator : public Node { | |
+ public: | |
+ NODE_DECLARE | |
+ | |
+ int min_bounce; | |
+ int max_bounce; | |
+ | |
+ int max_diffuse_bounce; | |
+ int max_glossy_bounce; | |
+ int max_transmission_bounce; | |
+ int max_volume_bounce; | |
+ | |
+ int transparent_min_bounce; | |
+ int transparent_max_bounce; | |
+ | |
+ int ao_bounces; | |
+ | |
+ int volume_max_steps; | |
+ float volume_step_size; | |
+ | |
+ bool caustics_reflective; | |
+ bool caustics_refractive; | |
+ float filter_glossy; | |
+ | |
+ int seed; | |
+ | |
+ float sample_clamp_direct; | |
+ float sample_clamp_indirect; | |
+ bool motion_blur; | |
+ | |
+ /* Maximum number of samples, beyond which we are likely to run into | |
+ * precision issues for sampling patterns. */ | |
+ static const int MAX_SAMPLES = (1 << 24); | |
+ | |
+ int aa_samples; | |
+ int diffuse_samples; | |
+ int glossy_samples; | |
+ int transmission_samples; | |
+ int ao_samples; | |
+ int mesh_light_samples; | |
+ int subsurface_samples; | |
+ int volume_samples; | |
+ int start_sample; | |
+ | |
+ bool sample_all_lights_direct; | |
+ bool sample_all_lights_indirect; | |
+ float light_sampling_threshold; | |
+ | |
+ enum Method { | |
+ BRANCHED_PATH = 0, | |
+ PATH = 1, | |
+ | |
+ NUM_METHODS, | |
+ }; | |
+ | |
+ Method method; | |
+ | |
+ SamplingPattern sampling_pattern; | |
+ | |
+ bool need_update; | |
+ | |
+ Integrator(); | |
+ ~Integrator(); | |
+ | |
+ void device_update(Device *device, DeviceScene *dscene, Scene *scene); | |
+ void device_free(Device *device, DeviceScene *dscene); | |
+ | |
+ bool modified(const Integrator &integrator); | |
+ void tag_update(Scene *scene); | |
+}; | |
+ | |
+CCL_NAMESPACE_END | |
+ | |
+#endif /* __INTEGRATOR_H__ */ | |
diff -Naur a/intern/cycles/render/jitter.cpp b/intern/cycles/render/jitter.cpp | |
--- a/intern/cycles/render/jitter.cpp 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/render/jitter.cpp 2020-01-10 20:42:43.470923389 +0300 | |
@@ -0,0 +1,287 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+/* This file is based on "Progressive Multi-Jittered Sample Sequences" | |
+ * by Per Christensen, Andrew Kensler and Charlie Kilpatrick. | |
+ * http://graphics.pixar.com/library/ProgressiveMultiJitteredSampling/paper.pdf | |
+ * | |
+ * Performance can be improved in the future by implementing the new | |
+ * algorithm from Matt Pharr in http://jcgt.org/published/0008/01/04/ | |
+ * "Efficient Generation of Points that Satisfy Two-Dimensional Elementary Intervals" | |
+ */ | |
+ | |
+#include "render/jitter.h" | |
+ | |
+#include <math.h> | |
+#include <vector> | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+static uint cmj_hash(uint i, uint p) | |
+{ | |
+ i ^= p; | |
+ i ^= i >> 17; | |
+ i ^= i >> 10; | |
+ i *= 0xb36534e5; | |
+ i ^= i >> 12; | |
+ i ^= i >> 21; | |
+ i *= 0x93fc4795; | |
+ i ^= 0xdf6e307f; | |
+ i ^= i >> 17; | |
+ i *= 1 | p >> 18; | |
+ | |
+ return i; | |
+} | |
+ | |
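+/* Maps the hash to a float in [0, 1); the divisor is slightly larger | 
+ * than 2^32, which keeps the result strictly below 1.0. */ | 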
+static float cmj_randfloat(uint i, uint p) | |
+{ | |
+ return cmj_hash(i, p) * (1.0f / 4294967808.0f); | |
+} | |
+ | |
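+/* Progressive multi-jittered sequence generator. Starting from a single | 
+ * random point, each loop iteration quadruples the sample count: an | 
+ * N-point sequence is extended to 2N points, then to 4N, while keeping | 
+ * every prefix of the sequence stratified. */ | 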
+class PMJ_Generator { | |
+ public: | |
+ static void generate_2D(float2 points[], int size, int rng_seed_in) | |
+ { | |
+ PMJ_Generator g(rng_seed_in); | |
+ points[0].x = g.rnd(); | |
+ points[0].y = g.rnd(); | |
+ int N = 1; | |
+ while (N < size) { | |
+ g.extend_sequence_even(points, N); | |
+ g.extend_sequence_odd(points, 2 * N); | |
+ N = 4 * N; | |
+ } | |
+ } | |
+ | |
+ protected: | |
+ PMJ_Generator(int rnd_seed_in) : num_samples(1), rnd_index(2), rnd_seed(rnd_seed_in) | |
+ { | |
+ } | |
+ | |
+ float rnd() | |
+ { | |
+ return cmj_randfloat(++rnd_index, rnd_seed); | |
+ } | |
+ | |
+ virtual void mark_occupied_strata(float2 points[], int N) | |
+ { | |
+ int NN = 2 * N; | |
+ for (int s = 0; s < NN; ++s) { | |
+ occupied1Dx[s] = occupied1Dy[s] = false; | |
+ } | |
+ for (int s = 0; s < N; ++s) { | |
+ int xstratum = (int)(NN * points[s].x); | |
+ int ystratum = (int)(NN * points[s].y); | |
+ occupied1Dx[xstratum] = true; | |
+ occupied1Dy[ystratum] = true; | |
+ } | |
+ } | |
+ | |
+ virtual void generate_sample_point( | |
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) | |
+ { | |
+ int NN = 2 * N; | |
+ float2 pt; | |
+ int xstratum, ystratum; | |
+ do { | |
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n; | |
+ xstratum = (int)(NN * pt.x); | |
+ } while (occupied1Dx[xstratum]); | |
+ do { | |
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n; | |
+ ystratum = (int)(NN * pt.y); | |
+ } while (occupied1Dy[ystratum]); | |
+ occupied1Dx[xstratum] = true; | |
+ occupied1Dy[ystratum] = true; | |
+ points[num_samples] = pt; | |
+ ++num_samples; | |
+ } | |
+ | |
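+ /* When doubling from N to 2N samples, each new point is placed in the | 
+ * sub-quadrant diagonally opposite its parent sample (both halves | 
+ * flipped), so the doubled sequence stays stratified. */ | 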
+ void extend_sequence_even(float2 points[], int N) | |
+ { | |
+ int n = (int)sqrtf(N); | |
+ occupied1Dx.resize(2 * N); | |
+ occupied1Dy.resize(2 * N); | |
+ mark_occupied_strata(points, N); | |
+ for (int s = 0; s < N; ++s) { | |
+ float2 oldpt = points[s]; | |
+ float i = floorf(n * oldpt.x); | |
+ float j = floorf(n * oldpt.y); | |
+ float xhalf = floorf(2.0f * (n * oldpt.x - i)); | |
+ float yhalf = floorf(2.0f * (n * oldpt.y - j)); | |
+ xhalf = 1.0f - xhalf; | |
+ yhalf = 1.0f - yhalf; | |
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N); | |
+ } | |
+ } | |
+ | |
+ void extend_sequence_odd(float2 points[], int N) | |
+ { | |
+ int n = (int)sqrtf(N / 2); | |
+ occupied1Dx.resize(2 * N); | |
+ occupied1Dy.resize(2 * N); | |
+ mark_occupied_strata(points, N); | |
+ std::vector<float> xhalves(N / 2); | |
+ std::vector<float> yhalves(N / 2); | |
+ for (int s = 0; s < N / 2; ++s) { | |
+ float2 oldpt = points[s]; | |
+ float i = floorf(n * oldpt.x); | |
+ float j = floorf(n * oldpt.y); | |
+ float xhalf = floorf(2.0f * (n * oldpt.x - i)); | |
+ float yhalf = floorf(2.0f * (n * oldpt.y - j)); | |
+ if (rnd() > 0.5f) { | |
+ xhalf = 1.0f - xhalf; | |
+ } | |
+ else { | |
+ yhalf = 1.0f - yhalf; | |
+ } | |
+ xhalves[s] = xhalf; | |
+ yhalves[s] = yhalf; | |
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N); | |
+ } | |
+ for (int s = 0; s < N / 2; ++s) { | |
+ float2 oldpt = points[s]; | |
+ float i = floorf(n * oldpt.x); | |
+ float j = floorf(n * oldpt.y); | |
+ float xhalf = 1.0f - xhalves[s]; | |
+ float yhalf = 1.0f - yhalves[s]; | |
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N); | |
+ } | |
+ } | |
+ | |
+ std::vector<bool> occupied1Dx, occupied1Dy; | |
+ int num_samples; | |
+ int rnd_index, rnd_seed; | |
+}; | |
+ | |
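+/* PMJ02 variant: in addition to the 1D strata, it keeps every | 
+ * power-of-two elementary interval of the unit square free of | 
+ * collisions when placing new points (the (0,2)-sequence property). */ | 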
+class PMJ02_Generator : public PMJ_Generator { | |
+ protected: | |
+ void generate_sample_point( | |
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) override | |
+ { | |
+ int NN = 2 * N; | |
+ float2 pt; | |
+ do { | |
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n; | |
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n; | |
+ } while (is_occupied(pt, NN)); | |
+ mark_occupied_strata1(pt, NN); | |
+ points[num_samples] = pt; | |
+ ++num_samples; | |
+ } | |
+ | |
+ void mark_occupied_strata(float2 points[], int N) override | |
+ { | |
+ int NN = 2 * N; | |
+ int num_shapes = (int)log2f(NN) + 1; | |
+ occupiedStrata.resize(num_shapes); | |
+ for (int shape = 0; shape < num_shapes; ++shape) { | |
+ occupiedStrata[shape].resize(NN); | |
+ for (int n = 0; n < NN; ++n) { | |
+ occupiedStrata[shape][n] = false; | |
+ } | |
+ } | |
+ for (int s = 0; s < N; ++s) { | |
+ mark_occupied_strata1(points[s], NN); | |
+ } | |
+ } | |
+ | |
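+ /* Visits every elementary stratification of the unit square, from | 
+ * NN x 1 columns down to 1 x NN rows, and marks the stratum containing | 
+ * the point as occupied in each of them. */ | 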
+ void mark_occupied_strata1(float2 pt, int NN) | |
+ { | |
+ int shape = 0; | |
+ int xdivs = NN; | |
+ int ydivs = 1; | |
+ do { | |
+ int xstratum = (int)(xdivs * pt.x); | |
+ int ystratum = (int)(ydivs * pt.y); | |
+ size_t index = ystratum * xdivs + xstratum; | |
+ assert(index < NN); | |
+ occupiedStrata[shape][index] = true; | |
+ shape = shape + 1; | |
+ xdivs = xdivs / 2; | |
+ ydivs = ydivs * 2; | |
+ } while (xdivs > 0); | |
+ } | |
+ | |
+ bool is_occupied(float2 pt, int NN) | |
+ { | |
+ int shape = 0; | |
+ int xdivs = NN; | |
+ int ydivs = 1; | |
+ do { | |
+ int xstratum = (int)(xdivs * pt.x); | |
+ int ystratum = (int)(ydivs * pt.y); | |
+ size_t index = ystratum * xdivs + xstratum; | |
+ assert(index < NN); | |
+ if (occupiedStrata[shape][index]) { | |
+ return true; | |
+ } | |
+ shape = shape + 1; | |
+ xdivs = xdivs / 2; | |
+ ydivs = ydivs * 2; | |
+ } while (xdivs > 0); | |
+ return false; | |
+ } | |
+ | |
+ private: | |
+ std::vector<std::vector<bool>> occupiedStrata; | |
+}; | |
+ | |
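+/* Fisher-Yates shuffles the samples within each block of 16, permuting | 
+ * the odd and even index sets separately. */ | 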
+static void shuffle(float2 points[], int size, int rng_seed) | |
+{ | |
+ /* Offset samples by 1.0 for faster scrambling in kernel_random.h */ | |
+ for (int i = 0; i < size; ++i) { | |
+ points[i].x += 1.0f; | |
+ points[i].y += 1.0f; | |
+ } | |
+ | |
+ if (rng_seed == 0) { | |
+ return; | |
+ } | |
+ | |
+ constexpr int odd[8] = {0, 1, 4, 5, 10, 11, 14, 15}; | |
+ constexpr int even[8] = {2, 3, 6, 7, 8, 9, 12, 13}; | |
+ | |
+ int rng_index = 0; | |
+ for (int yy = 0; yy < size / 16; ++yy) { | |
+ for (int xx = 0; xx < 8; ++xx) { | |
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx); | |
+ float2 tmp = points[odd[other] + yy * 16]; | |
+ points[odd[other] + yy * 16] = points[odd[xx] + yy * 16]; | |
+ points[odd[xx] + yy * 16] = tmp; | |
+ } | |
+ for (int xx = 0; xx < 8; ++xx) { | |
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx); | |
+ float2 tmp = points[even[other] + yy * 16]; | |
+ points[even[other] + yy * 16] = points[even[xx] + yy * 16]; | |
+ points[even[xx] + yy * 16] = tmp; | |
+ } | |
+ } | |
+} | |
+ | |
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed) | |
+{ | |
+ PMJ_Generator::generate_2D(points, size, rng_seed); | |
+ shuffle(points, size, rng_seed); | |
+} | |
+ | |
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed) | |
+{ | |
+ PMJ02_Generator::generate_2D(points, size, rng_seed); | |
+ shuffle(points, size, rng_seed); | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
diff -Naur a/intern/cycles/render/jitter.h b/intern/cycles/render/jitter.h | |
--- a/intern/cycles/render/jitter.h 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/render/jitter.h 2020-01-10 20:42:43.470923389 +0300 | |
@@ -0,0 +1,29 @@ | |
+/* | |
+ * Copyright 2019 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#ifndef __JITTER_H__ | |
+#define __JITTER_H__ | |
+ | |
+#include "util/util_types.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed); | |
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed); | |
+ | |
+CCL_NAMESPACE_END | |
+ | |
+#endif /* __JITTER_H__ */ | |
diff -Naur a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp | |
--- a/intern/cycles/render/scene.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/scene.cpp 2020-01-10 20:42:43.474256721 +0300 | |
@@ -77,7 +77,7 @@ | |
svm_nodes(device, "__svm_nodes", MEM_TEXTURE), | |
shaders(device, "__shaders", MEM_TEXTURE), | |
lookup_table(device, "__lookup_table", MEM_TEXTURE), | |
- sobol_directions(device, "__sobol_directions", MEM_TEXTURE), | |
+ sample_pattern_lut(device, "__sample_pattern_lut", MEM_TEXTURE), | |
ies_lights(device, "__ies", MEM_TEXTURE) | |
{ | |
memset((void *)&data, 0, sizeof(data)); | |
diff -Naur a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h | |
--- a/intern/cycles/render/scene.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/scene.h 2020-01-10 20:42:43.474256721 +0300 | |
@@ -119,7 +119,7 @@ | |
device_vector<float> lookup_table; | |
/* integrator */ | |
- device_vector<uint> sobol_directions; | |
+ device_vector<uint> sample_pattern_lut; | |
/* ies lights */ | |
device_vector<float> ies_lights; | |
diff -Naur a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp | |
--- a/intern/cycles/render/session.cpp 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/session.cpp 2020-01-10 20:42:43.474256721 +0300 | |
@@ -900,7 +900,7 @@ | |
Integrator *integrator = scene->integrator; | |
BakeManager *bake_manager = scene->bake_manager; | |
- if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) { | |
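+ /* PMJ, like CMJ, needs the total AA sample count known up front, so | 
+ * keep aa_samples in sync for every non-Sobol pattern. */ | 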
+ if (integrator->sampling_pattern != SAMPLING_PATTERN_SOBOL || bake_manager->get_baking()) { | |
int aa_samples = tile_manager.num_samples; | |
if (aa_samples != integrator->aa_samples) { | |
@@ -1022,6 +1022,7 @@ | |
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); | |
task.need_finish_queue = params.progressive_refine; | |
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH; | |
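+ /* In this patch the adaptive kernels are enabled by selecting the PMJ pattern. */ | 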
+ task.integrator_adaptive = scene->integrator->sampling_pattern == SAMPLING_PATTERN_PMJ; | |
task.requested_tile_size = params.tile_size; | |
task.passes_size = tile_manager.params.get_passes_size(); | |
diff -Naur a/intern/cycles/render/session.cpp.orig b/intern/cycles/render/session.cpp.orig | |
--- a/intern/cycles/render/session.cpp.orig 1970-01-01 03:00:00.000000000 +0300 | |
+++ b/intern/cycles/render/session.cpp.orig 2020-01-10 20:37:06.000000000 +0300 | |
@@ -0,0 +1,1163 @@ | |
+/* | |
+ * Copyright 2011-2013 Blender Foundation | |
+ * | |
+ * Licensed under the Apache License, Version 2.0 (the "License"); | |
+ * you may not use this file except in compliance with the License. | |
+ * You may obtain a copy of the License at | |
+ * | |
+ * http://www.apache.org/licenses/LICENSE-2.0 | |
+ * | |
+ * Unless required by applicable law or agreed to in writing, software | |
+ * distributed under the License is distributed on an "AS IS" BASIS, | |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+ * See the License for the specific language governing permissions and | |
+ * limitations under the License. | |
+ */ | |
+ | |
+#include <string.h> | |
+#include <limits.h> | |
+ | |
+#include "render/buffers.h" | |
+#include "render/camera.h" | |
+#include "device/device.h" | |
+#include "render/graph.h" | |
+#include "render/integrator.h" | |
+#include "render/light.h" | |
+#include "render/mesh.h" | |
+#include "render/object.h" | |
+#include "render/scene.h" | |
+#include "render/session.h" | |
+#include "render/bake.h" | |
+ | |
+#include "util/util_foreach.h" | |
+#include "util/util_function.h" | |
+#include "util/util_logging.h" | |
+#include "util/util_math.h" | |
+#include "util/util_opengl.h" | |
+#include "util/util_task.h" | |
+#include "util/util_time.h" | |
+ | |
+CCL_NAMESPACE_BEGIN | |
+ | |
+/* Note about preserve_tile_device option for tile manager: | |
+ * progressive refine and viewport rendering require tiles to | 
+ * always be allocated for the same device | |
+ */ | |
+Session::Session(const SessionParams ¶ms_) | |
+ : params(params_), | |
+ tile_manager(params.progressive, | |
+ params.samples, | |
+ params.tile_size, | |
+ params.start_resolution, | |
+ params.background == false || params.progressive_refine, | |
+ params.background, | |
+ params.tile_order, | |
+ max(params.device.multi_devices.size(), 1), | |
+ params.pixel_size), | |
+ stats(), | |
+ profiler() | |
+{ | |
+ device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background); | |
+ | |
+ TaskScheduler::init(params.threads); | |
+ | |
+ device = Device::create(params.device, stats, profiler, params.background); | |
+ | |
+ if (params.background && !params.write_render_cb) { | |
+ buffers = NULL; | |
+ display = NULL; | |
+ } | |
+ else { | |
+ buffers = new RenderBuffers(device); | |
+ display = new DisplayBuffer(device, params.display_buffer_linear); | |
+ } | |
+ | |
+ session_thread = NULL; | |
+ scene = NULL; | |
+ | |
+ reset_time = 0.0; | |
+ last_update_time = 0.0; | |
+ | |
+ delayed_reset.do_reset = false; | |
+ delayed_reset.samples = 0; | |
+ | |
+ display_outdated = false; | |
+ gpu_draw_ready = false; | |
+ gpu_need_display_buffer_update = false; | |
+ pause = false; | |
+ kernels_loaded = false; | |
+ | |
+ /* TODO(sergey): Check if it's indeed an optimal value for the split kernel. */ | 
+ max_closure_global = 1; | |
+} | |
+ | |
+Session::~Session() | |
+{ | |
+ if (session_thread) { | |
+ /* wait for session thread to end */ | |
+ progress.set_cancel("Exiting"); | |
+ | |
+ gpu_need_display_buffer_update = false; | |
+ gpu_need_display_buffer_update_cond.notify_all(); | |
+ | |
+ { | |
+ thread_scoped_lock pause_lock(pause_mutex); | |
+ pause = false; | |
+ } | |
+ pause_cond.notify_all(); | |
+ | |
+ wait(); | |
+ } | |
+ | |
+ if (params.write_render_cb) { | |
+ /* Copy to display buffer and write out image if requested */ | |
+ delete display; | |
+ | |
+ display = new DisplayBuffer(device, false); | |
+ display->reset(buffers->params); | |
+ copy_to_display_buffer(params.samples); | |
+ | |
+ int w = display->draw_width; | |
+ int h = display->draw_height; | |
+ uchar4 *pixels = display->rgba_byte.copy_from_device(0, w, h); | |
+ params.write_render_cb((uchar *)pixels, w, h, 4); | |
+ } | |
+ | |
+ /* clean up */ | |
+ tile_manager.device_free(); | |
+ | |
+ delete buffers; | |
+ delete display; | |
+ delete scene; | |
+ delete device; | |
+ | |
+ TaskScheduler::exit(); | |
+} | |
+ | |
+void Session::start() | |
+{ | |
+ if (!session_thread) { | |
+ session_thread = new thread(function_bind(&Session::run, this)); | |
+ } | |
+} | |
+ | |
+bool Session::ready_to_reset() | |
+{ | |
+ double dt = time_dt() - reset_time; | |
+ | |
+ if (!display_outdated) | |
+ return (dt > params.reset_timeout); | |
+ else | |
+ return (dt > params.cancel_timeout); | |
+} | |
+ | |
+/* GPU Session */ | |
+ | |
+void Session::reset_gpu(BufferParams &buffer_params, int samples) | |
+{ | |
+ thread_scoped_lock pause_lock(pause_mutex); | |
+ | |
+ /* block for buffer access and reset immediately. we can't do this | |
+ * in the thread, because we need to allocate an OpenGL buffer, and | |
+ * that only works in the main thread */ | |
+ thread_scoped_lock display_lock(display_mutex); | |
+ thread_scoped_lock buffers_lock(buffers_mutex); | |
+ | |
+ display_outdated = true; | |
+ reset_time = time_dt(); | |
+ | |
+ reset_(buffer_params, samples); | |
+ | |
+ gpu_need_display_buffer_update = false; | |
+ gpu_need_display_buffer_update_cond.notify_all(); | |
+ | |
+ pause_cond.notify_all(); | |
+} | |
+ | |
+bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_params) | |
+{ | |
+ /* block for buffer access */ | |
+ thread_scoped_lock display_lock(display_mutex); | |
+ | |
+ /* first check we already rendered something */ | |
+ if (gpu_draw_ready) { | |
+ /* then verify the buffers have the expected size, so we don't | |
+ * draw previous results in a resized window */ | |
+ if (!buffer_params.modified(display->params)) { | |
+ /* for CUDA we still need to do tone-mapping, since we can | 
+ * only access GL buffers from the main thread. */ | |
+ if (gpu_need_display_buffer_update) { | |
+ thread_scoped_lock buffers_lock(buffers_mutex); | |
+ copy_to_display_buffer(tile_manager.state.sample); | |
+ gpu_need_display_buffer_update = false; | |
+ gpu_need_display_buffer_update_cond.notify_all(); | |
+ } | |
+ | |
+ display->draw(device, draw_params); | |
+ | |
+ if (display_outdated && (time_dt() - reset_time) > params.text_timeout) | |
+ return false; | |
+ | |
+ return true; | |
+ } | |
+ } | |
+ | |
+ return false; | |
+} | |
+ | |
+void Session::run_gpu() | |
+{ | |
+ bool tiles_written = false; | |
+ | |
+ reset_time = time_dt(); | |
+ last_update_time = time_dt(); | |
+ | |
+ progress.set_render_start_time(); | |
+ | |
+ while (!progress.get_cancel()) { | |
+ /* advance to next tile */ | |
+ bool no_tiles = !tile_manager.next(); | |
+ | |
+ DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN; | |
+ if (no_tiles) { | |
+ kernel_state = device->get_active_kernel_switch_state(); | |
+ } | |
+ | |
+ if (params.background) { | |
+ /* if no work left and in background mode, we can stop immediately */ | |
+ if (no_tiles) { | |
+ progress.set_status("Finished"); | |
+ break; | |
+ } | |
+ } | |
+ | |
+ /* Don't go into pause mode when the image was rendered with preview kernels. | 
+ * When feature kernels become available the session will be reset. */ | |
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { | |
+ time_sleep(0.1); | |
+ } | |
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) { | |
+ reset_gpu(tile_manager.params, params.samples); | |
+ } | |
+ | |
+ else { | |
+ /* if in interactive mode, and we are either paused or done for now, | |
+ * wait for pause condition notify to wake up again */ | |
+ thread_scoped_lock pause_lock(pause_mutex); | |
+ | |
+ if (!pause && !tile_manager.done()) { | |
+ /* A reset could have happened after no_tiles was set, before this lock. | 
+ * In this case we shall not wait for the pause condition. | 
+ */ | |
+ } | |
+ else if (pause || no_tiles) { | |
+ update_status_time(pause, no_tiles); | |
+ | |
+ while (1) { | |
+ scoped_timer pause_timer; | |
+ pause_cond.wait(pause_lock); | |
+ if (pause) { | |
+ progress.add_skip_time(pause_timer, params.background); | |
+ } | |
+ | |
+ update_status_time(pause, no_tiles); | |
+ progress.set_update(); | |
+ | |
+ if (!pause) | |
+ break; | |
+ } | |
+ } | |
+ | |
+ if (progress.get_cancel()) | |
+ break; | |
+ } | |
+ | |
+ if (!no_tiles) { | |
+ /* update scene */ | |
+ scoped_timer update_timer; | |
+ if (update_scene()) { | |
+ profiler.reset(scene->shaders.size(), scene->objects.size()); | |
+ } | |
+ progress.add_skip_time(update_timer, params.background); | |
+ | |
+ if (!device->error_message().empty()) | |
+ progress.set_error(device->error_message()); | |
+ | |
+ if (progress.get_cancel()) | |
+ break; | |
+ | |
+ /* buffers mutex is locked entirely while rendering each | |
+ * sample, and released/reacquired on each iteration to allow | |
+ * reset and draw in between */ | |
+ thread_scoped_lock buffers_lock(buffers_mutex); | |
+ | |
+ /* update status and timing */ | |
+ update_status_time(); | |
+ | |
+ /* render */ | |
+ render(); | |
+ | |
+ device->task_wait(); | |
+ | |
+ if (!device->error_message().empty()) | |
+ progress.set_cancel(device->error_message()); | |
+ | |
+ /* update status and timing */ | |
+ update_status_time(); | |
+ | |
+ gpu_need_display_buffer_update = true; | |
+ gpu_draw_ready = true; | |
+ progress.set_update(); | |
+ | |
+ /* wait until the display buffer is updated */ | 
+ if (!params.background) { | |
+ while (gpu_need_display_buffer_update) { | |
+ if (progress.get_cancel()) | |
+ break; | |
+ | |
+ gpu_need_display_buffer_update_cond.wait(buffers_lock); | |
+ } | |
+ } | |
+ | |
+ if (!device->error_message().empty()) | |
+ progress.set_error(device->error_message()); | |
+ | |
+ tiles_written = update_progressive_refine(progress.get_cancel()); | |
+ | |
+ if (progress.get_cancel()) | |
+ break; | |
+ } | |
+ } | |
+ | |
+ if (!tiles_written) | |
+ update_progressive_refine(true); | |
+} | |
+ | |
+/* CPU Session */ | |
+ | |
+void Session::reset_cpu(BufferParams &buffer_params, int samples) | |
+{ | |
+ thread_scoped_lock reset_lock(delayed_reset.mutex); | |
+ thread_scoped_lock pause_lock(pause_mutex); | |
+ | |
+ display_outdated = true; | |
+ reset_time = time_dt(); | |
+ | |
+ delayed_reset.params = buffer_params; | |
+ delayed_reset.samples = samples; | |
+ delayed_reset.do_reset = true; | |
+ device->task_cancel(); | |
+ | |
+ pause_cond.notify_all(); | |
+} | |
+ | |
+bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_params) | |
+{ | |
+ thread_scoped_lock display_lock(display_mutex); | |
+ | |
+ /* first check we already rendered something */ | |
+ if (display->draw_ready()) { | |
+ /* then verify the buffers have the expected size, so we don't | |
+ * draw previous results in a resized window */ | |
+ if (!buffer_params.modified(display->params)) { | |
+ display->draw(device, draw_params); | |
+ | |
+ if (display_outdated && (time_dt() - reset_time) > params.text_timeout) | |
+ return false; | |
+ | |
+ return true; | |
+ } | |
+ } | |
+ | |
+ return false; | |
+} | |
+ | |
+bool Session::acquire_tile(Device *tile_device, RenderTile &rtile) | |
+{ | |
+ if (progress.get_cancel()) { | |
+ if (params.progressive_refine == false) { | |
+ /* for progressive refine the current sample should be finished for all tiles */ | 
+ return false; | |
+ } | |
+ } | |
+ | |
+ thread_scoped_lock tile_lock(tile_mutex); | |
+ | |
+ /* get next tile from manager */ | |
+ Tile *tile; | |
+ int device_num = device->device_number(tile_device); | |
+ | |
+ if (!tile_manager.next_tile(tile, device_num)) | |
+ return false; | |
+ | |
+ /* fill render tile */ | |
+ rtile.x = tile_manager.state.buffer.full_x + tile->x; | |
+ rtile.y = tile_manager.state.buffer.full_y + tile->y; | |
+ rtile.w = tile->w; | |
+ rtile.h = tile->h; | |
+ rtile.start_sample = tile_manager.state.sample; | |
+ rtile.num_samples = tile_manager.state.num_samples; | |
+ rtile.resolution = tile_manager.state.resolution_divider; | |
+ rtile.tile_index = tile->index; | |
+ rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE; | |
+ | |
+ tile_lock.unlock(); | |
+ | |
+ /* in case of a permanent buffer, return it; otherwise we will allocate | 
+ * a new temporary buffer */ | |
+ if (buffers) { | |
+ tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); | |
+ | |
+ rtile.buffer = buffers->buffer.device_pointer; | |
+ rtile.buffers = buffers; | |
+ | |
+ device->map_tile(tile_device, rtile); | |
+ | |
+ return true; | |
+ } | |
+ | |
+ if (tile->buffers == NULL) { | |
+ /* fill buffer parameters */ | |
+ BufferParams buffer_params = tile_manager.params; | |
+ buffer_params.full_x = rtile.x; | |
+ buffer_params.full_y = rtile.y; | |
+ buffer_params.width = rtile.w; | |
+ buffer_params.height = rtile.h; | |
+ | |
+ /* allocate buffers */ | |
+ tile->buffers = new RenderBuffers(tile_device); | |
+ tile->buffers->reset(buffer_params); | |
+ } | |
+ | |
+ tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); | |
+ | |
+ rtile.buffer = tile->buffers->buffer.device_pointer; | |
+ rtile.buffers = tile->buffers; | |
+ rtile.sample = tile_manager.state.sample; | |
+ | |
+ /* this will tag the tile as IN PROGRESS in the blender-side render pipeline, | 
+ * which is needed to highlight the currently rendering tile before the first | 
+ * sample has been processed for it | 
+ */ | |
+ update_tile_sample(rtile); | |
+ | |
+ return true; | |
+} | |
+ | |
+void Session::update_tile_sample(RenderTile &rtile) | |
+{ | |
+ thread_scoped_lock tile_lock(tile_mutex); | |
+ | |
+ if (update_render_tile_cb) { | |
+ if (params.progressive_refine == false) { | |
+ /* todo: optimize this by making it thread-safe and removing the lock */ | 
+ | |
+ update_render_tile_cb(rtile, true); | |
+ } | |
+ } | |
+ | |
+ update_status_time(); | |
+} | |
+ | |
+void Session::release_tile(RenderTile &rtile) | |
+{ | |
+ thread_scoped_lock tile_lock(tile_mutex); | |
+ | |
+ progress.add_finished_tile(rtile.task == RenderTile::DENOISE); | |
+ | |
+ bool delete_tile; | |
+ | |
+ if (tile_manager.finish_tile(rtile.tile_index, delete_tile)) { | |
+ if (write_render_tile_cb && params.progressive_refine == false) { | |
+ write_render_tile_cb(rtile); | |
+ } | |
+ | |
+ if (delete_tile) { | |
+ delete rtile.buffers; | |
+ tile_manager.state.tiles[rtile.tile_index].buffers = NULL; | |
+ } | |
+ } | |
+ else { | |
+ if (update_render_tile_cb && params.progressive_refine == false) { | |
+ update_render_tile_cb(rtile, false); | |
+ } | |
+ } | |
+ | |
+ update_status_time(); | |
+} | |
+ | |
+void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device) | |
+{ | |
+ thread_scoped_lock tile_lock(tile_mutex); | |
+ | |
+ int center_idx = tiles[4].tile_index; | |
+ assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); | |
+ BufferParams buffer_params = tile_manager.params; | |
+ int4 image_region = make_int4(buffer_params.full_x, | |
+ buffer_params.full_y, | |
+ buffer_params.full_x + buffer_params.width, | |
+ buffer_params.full_y + buffer_params.height); | |
+ | |
+ for (int dy = -1, i = 0; dy <= 1; dy++) { | |
+ for (int dx = -1; dx <= 1; dx++, i++) { | |
+ int px = tiles[4].x + dx * params.tile_size.x; | |
+ int py = tiles[4].y + dy * params.tile_size.y; | |
+ if (px >= image_region.x && py >= image_region.y && px < image_region.z && | |
+ py < image_region.w) { | |
+ int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx; | |
+ Tile *tile = &tile_manager.state.tiles[tile_index]; | |
+ assert(tile->buffers); | |
+ | |
+ tiles[i].buffer = tile->buffers->buffer.device_pointer; | |
+ tiles[i].x = tile_manager.state.buffer.full_x + tile->x; | |
+ tiles[i].y = tile_manager.state.buffer.full_y + tile->y; | |
+ tiles[i].w = tile->w; | |
+ tiles[i].h = tile->h; | |
+ tiles[i].buffers = tile->buffers; | |
+ | |
+ tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride); | |
+ } | |
+ else { | |
+ tiles[i].buffer = (device_ptr)NULL; | |
+ tiles[i].buffers = NULL; | |
+ tiles[i].x = clamp(px, image_region.x, image_region.z); | |
+ tiles[i].y = clamp(py, image_region.y, image_region.w); | |
+ tiles[i].w = tiles[i].h = 0; | |
+ } | |
+ } | |
+ } | |
+ | |
+ assert(tiles[4].buffers); | |
+ device->map_neighbor_tiles(tile_device, tiles); | |
+ | |
+ /* The denoised result is written back to the original tile. */ | |
+ tiles[9] = tiles[4]; | |
+} | |
+ | |
+void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device) | |
+{ | |
+ thread_scoped_lock tile_lock(tile_mutex); | |
+ device->unmap_neighbor_tiles(tile_device, tiles); | |
+} | |
+ | |
+void Session::run_cpu() | |
+{ | |
+ bool tiles_written = false; | |
+ | |
+ last_update_time = time_dt(); | |
+ | |
+ { | |
+ /* reset once to start */ | |
+ thread_scoped_lock reset_lock(delayed_reset.mutex); | |
+ thread_scoped_lock buffers_lock(buffers_mutex); | |
+ thread_scoped_lock display_lock(display_mutex); | |
+ | |
+ reset_(delayed_reset.params, delayed_reset.samples); | |
+ delayed_reset.do_reset = false; | |
+ } | |
+ | |
+ while (!progress.get_cancel()) { | |
+ /* advance to next tile */ | |
+ bool no_tiles = !tile_manager.next(); | |
+ bool need_copy_to_display_buffer = false; | |
+ | |
+ DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN; | |
+ if (no_tiles) { | |
+ kernel_state = device->get_active_kernel_switch_state(); | |
+ } | |
+ | |
+ if (params.background) { | |
+ /* if no work left and in background mode, we can stop immediately */ | |
+ if (no_tiles) { | |
+ progress.set_status("Finished"); | |
+ break; | |
+ } | |
+ } | |
+ | |
+ /* Don't go into pause mode when preview kernels are used. | 
+ * When feature kernels become available the session will be reset. */ | 
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { | |
+ time_sleep(0.1); | |
+ } | |
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) { | |
+ reset_cpu(tile_manager.params, params.samples); | |
+ } | |
+ | |
+ else { | |
+ /* if in interactive mode, and we are either paused or done for now, | |
+ * wait for pause condition notify to wake up again */ | |
+ thread_scoped_lock pause_lock(pause_mutex); | |
+ | |
+ if (!pause && delayed_reset.do_reset) { | |
+ /* reset once to start */ | |
+ thread_scoped_lock reset_lock(delayed_reset.mutex); | |
+ thread_scoped_lock buffers_lock(buffers_mutex); | |
+ thread_scoped_lock display_lock(display_mutex); | |
+ | |
+ reset_(delayed_reset.params, delayed_reset.samples); | |
+ delayed_reset.do_reset = false; | |
+ } | |
+ else if (pause || no_tiles) { | |
+ update_status_time(pause, no_tiles); | |
+ | |
+ while (1) { | |
+ scoped_timer pause_timer; | |
+ pause_cond.wait(pause_lock); | |
+ if (pause) { | |
+ progress.add_skip_time(pause_timer, params.background); | |
+ } | |
+ | |
+ update_status_time(pause, no_tiles); | |
+ progress.set_update(); | |
+ | |
+ if (!pause) | |
+ break; | |
+ } | |
+ } | |
+ | |
+ if (progress.get_cancel()) | |
+ break; | |
+ } | |
+ | |
+ if (!no_tiles) { | |
+ /* buffers mutex is locked entirely while rendering each | |
+ * sample, and released/reacquired on each iteration to allow | |
+ * reset and draw in between */ | |
+ thread_scoped_lock buffers_lock(buffers_mutex); | |
+ | |
+ /* update scene */ | |
+ scoped_timer update_timer; | |
+ if (update_scene()) { | |
+ profiler.reset(scene->shaders.size(), scene->objects.size()); | |
+ } | |
+ progress.add_skip_time(update_timer, params.background); | |
+ | |
+ if (!device->error_message().empty()) | |
+ progress.set_error(device->error_message()); | |
+ | |
+ if (progress.get_cancel()) | |
+ break; | |
+ | |
+ /* update status and timing */ | |
+ update_status_time(); | |
+ | |
+ /* render */ | |
+ render(); | |
+ | |
+ /* update status and timing */ | |
+ update_status_time(); | |
+ | |
+ if (!params.background) | |
+ need_copy_to_display_buffer = true; | |
+ | |
+ if (!device->error_message().empty()) | |
+ progress.set_error(device->error_message()); | |
+ } | |
+ | |
+ device->task_wait(); | |
+ | |
+ { | |
+ thread_scoped_lock reset_lock(delayed_reset.mutex); | |
+ thread_scoped_lock buffers_lock(buffers_mutex); | |
+ thread_scoped_lock display_lock(display_mutex); | |
+ | |
+ if (delayed_reset.do_reset) { | |
+ /* reset rendering if request from main thread */ | |
+ delayed_reset.do_reset = false; | |
+ reset_(delayed_reset.params, delayed_reset.samples); | |
+ } | |
+ else if (need_copy_to_display_buffer) { | |
+ /* Only copy to display_buffer if we do not reset; we don't | 
+ * want to show the result of an incomplete sample */ | |
+ copy_to_display_buffer(tile_manager.state.sample); | |
+ } | |
+ | |
+ if (!device->error_message().empty()) | |
+ progress.set_error(device->error_message()); | |
+ | |
+ tiles_written = update_progressive_refine(progress.get_cancel()); | |
+ } | |
+ | |
+ progress.set_update(); | |
+ } | |
+ | |
+ if (!tiles_written) | |
+ update_progressive_refine(true); | |
+} | |
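The delayed_reset handling above is the trickiest part of run_cpu(): a reset requested from the main thread only sets a flag under delayed_reset.mutex, and the session thread applies it while holding the reset, buffers and display mutexes together, so a reset can never interleave with a sample being rendered or drawn. Below is a minimal standalone sketch of the same pattern using std:: primitives instead of the Cycles thread wrappers; DelayedReset, SessionSketch, request_reset and apply_pending_reset are illustrative names, not part of the patch.

#include <mutex>

// Sketch of the delayed-reset pattern from run_cpu(), under the stated
// assumptions; Cycles itself uses thread_mutex/thread_scoped_lock.
struct DelayedReset {
  std::mutex mutex;
  bool do_reset = false;
  int samples = 0;
};

struct SessionSketch {
  DelayedReset delayed_reset;
  std::mutex buffers_mutex, display_mutex;

  // Main thread: only records the request, never touches buffers.
  void request_reset(int samples)
  {
    std::lock_guard<std::mutex> lock(delayed_reset.mutex);
    delayed_reset.do_reset = true;
    delayed_reset.samples = samples;
  }

  // Session thread, between samples: applies the request while holding
  // every lock that rendering and drawing depend on.
  void apply_pending_reset()
  {
    std::scoped_lock locks(delayed_reset.mutex, buffers_mutex, display_mutex);
    if (delayed_reset.do_reset) {
      delayed_reset.do_reset = false;
      // reset_(delayed_reset.params, delayed_reset.samples) would run here.
    }
  }
};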
+ | |
+DeviceRequestedFeatures Session::get_requested_device_features() | |
+{ | |
+ /* TODO(sergey): Consider moving this to the Scene level. */ | |
+ DeviceRequestedFeatures requested_features; | |
+ requested_features.experimental = params.experimental; | |
+ | |
+ scene->shader_manager->get_requested_features(scene, &requested_features); | |
+ | |
+ /* These features are not tweaked as often as shaders, so a similar | |
+ * selective update could be done for the viewport as well. | |
+ */ | |
+ bool use_motion = scene->need_motion() == Scene::MotionType::MOTION_BLUR; | |
+ requested_features.use_hair = false; | |
+ requested_features.use_object_motion = false; | |
+ requested_features.use_camera_motion = use_motion && scene->camera->use_motion(); | |
+ foreach (Object *object, scene->objects) { | |
+ Mesh *mesh = object->mesh; | |
+ if (mesh->num_curves()) { | |
+ requested_features.use_hair = true; | |
+ } | |
+ if (use_motion) { | |
+ requested_features.use_object_motion |= object->use_motion() | mesh->use_motion_blur; | |
+ requested_features.use_camera_motion |= mesh->use_motion_blur; | |
+ } | |
+#ifdef WITH_OPENSUBDIV | |
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) { | |
+ requested_features.use_patch_evaluation = true; | |
+ } | |
+#endif | |
+ if (object->is_shadow_catcher) { | |
+ requested_features.use_shadow_tricks = true; | |
+ } | |
+ requested_features.use_true_displacement |= mesh->has_true_displacement(); | |
+ } | |
+ | |
+ requested_features.use_background_light = scene->light_manager->has_background_light(scene); | |
+ | |
+ BakeManager *bake_manager = scene->bake_manager; | |
+ requested_features.use_baking = bake_manager->get_baking(); | |
+ requested_features.use_integrator_branched = (scene->integrator->method == | |
+ Integrator::BRANCHED_PATH); | |
+ if (params.run_denoising) { | |
+ requested_features.use_denoising = true; | |
+ requested_features.use_shadow_tricks = true; | |
+ } | |
+ | |
+ return requested_features; | |
+} | |
+ | |
+bool Session::load_kernels(bool lock_scene) | |
+{ | |
+ thread_scoped_lock scene_lock; | |
+ if (lock_scene) { | |
+ scene_lock = thread_scoped_lock(scene->mutex); | |
+ } | |
+ | |
+ DeviceRequestedFeatures requested_features = get_requested_device_features(); | |
+ | |
+ if (!kernels_loaded || loaded_kernel_features.modified(requested_features)) { | |
+ progress.set_status("Loading render kernels (may take a few minutes the first time)"); | |
+ | |
+ scoped_timer timer; | |
+ | |
+ VLOG(2) << "Requested features:\n" << requested_features; | |
+ if (!device->load_kernels(requested_features)) { | |
+ string message = device->error_message(); | |
+ if (message.empty()) | |
+ message = "Failed loading render kernel, see console for errors"; | |
+ | |
+ progress.set_error(message); | |
+ progress.set_status("Error", message); | |
+ progress.set_update(); | |
+ return false; | |
+ } | |
+ | |
+ progress.add_skip_time(timer, false); | |
+ VLOG(1) << "Total time spent loading kernels: " << time_dt() - timer.get_start(); | |
+ | |
+ kernels_loaded = true; | |
+ loaded_kernel_features = requested_features; | |
+ return true; | |
+ } | |
+ return false; | |
+} | |
+ | |
+void Session::run() | |
+{ | |
+ if (params.use_profiling && (params.device.type == DEVICE_CPU)) { | |
+ profiler.start(); | |
+ } | |
+ | |
+ /* session thread loop */ | |
+ progress.set_status("Waiting for render to start"); | |
+ | |
+ /* run */ | |
+ if (!progress.get_cancel()) { | |
+ /* reset number of rendered samples */ | |
+ progress.reset_sample(); | |
+ | |
+ if (device_use_gl) | |
+ run_gpu(); | |
+ else | |
+ run_cpu(); | |
+ } | |
+ | |
+ profiler.stop(); | |
+ | |
+ /* progress update */ | |
+ if (progress.get_cancel()) | |
+ progress.set_status("Cancel", progress.get_cancel_message()); | |
+ else | |
+ progress.set_update(); | |
+} | |
+ | |
+bool Session::draw(BufferParams &buffer_params, DeviceDrawParams &draw_params) | |
+{ | |
+ if (device_use_gl) | |
+ return draw_gpu(buffer_params, draw_params); | |
+ else | |
+ return draw_cpu(buffer_params, draw_params); | |
+} | |
+ | |
+void Session::reset_(BufferParams &buffer_params, int samples) | |
+{ | |
+ if (buffers && buffer_params.modified(tile_manager.params)) { | |
+ gpu_draw_ready = false; | |
+ buffers->reset(buffer_params); | |
+ if (display) { | |
+ display->reset(buffer_params); | |
+ } | |
+ } | |
+ | |
+ tile_manager.reset(buffer_params, samples); | |
+ progress.reset_sample(); | |
+ | |
+ bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX; | |
+ progress.set_total_pixel_samples(show_progress ? tile_manager.state.total_pixel_samples : 0); | |
+ | |
+ if (!params.background) | |
+ progress.set_start_time(); | |
+ progress.set_render_start_time(); | |
+} | |
+ | |
+void Session::reset(BufferParams &buffer_params, int samples) | |
+{ | |
+ if (device_use_gl) | |
+ reset_gpu(buffer_params, samples); | |
+ else | |
+ reset_cpu(buffer_params, samples); | |
+} | |
+ | |
+void Session::set_samples(int samples) | |
+{ | |
+ if (samples != params.samples) { | |
+ params.samples = samples; | |
+ tile_manager.set_samples(samples); | |
+ | |
+ { | |
+ thread_scoped_lock pause_lock(pause_mutex); | |
+ } | |
+ pause_cond.notify_all(); | |
+ } | |
+} | |
+ | |
+void Session::set_pause(bool pause_) | |
+{ | |
+ bool notify = false; | |
+ | |
+ { | |
+ thread_scoped_lock pause_lock(pause_mutex); | |
+ | |
+ if (pause != pause_) { | |
+ pause = pause_; | |
+ notify = true; | |
+ } | |
+ } | |
+ | |
+ if (notify) | |
+ pause_cond.notify_all(); | |
+} | |
+ | |
+void Session::wait() | |
+{ | |
+ if (session_thread) { | |
+ session_thread->join(); | |
+ delete session_thread; | |
+ } | |
+ | |
+ session_thread = NULL; | |
+} | |
+ | |
+bool Session::update_scene() | |
+{ | |
+ thread_scoped_lock scene_lock(scene->mutex); | |
+ | |
+ /* update camera if dimensions changed for progressive render. the camera | |
+ * knows nothing about progressive or cropped rendering; it just gets the | |
+ * image dimensions passed in */ | |
+ Camera *cam = scene->camera; | |
+ int width = tile_manager.state.buffer.full_width; | |
+ int height = tile_manager.state.buffer.full_height; | |
+ int resolution = tile_manager.state.resolution_divider; | |
+ | |
+ if (width != cam->width || height != cam->height) { | |
+ cam->width = width; | |
+ cam->height = height; | |
+ cam->resolution = resolution; | |
+ cam->tag_update(); | |
+ } | |
+ | |
+ /* the number of samples is needed by the multi-jitter | |
+ * sampling pattern and by baking */ | |
+ Integrator *integrator = scene->integrator; | |
+ BakeManager *bake_manager = scene->bake_manager; | |
+ | |
+ if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) { | |
+ int aa_samples = tile_manager.num_samples; | |
+ | |
+ if (aa_samples != integrator->aa_samples) { | |
+ integrator->aa_samples = aa_samples; | |
+ integrator->tag_update(scene); | |
+ } | |
+ } | |
+ | |
+ /* update scene */ | |
+ if (scene->need_update()) { | |
+ bool new_kernels_needed = load_kernels(false); | |
+ | |
+ /* Update max_closures. */ | |
+ KernelIntegrator *kintegrator = &scene->dscene.data.integrator; | |
+ if (params.background) { | |
+ kintegrator->max_closures = get_max_closure_count(); | |
+ } | |
+ else { | |
+ /* Currently the viewport render is faster with a higher max_closures; this needs investigating. */ | |
+ kintegrator->max_closures = MAX_CLOSURE; | |
+ } | |
+ | |
+ progress.set_status("Updating Scene"); | |
+ MEM_GUARDED_CALL(&progress, scene->device_update, device, progress); | |
+ | |
+ DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state(); | |
+ bool kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE || | |
+ kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID; | |
+ if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { | |
+ progress.set_kernel_status("Compiling render kernels"); | |
+ } | |
+ if (new_kernels_needed || kernel_switch_needed) { | |
+ progress.set_kernel_status("Compiling render kernels"); | |
+ device->wait_for_availability(loaded_kernel_features); | |
+ progress.set_kernel_status(""); | |
+ } | |
+ | |
+ if (kernel_switch_needed) { | |
+ reset(tile_manager.params, params.samples); | |
+ } | |
+ return true; | |
+ } | |
+ return false; | |
+} | |
+ | |
+void Session::update_status_time(bool show_pause, bool show_done) | |
+{ | |
+ int progressive_sample = tile_manager.state.sample; | |
+ int num_samples = tile_manager.get_num_effective_samples(); | |
+ | |
+ int tile = progress.get_rendered_tiles(); | |
+ int num_tiles = tile_manager.state.num_tiles; | |
+ | |
+ /* update status */ | |
+ string status, substatus; | |
+ | |
+ if (!params.progressive) { | |
+ const bool is_cpu = params.device.type == DEVICE_CPU; | |
+ const bool rendering_finished = (tile == num_tiles); | |
+ const bool is_last_tile = (tile + 1) == num_tiles; | |
+ | |
+ substatus = string_printf("Rendered %d/%d Tiles", tile, num_tiles); | |
+ | |
+ if (!rendering_finished && (device->show_samples() || (is_cpu && is_last_tile))) { | |
+ /* Some devices automatically support showing the sample number: | |
+ * - CUDADevice | |
+ * - OpenCLDevice when using the megakernel (the split kernel renders multiple | |
+ * samples at the same time, so the current sample isn't really defined) | |
+ * - CPUDevice when using one thread | |
+ * For these devices, the current sample is always shown. | |
+ * | |
+ * The other option is when the last tile is currently being rendered by the CPU. | |
+ */ | |
+ substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples); | |
+ } | |
+ if (params.full_denoising || params.optix_denoising) { | |
+ substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles()); | |
+ } | |
+ else if (params.run_denoising) { | |
+ substatus += string_printf(", Prefiltered %d tiles", progress.get_denoised_tiles()); | |
+ } | |
+ } | |
+ else if (tile_manager.num_samples == Integrator::MAX_SAMPLES) | |
+ substatus = string_printf("Path Tracing Sample %d", progressive_sample + 1); | |
+ else | |
+ substatus = string_printf("Path Tracing Sample %d/%d", progressive_sample + 1, num_samples); | |
+ | |
+ if (show_pause) { | |
+ status = "Rendering Paused"; | |
+ } | |
+ else if (show_done) { | |
+ status = "Rendering Done"; | |
+ progress.set_end_time(); /* Save end time so that further calls to get_time are accurate. */ | |
+ } | |
+ else { | |
+ status = substatus; | |
+ substatus.clear(); | |
+ } | |
+ | |
+ progress.set_status(status, substatus); | |
+} | |
+ | |
+void Session::render() | |
+{ | |
+ /* Clear buffers. */ | |
+ if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) { | |
+ buffers->zero(); | |
+ } | |
+ | |
+ /* Add path trace task. */ | |
+ DeviceTask task(DeviceTask::RENDER); | |
+ | |
+ task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2); | |
+ task.release_tile = function_bind(&Session::release_tile, this, _1); | |
+ task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); | |
+ task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); | |
+ task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); | |
+ task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); | |
+ task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); | |
+ task.need_finish_queue = params.progressive_refine; | |
+ task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH; | |
+ task.requested_tile_size = params.tile_size; | |
+ task.passes_size = tile_manager.params.get_passes_size(); | |
+ | |
+ if (params.run_denoising) { | |
+ task.denoising = params.denoising; | |
+ | |
+ assert(!scene->film->need_update); | |
+ task.pass_stride = scene->film->pass_stride; | |
+ task.target_pass_stride = task.pass_stride; | |
+ task.pass_denoising_data = scene->film->denoising_data_offset; | |
+ task.pass_denoising_clean = scene->film->denoising_clean_offset; | |
+ | |
+ task.denoising_from_render = true; | |
+ task.denoising_do_filter = params.full_denoising; | |
+ task.denoising_use_optix = params.optix_denoising; | |
+ task.denoising_write_passes = params.write_denoising_passes; | |
+ } | |
+ | |
+ device->task_add(task); | |
+} | |
+ | |
+void Session::copy_to_display_buffer(int sample) | |
+{ | |
+ /* add film conversion task */ | |
+ DeviceTask task(DeviceTask::FILM_CONVERT); | |
+ | |
+ task.x = tile_manager.state.buffer.full_x; | |
+ task.y = tile_manager.state.buffer.full_y; | |
+ task.w = tile_manager.state.buffer.width; | |
+ task.h = tile_manager.state.buffer.height; | |
+ task.rgba_byte = display->rgba_byte.device_pointer; | |
+ task.rgba_half = display->rgba_half.device_pointer; | |
+ task.buffer = buffers->buffer.device_pointer; | |
+ task.sample = sample; | |
+ tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); | |
+ | |
+ if (task.w > 0 && task.h > 0) { | |
+ device->task_add(task); | |
+ device->task_wait(); | |
+ | |
+ /* set display to new size */ | |
+ display->draw_set(task.w, task.h); | |
+ } | |
+ | |
+ display_outdated = false; | |
+} | |
+ | |
+bool Session::update_progressive_refine(bool cancel) | |
+{ | |
+ int sample = tile_manager.state.sample + 1; | |
+ bool write = sample == tile_manager.num_samples || cancel; | |
+ | |
+ double current_time = time_dt(); | |
+ | |
+ if (current_time - last_update_time < params.progressive_update_timeout) { | |
+ /* if last sample was processed, we need to write buffers anyway */ | |
+ if (!write && sample != 1) | |
+ return false; | |
+ } | |
+ | |
+ if (params.progressive_refine) { | |
+ foreach (Tile &tile, tile_manager.state.tiles) { | |
+ if (!tile.buffers) { | |
+ continue; | |
+ } | |
+ | |
+ RenderTile rtile; | |
+ rtile.x = tile_manager.state.buffer.full_x + tile.x; | |
+ rtile.y = tile_manager.state.buffer.full_y + tile.y; | |
+ rtile.w = tile.w; | |
+ rtile.h = tile.h; | |
+ rtile.sample = sample; | |
+ rtile.buffers = tile.buffers; | |
+ | |
+ if (write) { | |
+ if (write_render_tile_cb) | |
+ write_render_tile_cb(rtile); | |
+ } | |
+ else { | |
+ if (update_render_tile_cb) | |
+ update_render_tile_cb(rtile, true); | |
+ } | |
+ } | |
+ } | |
+ | |
+ last_update_time = current_time; | |
+ | |
+ return write; | |
+} | |
+ | |
+void Session::device_free() | |
+{ | |
+ scene->device_free(); | |
+ | |
+ tile_manager.device_free(); | |
+ | |
+ /* used from background render only, so no need to | |
+ * re-create render/display buffers here | |
+ */ | |
+} | |
+ | |
+void Session::collect_statistics(RenderStats *render_stats) | |
+{ | |
+ scene->collect_statistics(render_stats); | |
+ if (params.use_profiling && (params.device.type == DEVICE_CPU)) { | |
+ render_stats->collect_profiling(scene, profiler); | |
+ } | |
+} | |
+ | |
+int Session::get_max_closure_count() | |
+{ | |
+ if (scene->shader_manager->use_osl()) { | |
+ /* OSL always needs the maximum as we can't predict the | |
+ * number of closures a shader might generate. */ | |
+ return MAX_CLOSURE; | |
+ } | |
+ | |
+ int max_closures = 0; | |
+ for (int i = 0; i < scene->shaders.size(); i++) { | |
+ int num_closures = scene->shaders[i]->graph->get_num_closures(); | |
+ max_closures = max(max_closures, num_closures); | |
+ } | |
+ max_closure_global = max(max_closure_global, max_closures); | |
+ | |
+ if (max_closure_global > MAX_CLOSURE) { | |
+ /* This is usually harmless, as more complex shaders tend to get many | |
+ * closures discarded due to mixing or low weights. We need to limit | |
+ * to MAX_CLOSURE as this is hardcoded in CPU/mega kernels, and it | |
+ * avoids excessive memory usage for split kernels. */ | |
+ VLOG(2) << "Maximum number of closures exceeded: " << max_closure_global << " > " | |
+ << MAX_CLOSURE; | |
+ | |
+ max_closure_global = MAX_CLOSURE; | |
+ } | |
+ | |
+ return max_closure_global; | |
+} | |
+ | |
+CCL_NAMESPACE_END | |
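Session::set_pause() and the wait loop in run_cpu() above form a standard mutex plus condition-variable handshake: the flag flips under pause_mutex, notification happens after the lock is released, and the waiting side re-checks the flag after every wakeup. Here is a self-contained sketch of just that handshake; PauseGate and its members are illustrative names, and std::condition_variable stands in for the Cycles thread_condition_variable wrapper.

#include <condition_variable>
#include <mutex>

class PauseGate {
 public:
  // UI thread: flip the flag under the lock, notify outside it,
  // mirroring Session::set_pause() above.
  void set_pause(bool pause)
  {
    bool notify = false;
    {
      std::lock_guard<std::mutex> lock(mutex_);
      if (pause_ != pause) {
        pause_ = pause;
        notify = true;
      }
    }
    if (notify)
      cond_.notify_all();
  }

  // Render loop: block while paused; the loop guards against spurious
  // wakeups, as the while (1) loop in run_cpu() does.
  void wait_if_paused()
  {
    std::unique_lock<std::mutex> lock(mutex_);
    while (pause_)
      cond_.wait(lock);
  }

 private:
  std::mutex mutex_;
  std::condition_variable cond_;
  bool pause_ = false;
};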
diff -Naur a/intern/cycles/render/session.h b/intern/cycles/render/session.h | |
--- a/intern/cycles/render/session.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/render/session.h 2020-01-10 20:42:43.474256721 +0300 | |
@@ -55,6 +55,7 @@ | |
int start_resolution; | |
int pixel_size; | |
int threads; | |
+ bool adaptive_sampling; | |
bool use_profiling; | |
@@ -87,6 +88,7 @@ | |
start_resolution = INT_MAX; | |
pixel_size = 1; | |
threads = 0; | |
+ adaptive_sampling = false; | |
use_profiling = false; | |
@@ -114,6 +116,7 @@ | |
&& progressive == params.progressive && experimental == params.experimental && | |
tile_size == params.tile_size && start_resolution == params.start_resolution && | |
pixel_size == params.pixel_size && threads == params.threads && | |
+ adaptive_sampling == params.adaptive_sampling && | |
use_profiling == params.use_profiling && | |
display_buffer_linear == params.display_buffer_linear && | |
cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout && | |
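For context on the adaptive_sampling comparison added to SessionParams::modified() above: modified() returning true is what makes the host side discard the session and build a fresh one, so toggling adaptive sampling restarts the render rather than being applied on the fly. A hedged sketch of that call pattern follows; ensure_session is an illustrative name, and the real call site lives on the Blender integration side and may differ.

// Illustrative only: how a host typically reacts to modified() params.
void ensure_session(Session *&session, const SessionParams &new_params)
{
  if (session && session->params.modified(new_params)) {
    delete session; /* e.g. adaptive_sampling was toggled */
    session = nullptr;
  }
  if (session == nullptr) {
    session = new Session(new_params);
  }
}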
diff -Naur a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h | |
--- a/intern/cycles/util/util_atomic.h 2020-01-10 20:37:06.000000000 +0300 | |
+++ b/intern/cycles/util/util_atomic.h 2020-01-10 20:42:43.474256721 +0300 | |
@@ -77,6 +77,7 @@ | |
# define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x)) | |
# define atomic_fetch_and_inc_uint32(p) atomic_inc((p)) | |
# define atomic_fetch_and_dec_uint32(p) atomic_dec((p)) | |
+# define atomic_fetch_and_or_uint32(p, x) atomic_or((p), (x)) | |
# define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE | |
# define ccl_barrier(flags) barrier(flags) | |
@@ -91,6 +92,7 @@ | |
# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x)) | |
# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) | |
# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) | |
+# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x)) | |
ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, | |
const float old_val, |
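The new atomic_fetch_and_or_uint32 macro added above maps to atomic_or on OpenCL and atomicOr on CUDA; a plausible use in this patch is letting many kernel threads set flag bits (for example, marking a pixel as converged for adaptive sampling) on the same word without losing updates. Below is a CPU-side sketch of the same idea with std::atomic; PIXEL_CONVERGED and mark_converged are illustrative, not names from the patch.

#include <atomic>
#include <cstdint>

// Hypothetical flag bit; the kernel's real flag layout may differ.
constexpr uint32_t PIXEL_CONVERGED = 1u << 0;

// fetch_or is an atomic read-modify-write, so concurrent writers can
// each set their bit without clobbering the others.
inline void mark_converged(std::atomic<uint32_t> &flags)
{
  flags.fetch_or(PIXEL_CONVERGED, std::memory_order_relaxed);
}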