Pokechu22 · May 4, 2022 01:01
diff --git a/RS2_BumpMapping1.md b/RS2_BumpMapping1.md
diff --git a/RS2_BumpMapping2.md b/RS2_BumpMapping2.md
diff --git a/RS2_BumpMapping3.md b/RS2_BumpMapping3.md
diff --git a/ZReproNotes.txt b/ZReproNotes.txt
 040972A8 3F800000

 8024e52c: current speed modifier.

 -> 0424E52C BF800000

 7fde8228

 -----

 3590. GSWE64_2022-04-28_11-23-38.png
 3890?  No, 3891.

 So, 4192?

 ---

 OK, now frame 5930.

 And frame 4185...  Always wait 301 frames, in any case.

 ```
 diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
 index d895d648ed..3c8fae73ff 100644
 --- a/Source/Core/VideoCommon/RenderBase.cpp
 +++ b/Source/Core/VideoCommon/RenderBase.cpp
 @@ -1376,7 +1376,7 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
         perf_sample.num_draw_calls = g_stats.this_frame.num_draw_calls;
         DolphinAnalytics::Instance().ReportPerformanceInfo(std::move(perf_sample));
 
 -        if (IsFrameDumping())
 +        if (IsFrameDumping() && ((Movie::GetCurrentFrame() - (4170)) % 301) < 30)
           DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks, m_frame_count);
 
         // Begin new frame
 ```

 11409: to the right.  11410: to the left, barely.  11710: to the right. 11711: to the right, barely.
 (11710-4185)/301 is 25.

 So, we wait 301 frames, but also add (ctr - 4185)/(301*25) to the frame count to wait one extra frame.  I guess?

 Ehh, doing that is honestly a bit more noticeable.

 Ok, once the drops start happening, the good frames are:

 30074
 30428
 30933

 31229
 31530
 31831

 Well, those approximately work, but they break.  Changing it slightly:

 ```
 diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
 index d895d648ed..d95069a894 100644
 --- a/Source/Core/VideoCommon/RenderBase.cpp
 +++ b/Source/Core/VideoCommon/RenderBase.cpp
 @@ -1376,7 +1376,30 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
         perf_sample.num_draw_calls = g_stats.this_frame.num_draw_calls;
         DolphinAnalytics::Instance().ReportPerformanceInfo(std::move(perf_sample));
 
 -        if (IsFrameDumping())
 +        u64 frame = Movie::GetCurrentFrame();
 +        bool valid = false;
 +        if (frame < 30000)
 +        {
 +          frame -= 4185;
 +          // + 1 to avoid duplicate frames (the duplicate will occur when modulo is not 0)
 +          // This might be needed due to 59.94 vs 60 FPS?
 +          frame -= (frame + 1) / (301 * 25);
 +          valid = (frame % 301) == 0;
 +        }
 +        // The game lags at about this point, and the 301 rule breaks temporarily.  This seems to be
 +        // the best set of frames.
 +        else if (frame < 31228)
 +        {
 +          valid = (frame == 30073 || frame == 30427 || frame == 30932);
 +        }
 +        else
 +        {
 +          frame -= 31228;
 +          // + 1 to avoid duplicate frames (the duplicate will occur when modulo is not 0)
 +          frame -= (frame + 1) / (301 * 25);
 +          valid = (frame % 301) == 0;
 +        }
 +        if (IsFrameDumping() && valid)
           DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks, m_frame_count);
 
         // Begin new frame
 ```

 That works but the +1 offset is noticeable and not great.  I can just get rid of that.

 ```
 diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
 index d895d648ed..78f1960653 100644
 --- a/Source/Core/VideoCommon/RenderBase.cpp
 +++ b/Source/Core/VideoCommon/RenderBase.cpp
 @@ -1376,7 +1376,25 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
         perf_sample.num_draw_calls = g_stats.this_frame.num_draw_calls;
         DolphinAnalytics::Instance().ReportPerformanceInfo(std::move(perf_sample));
 
 -        if (IsFrameDumping())
 +        u64 frame = Movie::GetCurrentFrame();
 +        bool valid = false;
 +        if (frame < 30000)
 +        {
 +          frame -= 4185;
 +          valid = (frame % 301) == 0;
 +        }
 +        // The game lags at about this point, and the 301 rule breaks temporarily.  This seems to be
 +        // the best set of frames.
 +        else if (frame < 31224)
 +        {
 +          valid = (frame == 30070 || frame == 30420 || frame == 30927);
 +        }
 +        else
 +        {
 +          frame -= 31224;
 +          valid = (frame % 301) == 0;
 +        }
 +        if (IsFrameDumping() && valid)
           DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks, m_frame_count);
 
         // Begin new frame
 ```

 (cd Test4; for f in *.png; do convert $f ../Test4Alt/$f -compose difference -composite -evaluate Multiply 8 -evaluate-sequence Add ../Test4Diff/$f; done)

 ldir _173.xyz float3 -0.4001046419, 0.2027553469, -0.8937597871
 _binormal _119.xyz float3 -0.0042309803, -0.0024213707, 0.0008056075
 _tangent _108.xyz float3 0.0007323285, 0.0003421183, 0.004874411
 rawbinormal _89.xyz float3 0.00, 0.00, 1.984375
 rawtangent _84.xyz float3 1.984375, 0.00, 0.00

 ldir _173.xyz float3 -0.3507781029, -0.3892965019, -0.8517059088
 _binormal _119.xyz float3 -0.0030002145, -0.0035373855, 0.001702601
 _tangent _108.xyz float3 0.0011418356, 0.0012639464, 0.0046380903
 rawbinormal _89.xyz float3 0.00, 0.00, 1.984375
 rawtangent _84.xyz float3 1.984375, 0.00, 0.00


 (290.4000244141, 321.6000061035) to (290.3951721191, 321.600982666)
 (290.4000244141, 321.6000061035) to (290.3954467773, 321.6004943848)
diff --git a/ZReproNotes2.txt b/ZReproNotes2.txt
 Frame 4500: (13312, 768, 15744) to (13440, -512, 15872) on screen lower left, (13568, -5120, 15232) to (16396, -5632, 15232) on lower right.

 Frame 5000: (12416, -11776, 15104) to (12544, 3456, 15232) lower left, (12928, -11648, 14464) to (13056, 2560, 14592) lower right

 I *think* this means that 13312 < x < 16396 on frame 4500 and 12416 < x < 13056 on frame 5000.

 OK, or the big draw: a bounding box of (11904, -13056, 14080) to (13568, 14336, 15744) on frame 4500, and (12288, -12416, 14464) to (13184, 13524, 15360) on frame 5000.

 -----

 ```
 13696.00 -1536.00  15872.00  0.00  1.921875 -0.46875  0.2431372553  0.00  1.00  0.9921568632  0.00  0.2431372553  0.00  1.00
 13824.00 -512.00  16000.00  0.09375  1.984375 -0.09375  0.2431372553  0.501960814  1.00  0.9843137264  0.00  0.2431372553  0.501960814  1.00
 13696.00 -3328.00  15744.00  0.00  1.96875 -0.203125  0.2392156869  0.501960814  1.00  0.9764705896  0.00  0.2392156869  0.501960814  1.00
 13824.00 -1664.00  15872.00  0.171875  1.921875 -0.46875  0.2431372553  0.00  1.00  1.00  0.501960814  0.2431372553  0.00  1.00
 ```

 Between 15744 and 16000 (probably 15872).

 Let's try either 7fdf6190 or 80216190

 Code at 7fcbd60c writes it.

 That code is located at 801bd718 found by `lfs	f6, -0x58A8 (rtoc)` c0c2a758 at 7fcbd618.  I've moved MAIN_.text2 from 80100000 to 7fc00000 and that gives somewhat better results (not perfect though; data is still messed up).

 ... wait, huh.  That's NOT a match.  The base needs to be 7fbfff00, instead.  I wonder why?

 I've also added a new block from 7fdf0000 to 7fffffff (size 210000).

 Input parameter is 7fde813c (starts in r3, moved to r29), which I think is a vec3f of the direction to move.  Also, I needed to expand the new block to start at 7fde6420 instead (to include that address).

 What if I just nop out the stores at 7fcbd60c and 7fcbd608?  Hmm, that doesn't solve it :|

 7fde813c might actually be a position.  I'm not sure.

 Func at 7fcbd414 is called by 7fcc163c.  Inserting a BLR at the start of 7fcc163c causes the world to stop rendering, so I'm calling it `DrawWorldMaybe`.  Inserting a BLR at the start of 7fcbd414 causes the world culling to stop updating as the camera moves.  I'm calling it `UpdateViewBounds` for now.

 7fde813c is written by 7fc15438.  Inserting a BLR at the start of that causes the ship to keep moving but the camera to stay still.  I'm calling 7fc15438 CopyCameraPos.

 I could trace things back further, but I don't think I really need to.  This is good enough (as long as I combine it with invincibility, by writing 1.0 (3f800000) to 800972a8 (this is per datel) - I think this needs to happen during startup.)

 Oh, also, changing a function needs to be a real patch because otherwise if the code gets paged out the patch is undone.  I don't have a good workaround for this other than choosing a location where it's unlikely to be disturbed.

 Alright, doing this works, except there's some pop-in with the clouds.  But, eh, good enough.

 ------

 Just to clarify my process here: I created a savestate where I was pointed straight towards the ground and about to hit it, and then recorded a fifolog.  Since I was aiming right at the ground, only a bit of it was visible, and I was directly above that bit, so I could get the world coordinates by looking at the vertices in renderdoc.  From that I was able to load the savestate and use Dolphin's cheat search to find a value that was similar to those world coordinates, and then I was able to find what was setting that value.  Looking straight at the ground also probably helped because there were fewer page faults since less stuff was being done.

 For creating the images of the emboss effect only, this works decently well:

 ```patch
 diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
 index 4a9e9105a8..410df1b2d6 100644
 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp
 +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
 @@ -1358,6 +1358,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
   const auto& stage = uid_data->stagehash[n];
   out.Write("\n\t// TEV stage {}\n", n);
 
 +  bool is_special = uid_data->genMode_numtevstages + 1 == 7 && uid_data->stagehash[3].cc == 0x40f800 &&
 +                    uid_data->stagehash[4].cc == 0x4cf802 && uid_data->stagehash[5].cc == 0x0802bf;
 +
   // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does not
   // exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
   u32 texcoord = stage.tevorders_texcoord;
 @@ -1593,6 +1596,17 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
   cc.hex = stage.cc;
   ac.hex = stage.ac;
 
 +  if (is_special && n == 0)
 +  {
 +    cc.a = cc.b = cc.c = TevColorArg::Zero;
 +    cc.d = TevColorArg::One;
 +  }
 +  if (is_special && n == 2)
 +  {
 +    cc.a = cc.b = cc.c = TevColorArg::Zero;
 +    cc.d = TevColorArg::Half;
 +  }
 +
   if (cc.a == TevColorArg::RasAlpha || cc.a == TevColorArg::RasColor ||
       cc.b == TevColorArg::RasAlpha || cc.b == TevColorArg::RasColor ||
       cc.c == TevColorArg::RasAlpha || cc.c == TevColorArg::RasColor ||
 ```
	040972A8 3F800000

	8024e52c: current speed modifier.

	-> 0424E52C BF800000

	7fde8228

	-----

	3590. GSWE64_2022-04-28_11-23-38.png
	3890? No, 3891.

	So, 4192?

	---

	OK, now frame 5930.

	And frame 4185... Always wait 301 frames, in any case.

	```
	diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
	index d895d648ed..3c8fae73ff 100644
	--- a/Source/Core/VideoCommon/RenderBase.cpp
	+++ b/Source/Core/VideoCommon/RenderBase.cpp
	@@ -1376,7 +1376,7 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
	perf_sample.num_draw_calls = g_stats.this_frame.num_draw_calls;
	DolphinAnalytics::Instance().ReportPerformanceInfo(std::move(perf_sample));

	- if (IsFrameDumping())
	+ if (IsFrameDumping() && ((Movie::GetCurrentFrame() - (4170)) % 301) < 30)
	DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks, m_frame_count);

	// Begin new frame
	```

	11409: to the right. 11410: to the left, barely. 11710: to the right. 11711: to the right, barely.
	(11710-4185)/301 is 25.

	So, we wait 301 frames, but also add (ctr - 4185)/(301*25) to the frame count to wait one extra frame. I guess?

	Ehh, doing that is honestly a bit more noticeable.

	Ok, once the drops start happening, the good frames are:

	30074
	30428
	30933

	31229
	31530
	31831

	Well, those approximately work, but they break. Changing it slightly:

	```
	diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
	index d895d648ed..d95069a894 100644
	--- a/Source/Core/VideoCommon/RenderBase.cpp
	+++ b/Source/Core/VideoCommon/RenderBase.cpp
	@@ -1376,7 +1376,30 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
	perf_sample.num_draw_calls = g_stats.this_frame.num_draw_calls;
	DolphinAnalytics::Instance().ReportPerformanceInfo(std::move(perf_sample));

	- if (IsFrameDumping())
	+ u64 frame = Movie::GetCurrentFrame();
	+ bool valid = false;
	+ if (frame < 30000)
	+ {
	+ frame -= 4185;
	+ // + 1 to avoid duplicate frames (the duplicate will occur when modulo is not 0)
	+ // This might be needed due to 59.94 vs 60 FPS?
	+ frame -= (frame + 1) / (301 * 25);
	+ valid = (frame % 301) == 0;
	+ }
	+ // The game lags at about this point, and the 301 rule breaks temporarily. This seems to be
	+ // the best set of frames.
	+ else if (frame < 31228)
	+ {
	+ valid = (frame == 30073 || frame == 30427 || frame == 30932);
	+ }
	+ else
	+ {
	+ frame -= 31228;
	+ // + 1 to avoid duplicate frames (the duplicate will occur when modulo is not 0)
	+ frame -= (frame + 1) / (301 * 25);
	+ valid = (frame % 301) == 0;
	+ }
	+ if (IsFrameDumping() && valid)
	DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks, m_frame_count);

	// Begin new frame
	```

	That works but the +1 offset is noticeable and not great. I can just get rid of that.

	```
	diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp
	index d895d648ed..78f1960653 100644
	--- a/Source/Core/VideoCommon/RenderBase.cpp
	+++ b/Source/Core/VideoCommon/RenderBase.cpp
	@@ -1376,7 +1376,25 @@ void Renderer::Swap(u32 xfb_addr, u32 fb_width, u32 fb_stride, u32 fb_height, u6
	perf_sample.num_draw_calls = g_stats.this_frame.num_draw_calls;
	DolphinAnalytics::Instance().ReportPerformanceInfo(std::move(perf_sample));

	- if (IsFrameDumping())
	+ u64 frame = Movie::GetCurrentFrame();
	+ bool valid = false;
	+ if (frame < 30000)
	+ {
	+ frame -= 4185;
	+ valid = (frame % 301) == 0;
	+ }
	+ // The game lags at about this point, and the 301 rule breaks temporarily. This seems to be
	+ // the best set of frames.
	+ else if (frame < 31224)
	+ {
	+ valid = (frame == 30070 || frame == 30420 || frame == 30927);
	+ }
	+ else
	+ {
	+ frame -= 31224;
	+ valid = (frame % 301) == 0;
	+ }
	+ if (IsFrameDumping() && valid)
	DumpCurrentFrame(xfb_entry->texture.get(), xfb_rect, ticks, m_frame_count);

	// Begin new frame
	```

	(cd Test4; for f in *.png; do convert $f ../Test4Alt/$f -compose difference -composite -evaluate Multiply 8 -evaluate-sequence Add ../Test4Diff/$f; done)

	ldir _173.xyz float3 -0.4001046419, 0.2027553469, -0.8937597871
	_binormal _119.xyz float3 -0.0042309803, -0.0024213707, 0.0008056075
	_tangent _108.xyz float3 0.0007323285, 0.0003421183, 0.004874411
	rawbinormal _89.xyz float3 0.00, 0.00, 1.984375
	rawtangent _84.xyz float3 1.984375, 0.00, 0.00

	ldir _173.xyz float3 -0.3507781029, -0.3892965019, -0.8517059088
	_binormal _119.xyz float3 -0.0030002145, -0.0035373855, 0.001702601
	_tangent _108.xyz float3 0.0011418356, 0.0012639464, 0.0046380903
	rawbinormal _89.xyz float3 0.00, 0.00, 1.984375
	rawtangent _84.xyz float3 1.984375, 0.00, 0.00


	(290.4000244141, 321.6000061035) to (290.3951721191, 321.600982666)
	(290.4000244141, 321.6000061035) to (290.3954467773, 321.6004943848)
	Frame 4500: (13312, 768, 15744) to (13440, -512, 15872) on screen lower left, (13568, -5120, 15232) to (16396, -5632, 15232) on lower right.

	Frame 5000: (12416, -11776, 15104) to (12544, 3456, 15232) lower left, (12928, -11648, 14464) to (13056, 2560, 14592) lower right

	I think this means that 13312 < x < 16396 on frame 4500 and 12416 < x < 13056 on frame 5000.

	OK, or the big draw: a bounding box of (11904, -13056, 14080) to (13568, 14336, 15744) on frame 4500, and (12288, -12416, 14464) to (13184, 13524, 15360) on frame 5000.

	-----

	```
	13696.00 -1536.00 15872.00 0.00 1.921875 -0.46875 0.2431372553 0.00 1.00 0.9921568632 0.00 0.2431372553 0.00 1.00
	13824.00 -512.00 16000.00 0.09375 1.984375 -0.09375 0.2431372553 0.501960814 1.00 0.9843137264 0.00 0.2431372553 0.501960814 1.00
	13696.00 -3328.00 15744.00 0.00 1.96875 -0.203125 0.2392156869 0.501960814 1.00 0.9764705896 0.00 0.2392156869 0.501960814 1.00
	13824.00 -1664.00 15872.00 0.171875 1.921875 -0.46875 0.2431372553 0.00 1.00 1.00 0.501960814 0.2431372553 0.00 1.00
	```

	Between 15744 and 16000 (probably 15872).

	Let's try either 7fdf6190 or 80216190

	Code at 7fcbd60c writes it.

	That code is located at 801bd718 found by `lfs f6, -0x58A8 (rtoc)` c0c2a758 at 7fcbd618. I've moved MAIN_.text2 from 80100000 to 7fc00000 and that gives somewhat better results (not perfect though; data is still messed up).

	... wait, huh. That's NOT a match. The base needs to be 7fbfff00, instead. I wonder why?

	I've also added a new block from 7fdf0000 to 7fffffff (size 210000).

	Input parameter is 7fde813c (starts in r3, moved to r29), which I think is a vec3f of the direction to move. Also, I needed to expand the new block to start at 7fde6420 instead (to include that address).

	What if I just nop out the stores at 7fcbd60c and 7fcbd608? Hmm, that doesn't solve it :|

	7fde813c might actually be a position. I'm not sure.

	Func at 7fcbd414 is called by 7fcc163c. Inserting a BLR at the start of 7fcc163c causes the world to stop rendering, so I'm calling it `DrawWorldMaybe`. Inserting a BLR at the start of 7fcbd414 causes the world culling to stop updating as the camera moves. I'm calling it `UpdateViewBounds` for now.

	7fde813c is written by 7fc15438. Inserting a BLR at the start of that causes the ship to keep moving but the camera to stay still. I'm calling 7fc15438 CopyCameraPos.

	I could trace things back further, but I don't think I really need to. This is good enough (as long as I combine it with invincibility, by writing 1.0 (3f800000) to 800972a8 (this is per datel) - I think this needs to happen during startup.)

	Oh, also, changing a function needs to be a real patch because otherwise if the code gets paged out the patch is undone. I don't have a good workaround for this other than choosing a location where it's unlikely to be disturbed.

	Alright, doing this works, except there's some pop-in with the clouds. But, eh, good enough.

	------

	Just to clarify my process here: I created a savestate where I was pointed straight towards the ground and about to hit it, and then recorded a fifolog. Since I was aiming right at the ground, only a bit of it was visible, and I was directly above that bit, so I could get the world coordinates by looking at the vertices in renderdoc. From that I was able to load the savestate and use Dolphin's cheat search to find a value that was similar to those world coordinates, and then I was able to find what was setting that value. Looking straight at the ground also probably helped because there were fewer page faults since less stuff was being done.

	For creating the images of the emboss effect only, this works decently well:

	```patch
	diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
	index 4a9e9105a8..410df1b2d6 100644
	--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
	+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
	@@ -1358,6 +1358,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
	const auto& stage = uid_data->stagehash[n];
	out.Write("\n\t// TEV stage {}\n", n);

	+ bool is_special = uid_data->genMode_numtevstages + 1 == 7 && uid_data->stagehash[3].cc == 0x40f800 &&
	+ uid_data->stagehash[4].cc == 0x4cf802 && uid_data->stagehash[5].cc == 0x0802bf;
	+
	// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does not
	// exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
	u32 texcoord = stage.tevorders_texcoord;
	@@ -1593,6 +1596,17 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
	cc.hex = stage.cc;
	ac.hex = stage.ac;

	+ if (is_special && n == 0)
	+ {
	+ cc.a = cc.b = cc.c = TevColorArg::Zero;
	+ cc.d = TevColorArg::One;
	+ }
	+ if (is_special && n == 2)
	+ {
	+ cc.a = cc.b = cc.c = TevColorArg::Zero;
	+ cc.d = TevColorArg::Half;
	+ }
	+
	if (cc.a == TevColorArg::RasAlpha || cc.a == TevColorArg::RasColor ||
	cc.b == TevColorArg::RasAlpha || cc.b == TevColorArg::RasColor ||
	cc.c == TevColorArg::RasAlpha || cc.c == TevColorArg::RasColor ||
	```