Created
May 13, 2020 05:57
-
-
Save hcs64/82d3b22247bca3dadc61d415a6662434 to your computer and use it in GitHub Desktop.
Faster PPU swizzle ucode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// N64 'Bare Metal' 16BPP 272x240 SNES PPU 2BPP Tile 8x8 Demo by krom (Peter Lemon):
// Modified for faster vector ops and benchmark output (count register) by hcs:
//
// Program flow (CPU side):
//   1. Initialise the machine and a 272x240 16BPP framebuffer at $A0100000.
//   2. Run RSP microcode that converts the SNES palette into an N64 TLUT.
//   3. Run RSP microcode that converts the SNES tiles, timing it with COP0 register 9.
//   4. Patch the RDP command list with the clear colour and per-tile texture addresses.
//   5. Run the RDP command list, then print the measured count as decimal digits.
arch n64.cpu
endian msb
output "PPU2BPPTile8x8.N64", create
fill 1052672 // Set ROM Size

origin $00000000
base $80000000 // Entry Point Of Code
include "LIB/N64.INC" // Include N64 Definitions
include "LIB/N64_HEADER.ASM" // Include 64 Byte Header & Vector Table
insert "LIB/N64_BOOTCODE.BIN" // Include 4032 Byte Boot Code

Start:
  include "LIB/N64_GFX.INC" // Include Graphics Macros
  include "LIB/N64_RSP.INC" // Include RSP Macros
  N64_INIT() // Run N64 Initialisation Routine

  ScreenNTSC(272, 240, BPP16, $A0100000) // Screen NTSC: 272x240, 16BPP, DRAM Origin $A0100000

  WaitScanline($200) // Wait For Scanline To Reach Vertical Blank
// Convert SNES Palette To N64 TLUT | |
// Load RSP Code To IMEM | |
DMASPRD(RSPPALCode, RSPPALCodeEnd, SP_IMEM) // DMA Data Read DRAM->RSP MEM: Start Address, End Address, Destination RSP MEM Address | |
DMASPWait() // Wait For RSP DMA To Finish | |
SetSPPC(RSPPALStart) // Set RSP Program Counter: Start Address | |
StartSP() // Start RSP Execution: RSP Status = Clear Halt, Broke, Interrupt, Single Step, Interrupt On Break | |
DelayPAL: // Wait For RSP To Compute | |
lwu t0,SP_STATUS(a0) // T0 = RSP Status | |
andi t0,RSP_HLT // RSP Status &= RSP Halt Flag | |
beqz t0,DelayPAL // IF (RSP Halt Flag == 0) Delay PAL | |
nop // Delay Slot | |
// Copy SNES Clear Color To RDP List | |
la a0,N64TLUT // A0 = N64 TLUT Address | |
la a1,RDPSNESCLEARCOL+4 // A1 = N64 RDP SNES Clear Color Address | |
lhu t0,0(a0) // T0 = TLUT Color 0 | |
sh t0,0(a1) // Store Color 0 To RDP Fill Color Hi | |
sh t0,2(a1) // Store Color 0 To RDP Fill Color Lo | |
// Convert SNES Tiles To N64 Linear Texture | |
// Load RSP Code To IMEM | |
DMASPRD(RSPTILECode, RSPTILECodeEnd, SP_IMEM) // DMA Data Read DRAM->RSP MEM: Start Address, End Address, Destination RSP MEM Address | |
DMASPWait() // Wait For RSP DMA To Finish | |
SetSPPC(RSPTILEStart) // Set RSP Program Counter: Start Address | |
mtc0 r0, 9 // Clear count | |
StartSP() // Start RSP Execution: RSP Status = Clear Halt, Broke, Interrupt, Single Step, Interrupt On Break | |
DelayTILES: // Wait For RSP To Compute | |
lwu t0,SP_STATUS(a0) // T0 = RSP Status | |
andi t0,RSP_HLT // RSP Status &= RSP Halt Flag | |
beqz t0,DelayTILES // IF (RSP Halt Flag == 0) Delay TILES | |
nop // Delay Slot | |
mfc0 t0, 9 | |
la t1, RSP_Cycles | |
sw t0, 0(t1) | |
// Convert SNES Tile Map To RDP List | |
la a0,SNESMAP // A0 = SNES Tile Map Address | |
la a1,$A0000000|((RDPSNESTILE+12)&$3FFFFFF) // A1 = N64 RDP SNES Tile Map Address | |
la a2,N64TILE // A2 = N64 Tile Address | |
ori t0,r0,895 // T0 = Number Of Tiles To Convert | |
MAPLoop: | |
lbu t1,0(a0) // T1 = SNES Tile Map # Lo Byte | |
lbu t2,1(a0) // T2 = SNES Tile Map # Hi Byte | |
addiu a0,2 // A0 += 2 | |
sll t2,8 // T2 <<= 8 | |
or t1,t2 // T1 != T2 | |
sll t1,5 // T1 *= 32 | |
addu t1,a2 // T1 += N64 Tile Address | |
sw t1,0(a1) // Store SNES Tile Map # To N64 RDP SNES Tile Map | |
addiu a1,40 // A1 += 40 | |
bnez t0,MAPLoop // IF (Number Of Tiles To Convert != 0) Map Loop | |
subiu t0,1 // Decrement Number Of Tiles To Convert (Delay Slot) | |
WaitScanline($200) // Wait For Scanline To Reach Vertical Blank | |
DPC(RDPBuffer, RDPBufferEnd) // Run DPC Command Buffer: Start Address, End Address | |
// Wait for DP to finish with the screen | |
lui t0, MI_BASE | |
-;lw t1, MI_INTR(t0) | |
andi t1, 1<<5 // DP | |
beqz t1,- | |
nop | |
la a0, RSP_Cycles | |
lw a0, 0(a0) | |
la a1, $A0100000+((64*272)+64)*2 // framebuffer pos | |
la t2, digits | |
li t3, 10 | |
digit_loop: | |
divu a0, t3 | |
mfhi a0 // remainder | |
sll a0, 3 | |
addu a0, t2 | |
ld t0, 0(a0) | |
li t4, 7 // rows to do | |
-;li t5, 7 // pixels to do | |
-;bltz t0,+ | |
lli t6, 0xfffe | |
lli t6, 0x0000 | |
+;sh t6, 0(a1) | |
addi a1, 2 | |
dsll t0, 1 | |
bnez t5,- | |
addi t5, -1 | |
addi a1, (272*2)-(8*2) // move down a line | |
bnez t4,-- | |
addi t4, -1 | |
mflo a0 // quotient | |
nop | |
nop | |
addi a1, -((8*2)+(8*272*2)) // move back a char | |
bnez a0, digit_loop | |
nop | |
// Nothing left to do: spin forever with the final frame on screen.
Loop:
  j Loop
  nop // Delay Slot
// Benchmark result: written with SW after the tile microcode halts and read
// back with LW by the digit printer (only the first 32-bit word is accessed).
align(4)
RSP_Cycles:;dd 0

// 8x8 digit glyphs for the benchmark printout, 8 bytes per digit, indexed by
// digit value * 8. The printer loads a glyph as one 64-bit big-endian value and
// shifts it left one bit per pixel, so the first byte is the top row and bit 7
// of each byte is the leftmost pixel. The last two rows of every glyph are blank.
align(8)
digits:
/////////////////////
// $30: 0
db %00111010
db %01100100
db %01001010
db %01010010
db %00100110
db %01011100
db %00000000
db %00000000
// $31: 1
db %00011000
db %00111000
db %00011000
db %00011000
db %00011000
db %00111100
db %00000000
db %00000000
// $32: 2
db %00111000
db %01001100
db %00001100
db %00011000
db %00110000
db %01111110
db %00000000
db %00000000
// $33: 3
db %00111100
db %01000110
db %00011100
db %00000110
db %01000110
db %00111100
db %00000000
db %00000000
// $34: 4
db %00001100
db %00011100
db %00101100
db %01001100
db %01111110
db %00001100
db %00000000
db %00000000
// $35: 5
db %01111110
db %01100000
db %01111100
db %00000110
db %01000110
db %00111100
db %00000000
db %00000000
// $36: 6
db %00111100
db %01100000
db %01111100
db %01100110
db %01100110
db %00111100
db %00000000
db %00000000
// $37: 7
db %01111110
db %01100110
db %00001100
db %00111110
db %00011000
db %00011000
db %00000000
db %00000000
// $38: 8
db %00111100
db %01100110
db %00111100
db %01100110
db %01100110
db %00111100
db %00000000
db %00000000
// $39: 9
db %00111100
db %01100110
db %01100110
db %00111110
db %00000110
db %00111100
db %00000000
db %00000000
// Work buffers filled at run time by the RSP microcode, followed by the SNES
// source assets they are converted from.
align(8) // Align 64-Bit
N64TLUT: // Converted Palette (DMA Destination Of The Palette Microcode)
  fill 512 // Generates 512 Bytes Containing $00

align(8) // Align 64-Bit
N64TILE: // Converted Tiles, 32 Bytes Each (DMA Destination Of The Tile Microcode)
  fill 65536 // Generates 65536 Bytes Containing $00

align(8) // Align 64-Bit
insert SNESPAL, "BG.pal" // SNES Palette (Source For N64TLUT)

align(8) // Align 64-Bit
insert SNESTILE, "BG.pic" // SNES 2BPP Tiles (Source For N64TILE)

align(8) // Align 64-Bit
insert SNESMAP, "BG.map" // SNES 32x32 Background Tile Map (2048 Bytes)

align(8) // Align 64-Bit
// Constant vectors for the palette microcode. Assembled with base $0000 so the
// labels are DMEM offsets; the microcode DMAs RSPPALData..RSPPALDataEnd into DMEM.
RSPPALData:
base $0000 // Set Base Of RSP Data Object To Zero

// Uses Whole Vector For 1st 8 Colors To Preserve SNES Palette Color 0 Alpha
// Uses Element 9 To OR Vector By Scalar $0001 For Other Colors
AlphaOR:
  dh $0000, $0001, $0001, $0001, $0001, $0001, $0001, $0001
  // 1 * $0000, 7 * $0001 (OR Alpha 1 Bit) (1st 8 Colors)
  // $0001 (OR Alpha 1 Bit) (Other Colors) (e9)

// Uses Elements 8..12 To AND Vector By Scalar
ANDByte:
  dh $00FF, $FF00, $001F, $03E0, $7C00, $0000, $0000, $0000
  // $00FF (AND Lo Byte) (e8)
  // $FF00 (AND Hi Byte) (e9)
  // $001F (AND Red 5 Bits) (e10)
  // $03E0 (AND Green 5 Bits) (e11)
  // $7C00 (AND Blue 5 Bits) (e12)

// Uses Elements 8..11 To Multiply Vector By Scalar For Pseudo Vector Shifts
PALShift:
  dh $0100, $0800, $0002, $0080
  // $0100 (Left Shift Using Multiply: << 8), (Right Shift Using Multiply: >> 8) (Big-Endian Convert) (e8)
  // $0800 (Left Shift Using Multiply: << 11) (Red) (e9)
  // $0002 (Left Shift Using Multiply: << 1) (Green) (e10)
  // $0080 (Right Shift Using Multiply: >> 9) (Blue) (e11)

align(8) // Align 64-Bit
base RSPPALData+pc() // Set End Of RSP Data Object
RSPPALDataEnd:
// Constant vectors for the tile microcode. Assembled with base $0000 so the
// labels are DMEM offsets; the microcode DMAs RSPSHIFTData..RSPSHIFTDataEnd into
// DMEM. Only ShiftMux0, ShiftMux1 and ANDBit are loaded by the tile microcode in
// this file; the other tables are not referenced by it.
align(8) // Align 64-Bit
RSPSHIFTData:
base $0000 // Set Base Of RSP Data Object To Zero

// Uses Elements 8..15 To Multiply Vector By Scalar For Pseudo Vector Shifts
ShiftLeftRightA:
  dh $0001, $0002, $0004, $0008, $0010, $0020, $0040, $0080
  // $0001 (Left Shift Using Multiply: << 0), (Right Shift Using Multiply: >> 16) (e8)
  // $0002 (Left Shift Using Multiply: << 1), (Right Shift Using Multiply: >> 15) (e9)
  // $0004 (Left Shift Using Multiply: << 2), (Right Shift Using Multiply: >> 14) (e10)
  // $0008 (Left Shift Using Multiply: << 3), (Right Shift Using Multiply: >> 13) (e11)
  // $0010 (Left Shift Using Multiply: << 4), (Right Shift Using Multiply: >> 12) (e12)
  // $0020 (Left Shift Using Multiply: << 5), (Right Shift Using Multiply: >> 11) (e13)
  // $0040 (Left Shift Using Multiply: << 6), (Right Shift Using Multiply: >> 10) (e14)
  // $0080 (Left Shift Using Multiply: << 7), (Right Shift Using Multiply: >> 9) (e15)

ShiftLeftRightB:
  dh $0100, $0200, $0400, $0800, $1000, $2000, $4000, $8000
  // $0100 (Left Shift Using Multiply: << 8), (Right Shift Using Multiply: >> 8) (e8)
  // $0200 (Left Shift Using Multiply: << 9), (Right Shift Using Multiply: >> 7) (e9)
  // $0400 (Left Shift Using Multiply: << 10), (Right Shift Using Multiply: >> 6) (e10)
  // $0800 (Left Shift Using Multiply: << 11), (Right Shift Using Multiply: >> 5) (e11)
  // $1000 (Left Shift Using Multiply: << 12), (Right Shift Using Multiply: >> 4) (e12)
  // $2000 (Left Shift Using Multiply: << 13), (Right Shift Using Multiply: >> 3) (e13)
  // $4000 (Left Shift Using Multiply: << 14), (Right Shift Using Multiply: >> 2) (e14)
  // $8000 (Left Shift Using Multiply: << 15), (Right Shift Using Multiply: >> 1) (e15)

// One mask per pixel column: the same bit selected in both bytes of each
// halfword (e8..e15 = bit 0..7).
ANDBit:
  dh $0101, $0202, $0404, $0808
  dh $1010, $2020, $4040, $8080

// Pseudo shifts (and adds) to put bits in 12,11 and 8,7 (for sfv)
ShiftMux0:
  dh 1<<(16+( 7- 8)) // -16
  dh 1<< (11- 9) // unused, combined with mux1
  dh 1<<(16+( 7-10)) // -16
  dh 1<< (11-11) // unused, combined with mux1
  dh 1<<(16+( 7-12)) // -16
  dh 1<<(16+(11-13)) // -16
  dh 1<<(16+( 7-14)) // -16
  dh 1<<(16+(11-15)) // -16

ShiftMux1:
  dh 1<<( 8- 0)
  dh (1<<(12- 1))|(1<<(11-9))
  dh 1<<( 8- 2)
  dh (1<<(12- 3))|(1<<(11-11))
  dh 1<<( 8- 4)
  dh 1<<(12- 5)
  dh 1<<( 8- 6)
  dh 1<<(12- 7)

// These may work for pre-swapped tiles
ShiftMux2:
  dh (1<< ( 7- 0)) + (1<<(8-8))
  dh (1 << (11- 1)) + (1 << (12 - 9))
  dh 1<< ( 7- 2)
  dh (1<< (11- 3)) + (1<<(12-11))
  dh 1<< ( 7- 4)
  dh 1<< (11- 5)
  dh 1<< ( 7- 6)
  dh 1<< (11- 7)

ShiftMux3:
  dh 1<< ( 8- 8) // unused, combined with mux0
  dh 1<< (12- 9) // unused, combined with mux0
  dh 1<<(16+( 8-10)) // -16
  dh 1<< (12-11) // unused, combined with mux0
  dh 1<<(16+( 8-12)) // -16
  dh 1<<(16+(12-13)) // -16
  dh 1<<(16+( 8-14)) // -16
  dh 1<<(16+(12-15)) // -16

align(8) // Align 64-Bit
base RSPSHIFTData+pc() // Set End Of RSP Data Object
RSPSHIFTDataEnd:
// RSP microcode: convert the 256-entry SNES palette (SNESPAL) into an N64 TLUT.
// The whole 512-byte palette is DMAed to DMEM offset 0, converted in place
// 8 colors (one vector) at a time, and DMAed back out to N64TLUT. The first
// 8 colors are handled outside the loop because color 0 keeps alpha = 0.
align(8) // Align 64-Bit
RSPPALCode:
arch n64.rsp
base $0000 // Set Base Of RSP Code Object To Zero

RSPPALStart:
  // Load Static Palette Data
  RSPDMASPRD(RSPPALData, RSPPALDataEnd, SP_DMEM) // RSP DMA Data Read DRAM->RSP MEM: Start Address, End Address, Destination RSP MEM Address
  RSPDMASPWait() // Wait For RSP DMA To Finish

  // Constants are read into registers before the palette DMA below overwrites
  // DMEM offset 0.
  lqv v0[e0],AlphaOR(r0) // V0 = 1 * $0000, 7 * $0001 (OR Alpha 1 Bit) (128-Bit Quad)
  lqv v1[e0],ANDByte(r0) // V1 = AND Lo/Hi/Red/Green/Blue Bytes (128-Bit Quad)
  ldv v2[e0],PALShift(r0) // V2 = Shift Using Multiply: Red/Green/Blue (64-Bit Double)

  // Decode Colors
  ori a0,r0,0 // A0 = Palette Start Offset
  la a1,N64TLUT // A1 = Aligned DRAM Physical RAM Offset ($00000000..$007FFFFF 8MB)
  la a2,SNESPAL // A2 = Aligned DRAM Physical RAM Offset ($00000000..$007FFFFF 8MB)

  ori t0,r0,511 // T0 = Length Of DMA Transfer In Bytes - 1
  ori t1,r0,30 // T1 = Color Counter (31 Loop Passes Of 8 Colors After The First 8)

  mtc0 a0,c0 // Store Memory Offset To SP Memory Address Register ($A4040000)
  mtc0 a2,c1 // Store RAM Offset To SP DRAM Address Register ($A4040004)
  mtc0 t0,c2 // Store DMA Length To SP Read Length Register ($A4040008)
  RSPDMASPWait() // Wait For RSP DMA To Finish

  // Vector Grab 1st 8 Colors:
  lqv v3[e0],0(a0) // V3 = Palette Colors 0..7
  vand v4,v3,v1[e8] // V4 = Lo Byte Color 0..7 (& $00FF)
  vand v5,v3,v1[e9] // V5 = Hi Byte Color 0..7 (& $FF00)
  vmudn v4,v2[e8] // V4 = Lo Byte Color 0..7 << 8
  vmudl v5,v2[e8] // V5 = Hi Byte Color 0..7 >> 8
  vor v4,v5[e0] // V4 = Color 0..7 Big-Endian

  vand v5,v4,v1[e10] // V5 = RED 5 Bits, Color 0..7 (& $001F)
  vmudn v5,v2[e9] // V5 = RED 5 Bits, Color 0..7 << 11
  vand v6,v4,v1[e11] // V6 = GREEN 5 Bits, Color 0..7 (& $03E0)
  vmudn v6,v2[e10] // V6 = GREEN 5 Bits, Color 0..7 << 1
  vor v5,v6[e0] // V5 = RED,GREEN 10 Bits, Color 0..7
  vand v6,v4,v1[e12] // V6 = BLUE 5 Bits, Color 0..7 (& $7C00)
  vmudl v6,v2[e11] // V6 = BLUE 5 Bits, Color 0..7 >> 9
  vor v5,v6[e0] // V5 = RED,GREEN,BLUE 15 Bits, Color 0..7
  vor v5,v0[e0] // V5 = RED,GREEN,BLUE,ALPHA 16 Bits, Color 0..7 (Color 0 Keeps Alpha 0)

  // Store Colors 0..7:
  sqv v5[e0],0(a0) // Palette Colors 0..7 = V5 Quad

LoopColors:
  // Vector Grab Next 8 Colors:
  addi a0,16 // A0 = Offset Of Next 8 Colors
  lqv v3[e0],0(a0) // V3 = Next 8 Palette Colors
  vand v4,v3,v1[e8] // V4 = Lo Byte Of Each Color (& $00FF)
  vand v5,v3,v1[e9] // V5 = Hi Byte Of Each Color (& $FF00)
  vmudn v4,v2[e8] // V4 = Lo Byte << 8
  vmudl v5,v2[e8] // V5 = Hi Byte >> 8
  vor v4,v5[e0] // V4 = 8 Colors Big-Endian

  vand v5,v4,v1[e10] // V5 = RED 5 Bits (& $001F)
  vmudn v5,v2[e9] // V5 = RED 5 Bits << 11
  vand v6,v4,v1[e11] // V6 = GREEN 5 Bits (& $03E0)
  vmudn v6,v2[e10] // V6 = GREEN 5 Bits << 1
  vor v5,v6[e0] // V5 = RED,GREEN 10 Bits
  vand v6,v4,v1[e12] // V6 = BLUE 5 Bits (& $7C00)
  vmudl v6,v2[e11] // V6 = BLUE 5 Bits >> 9
  vor v5,v6[e0] // V5 = RED,GREEN,BLUE 15 Bits
  vor v5,v0[e9] // V5 = RED,GREEN,BLUE,ALPHA 16 Bits (Alpha 1 On All 8 Colors)

  // Store These 8 Colors:
  sqv v5[e0],0(a0) // 8 Palette Colors At A0 = V5 Quad

  bnez t1,LoopColors // IF (Color Counter != 0) Loop Colors
  subi t1,1 // Decrement Color Counter (Delay Slot)

  // DMA the converted 512-byte TLUT from DMEM offset 0 out to N64TLUT.
  ori a0,r0,0 // A0 = SP Memory Address Offset DMEM ($A4000000..$A4001FFF 8KB)
  ori t0,r0,511 // T0 = Length Of DMA Transfer In Bytes - 1
  mtc0 a0,c0 // Store Memory Offset To SP Memory Address Register ($A4040000)
  mtc0 a1,c1 // Store RAM Offset To SP DRAM Address Register ($A4040004)
  mtc0 t0,c3 // Store DMA Length To SP Write Length Register ($A404000C)
  RSPDMASPWait() // Wait For RSP DMA To Finish

  break // Set SP Status Halt, Broke & Check For Interrupt
align(8) // Align 64-Bit
base RSPPALCode+pc() // Set End Of RSP Code Object
RSPPALCodeEnd:
// RSP microcode: convert SNES 2BPP 8x8 tiles (16 bytes each) into 32-byte N64
// tiles. Tiles are processed in 8 blocks of 128:
//   - 2048 bytes of SNES tiles are DMAed into DMEM at offset 2048,
//   - each tile is read at A0 and its 32 output bytes are written at A3,
//     starting from DMEM offset 0,
//   - the resulting 4096 bytes are DMAed out to N64TILE.
// Output is written over the same DMEM the input occupies; within a block the
// write offset (32 * tile) stays behind the read offset (2048 + 16 * tile).
align(8) // Align 64-Bit
RSPTILECode:
arch n64.rsp
base $0000 // Set Base Of RSP Code Object To Zero

RSPTILEStart:
  // Load Static Shift Data
  RSPDMASPRD(RSPSHIFTData, RSPSHIFTDataEnd, SP_DMEM) // RSP DMA Data Read DRAM->RSP MEM: Start Address, End Address, Destination RSP MEM Address
  RSPDMASPWait() // Wait For RSP DMA To Finish

  // Constants are kept in registers because tile output later overwrites
  // DMEM offset 0.
  lqv v8[e0],ShiftMux0(r0) // V8 = ShiftMux0 Multipliers
  lqv v9[e0],ShiftMux1(r0) // V9 = ShiftMux1 Multipliers
  lqv v10[e0],ANDBit(r0) // V10 = Per-Column Bit Masks

  // Decode Tiles
  ori t3,r0,7 // T3 = Tile Block Repeat Counter
  ori a0,r0,2048 // A0 = SNES Tile Start Offset
  la a1,N64TILE // A1 = Aligned DRAM Physical RAM Offset ($00000000..$007FFFFF 8MB)
  la a2,SNESTILE // A2 = Aligned DRAM Physical RAM Offset ($00000000..$007FFFFF 8MB)
  ori a3,r0,0 // A3 = N64 Tile Start Offset

LoopTileBlocks:
  // Uses DMA To Copy 2048 Bytes To DMEM, For 2BPPSNES->4BPPN64
  ori t0,r0,2047 // T0 = Length Of DMA Transfer In Bytes - 1
  ori t1,r0,127 // T1 = Tile Counter
  mtc0 a0,c0 // Store Memory Offset To SP Memory Address Register ($A4040000)
  mtc0 a2,c1 // Store RAM Offset To SP DRAM Address Register ($A4040004)
  mtc0 t0,c2 // Store DMA Length To SP Read Length Register ($A4040008)
  RSPDMASPWait() // Wait For RSP DMA To Finish

  //j skip3
  //nop

  lqv v11[e0], 0(a0) // V11 = Tile BitPlane 0,1 Row 0..7

LoopTiles:
  // AND out individual bits
  vand v7,v11,v10[e15] // V7 = Column Bit 7 Of Both Bitplanes
  vand v6,v11,v10[e14] // V6 = Column Bit 6
  vand v5,v11,v10[e13] // V5 = Column Bit 5
  vand v4,v11,v10[e12] // V4 = Column Bit 4
  vand v3,v11,v10[e11] // V3 = Column Bit 3
  vand v2,v11,v10[e10] // V2 = Column Bit 2
  vand v1,v11,v10[e9] // V1 = Column Bit 1
  vand v0,v11,v10[e8] // V0 = Column Bit 0

  // Prefetch the next tile
  // (on the last tile of a block this loads one tile past the block's input;
  // that value is replaced by the load at the top of the next block)
  lqv v11[e0], 16(a0) // V11 = Next Tile BitPlane 0,1 Row 0..7
  addiu a0, 16 // A0 = Next SNES Tile Offset

  // Column 7,6
  vmudl v12,v7,v8[e15]
  vmadn v12,v7,v9[e15]
  vmadl v12,v6,v8[e14]
  vmadn v12,v6,v9[e14]

  // Column 5,4
  vmudl v13,v5,v8[e13]
  vmadn v13,v5,v9[e13]
  vmadl v13,v4,v8[e12]
  vmadn v13,v4,v9[e12]

  // Column 3,2
  vmudn v14,v3,v9[e11]
  vmadl v14,v2,v8[e10]
  vmadn v14,v2,v9[e10]

  // Column 1,0
  vmudn v15,v1,v9[e9]
  vmadl v15,v0,v8[e8]
  vmadn v15,v0,v9[e8]

if 1 == 1 {
  // TODO: would it be better to interleave these stores with the multiplies above?
  // Each pair of stores writes one output byte per tile row (elements 0..3,
  // then 4..7); A3 advances 1+1+1+29 = 32 bytes per tile.

  // Store Columns 7,6
  sfv v12[e0],0(a3)
  sfv v12[e8],16(a3)
  addi a3, 1

  // Store Columns 5,4
  sfv v13[e0],0(a3)
  sfv v13[e8],16(a3)
  addi a3, 1

  // Store Columns 3,2
  sfv v14[e0],0(a3)
  sfv v14[e8],16(a3)
  addi a3, 1

  // Store Columns 1,0
  sfv v15[e0],0(a3)
  sfv v15[e8],16(a3)
  addi a3, 1+(32-4)
} else {
  // This is a workaround for cen64 not having sfv support.
  // Emits the same 32 mfc2/srl/sb/addiu groups as the hand-unrolled version,
  // in the same order: elements e0,e2,..,e14 (outer), registers v12..v15
  // (inner). Each group extracts bits 7..14 of one halfword and stores them as
  // the next output byte, so A3 advances 32 bytes per tile.
  define tileElem(0)
  while {tileElem} < 16 {
    define tileVec(12)
    while {tileVec} < 16 {
      mfc2 t0,v{tileVec}[e{tileElem}]
      srl t2, t0, 7
      sb t2, 0(a3)
      addiu a3, 1
      evaluate tileVec({tileVec} + 1)
    }
    evaluate tileElem({tileElem} + 2)
  }
}

  bnez t1,LoopTiles // IF (Tile Counter != 0) Loop Tiles
  subi t1,1 // Decrement Tile Counter (Delay Slot)

skip3:
  // DMA out tiles
  ori a0,r0,0 // A0 = SP Memory Address Offset DMEM ($A4000000..$A4001FFF 8KB)
  // Uses DMA & To Copy 128 Tiles (4096 Bytes) To RDRAM
  li t0,4095 // T0 = Length Of DMA Transfer In Bytes - 1
  mtc0 a0,c0 // Store Memory Offset To SP Memory Address Register ($A4040000)
  mtc0 a1,c1 // Store RAM Offset To SP DRAM Address Register ($A4040004)
  mtc0 t0,c3 // Store DMA Length To SP Write Length Register ($A404000C)
  RSPDMASPWait() // Wait For RSP DMA To Finish

  // Rewind the DMEM pointers and step the RDRAM pointers to the next block.
  ori a0,r0,2048 // A0 = SNES Tile Start Offset
  addiu a1,4096 // A1 = Next N64 Tile Offset
  addiu a2,2048 // A2 = Next SNES Tile Offset
  ori a3,r0,0 // A3 = Tile Start Offset

  bnez t3,LoopTileBlocks // IF (Tile Block Repeat Counter != 0) Loop Tile Blocks
  subi t3,1 // Decrement Tile Block Repeat Counter (Delay Slot)

  break // Set SP Status Halt, Broke & Check For Interrupt
align(8) // Align 64-Bit
base RSPTILECode+pc() // Set End Of RSP Code Object
RSPTILECodeEnd:
// RDP command list: clear the screen with the SNES backdrop colour, load the
// TLUT, then draw the 32x28 tile background one 8x8 textured rectangle at a time.
// Two places are patched by the CPU before the list is run:
//   - RDPSNESCLEARCOL+4: the Set_Fill_Color argument,
//   - RDPSNESTILE+12 (+40 per tile): each tile's Set_Texture_Image address.
align(8) // Align 64-Bit
RDPBuffer:
arch n64.rdp
  Set_Scissor 8<<2,8<<2, 0,0, 264<<2,232<<2 // Set Scissor: XH 8.0,YH 8.0, Scissor Field Enable Off,Field Off, XL 264.0,YL 232.0
  Set_Other_Modes CYCLE_TYPE_FILL // Set Other Modes
  Set_Color_Image IMAGE_DATA_FORMAT_RGBA,SIZE_OF_PIXEL_16B,272-1, $00100000 // Set Color Image: FORMAT RGBA,SIZE 16B,WIDTH 272, DRAM ADDRESS $00100000
RDPSNESCLEARCOL:
  Set_Fill_Color $00010001 // Set Fill Color: PACKED COLOR 16B R5G5B5A1 Pixels
  Fill_Rectangle 271<<2,239<<2, 0<<2,0<<2 // Fill Rectangle: XL 271.0,YL 239.0, XH 0.0,YH 0.0

  Set_Other_Modes EN_TLUT|SAMPLE_TYPE|BI_LERP_0|ALPHA_DITHER_SEL_NO_DITHER|B_M2A_0_1|FORCE_BLEND|IMAGE_READ_EN // Set Other Modes
  Set_Combine_Mode $0,$00, 0,0, $1,$01, $0,$F, 1,0, 0,0,0, 7,7,7 // Set Combine Mode: SubA RGB0,MulRGB0, SubA Alpha0,MulAlpha0, SubA RGB1,MulRGB1, SubB RGB0,SubB RGB1, SubA Alpha1,MulAlpha1, AddRGB0,SubB Alpha0,AddAlpha0, AddRGB1,SubB Alpha1,AddAlpha1

  Set_Texture_Image IMAGE_DATA_FORMAT_RGBA,SIZE_OF_PIXEL_16B,1-1, N64TLUT // Set Texture Image: FORMAT RGBA,SIZE 16B,WIDTH 1, N64TLUT DRAM ADDRESS
  Set_Tile 0,0,0, $100, 0,0, 0,0,0,0, 0,0,0,0 // Set Tile: TMEM Address $100, Tile 0
  Load_Tlut 0<<2,0<<2, 0, 255<<2,0<<2 // Load Tlut: SL 0.0,TL 0.0, Tile 0, SH 255.0,TH 0.0
  Sync_Tile // Sync Tile

  // BG Column 0..31 / Row 0..27
  Set_Tile IMAGE_DATA_FORMAT_COLOR_INDX,SIZE_OF_PIXEL_4B,1, $000, 0,0, 0,0,0,0, 0,0,0,0 // Set Tile: FORMAT COLOR INDEX,SIZE 4B,Tile Line Size 1 (64bit Words), TMEM Address $000, Tile 0

// One command group per tile, generated at assembly time in row-major order.
// The texture address assembled here is a placeholder in map order; the CPU
// overwrites it with the address selected by the SNES tile map.
RDPSNESTILE:
  define y(0)
  while {y} < 28 {
    define x(0)
    while {x} < 32 {
      Sync_Tile // Sync Tile
      Set_Texture_Image IMAGE_DATA_FORMAT_COLOR_INDX,SIZE_OF_PIXEL_8B,4-1, N64TILE+(32*(({y}*32)+{x})) // Set Texture Image: FORMAT COLOR INDEX,SIZE 8B,WIDTH 4, Tile DRAM ADDRESS
      Load_Tile 0<<2,0<<2, 0, 7<<2,7<<2 // Load Tile: SL,TL, Tile, SH,TH
      Texture_Rectangle (16+({x}*8))<<2,(16+({y}*8))<<2, 0, (8+({x}*8))<<2,(8+({y}*8))<<2, 0<<5,0<<5, 1<<10,1<<10 // Texture Rectangle: XL,YL, Tile, XH,YH, S,T, DSDX,DTDY
      evaluate x({x} + 1)
    }
    evaluate y({y} + 1)
  }

  Sync_Full // Ensure Entire Scene Is Fully Drawn
RDPBufferEnd:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment