Last active
February 14, 2022 14:37
-
-
Save ped7g/cc65958c69f08564d140ffd14a9ef715 to your computer and use it in GitHub Desktop.
ZX Spectrum Next example of optimisations using the Z80N extended instructions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; Author: Ped7g ; (C) 2022 ; license: https://opensource.org/licenses/MIT | |
; Z80N (ZX Next) assembly, sjasmplus syntax: https://github.com/z00m128/sjasmplus | |
; | |
; code-size optimisation based on facebook post with small example showcasing the usage of routines | |
; | |
; Build-time selection of the address-conversion routine used by the example code.
; The default config does the counter-clockwise rotation of the buffer to screen;
; to get the clock-wise variant, comment out the first EQU and uncomment the second:
computeULAFromVRamAddress   EQU     computeULAFromVRamAddressCCW
; computeULAFromVRamAddress EQU     computeULAFromVRamAddressCW
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; Original facebook post sparking the interest: | |
/* | |
I have a 1k video buffer which represents a 32x32 character matrix starting at 0x9000. Characters are 8x8 pixels. | |
The buffer is rotated 90 degrees clockwise so the character at 0x9000 is the top of the rightmost column, 0x9020 is the top of the second column from the left etc…. | |
This routine should compute the corresponding ULA address on the ZX Spectrum Next, including the 90 degree rotation anti-clockwise so it can be displayed normally… so 0x9000 should be the top left corner of the ULA, 0x9020 the first character on the second ULA character line… Can it be optimised?
Using a table of pre-calculated addresses is one option…. Are there others? | |
; Entry: DE = VRAM Address | |
; Return:- | |
;Carry set, HL = Valid ULA Address | |
;Carry Clear: VRAM Address out of range | |
computeULAFromVRamAddress | |
EX DE,HL | |
LD A,L | |
AND 0x1F | |
RLCA | |
RLCA | |
RLCA ; * 8 as needs to be pixel coordinate | |
LD E,A ; E=Y (ZXN X) | |
LD A,H | |
SUB 0x90 ; subtract base to get offset | |
SRL A | |
RR L | |
SRL A | |
RR L | |
SRL A | |
RR L | |
SRL A | |
RR L | |
SRL A | |
RR L ; divide by 32 | |
LD A,L | |
CP 0x18 ; ZXN only has 24 character lines | |
RET NC ; out of range | |
RLCA | |
RLCA | |
RLCA ; convert to pixel coordinates
LD D,A ;D=X (ZXN Y) | |
PIXELAD | |
EX DE,HL | |
SCF | |
RET | |
*/ | |
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; ZX Spectrum Next example, showing the 32x32 map counter-clock-wise rotated (cut to 32x24)
    DEVICE ZXSPECTRUMNEXT
    ORG $9000
stack_top:                      ; stack will go downward from $9000 (passed to SAVENEX OPEN as stack address)
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; 32x32 ASCII map which will be displayed rotated counter-clockwise
; Layout: one DB row per VRAM.y (the "Lnn" remark at end of each row), 32 chars per row (VRAM.x),
; so char address = VRAM + 32*y + x. The conversion routines rely on VRAM being 1 KiB aligned.
;
; NOTE(review): in this copy of the file the rows L05..L31 are shorter than 32 characters (runs of
; spaces appear to have been collapsed to a single space), so the table is smaller than 1024 bytes
; and the ASSERT below will fail. The string literals are left exactly as found - restore every
; row to 32 characters from the original source before assembling.
VRAM:
       ;01234567890123456789012345678901; 32 chars
    DB "- - - - - - - - - - - - - - - - " ; L00
    DB "C-C-C-C-C-C-C-C-C-C-C-C-C-C-C-C-" ; L01
    DB "0C0C0C0C0C1C1C1C1C1C2C2C2C2C2C3C" ; L02
    DB "00204060800121416181022242628203" ; L03
    DB " 1 3 5 7 9 1 3 5 7 9 1 3 5 7 9 1" ; L04
    DB " " ; L05
    DB " S L " ; L06
    DB " N A " ; L07
    DB " L SMM S iT +|!|!|!|+ L " ; L08
    DB "|0 AUO T nr - - 0|" ; L09
    DB " 9 HLT a = / = 9 " ; L10
    DB " OT V tn - // - " ; L11
    DB " NCO I es = // = " ; L12
    DB " L O B S xp - /'_ - L " ; L13
    DB "|1 IY I to = /-_ = 1|" ; L14
    DB " 4 TLT B s - /-_ - 4 " ; L15
    DB " ARA L ei = /;_ = " ; L16
    DB " TA E dn - m\\ )/ - " ; L17
    DB " L OEE ig = _ '_= L " ; L18
    DB "|1 R D R t - m/ ({_- 1|" ; L19
    DB " 9 TI O oA = \\;_> = 9 " ; L20
    DB " EAS W rS - \\-_ - " ; L21
    DB " S C = \\-_ = " ; L22
    DB " L IST ( iI - \\'_ - L " ; L23
    DB "|2 WWF C sI = \\\\ = 2|" ; L24
    DB " 4 -OE O - \\\\ - 4 " ; L25
    DB " KRL L fa = \\ = " ; L26
    DB " C = ur - - " ; L27
    DB " L OPD 0 nt +!|!|!|!+ L " ; L28
    DB "|2 LON 8 2|" ; L29
    DB " 9 CTA ) 9 " ; L30
    DB " " ; L31
    ASSERT $9000+32*32 == $     ; the map must be exactly 32x32 bytes
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; rotate buffer CCW 90degree, using transformation of coordinates:
;   ULA.X = 8 * VRAM.y
;   ULA.Y = 8 * (0x1F - VRAM.x)
;-----------------------------------------------------------------------------------------
; Entry: DE = VRAM address %????'??yy'yyyx'xxxx (the top six bits are shifted out and ignored,
;        so any 1 KiB aligned buffer address works)
; Modifies: A, B, DE, flags (DE is destroyed - save it if the VRAM address is needed afterwards)
; Return:
;   - carry set, HL = ULA address (72T)
;   - carry clear - out of range (44T): VRAM.x is 0..7, which would land on ULA.Y >= 192;
;     HL is left untouched in this case
computeULAFromVRamAddressCCW:
    ld      b,3                 ; shift amount used by both BSLA instructions
    bsla    de,b                ; DE = %???y'yyyy'xxxx'x000
    ld      a,8*0x1F
    sub     e                   ; A = ULA.Y = 8 * (0x1F - VRAM.x)
    cp      8*24
    ret     nc                  ; cf=0 => VRAM address out of range (ULA.Y >= 192)
    ld      e,d                 ; E = %???y'yyyy
    bsla    de,b                ; E = ULA.X = 8 * VRAM.y (the unknown top bits are shifted out of E)
    ld      d,a                 ; D = ULA.Y
    pixelad                     ; HL = ULA address, cf=1 still from `cp 8*24`
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; rotate buffer CW 90degree, using transformation of coordinates:
;   ULA.X = 8 * (0x1F - VRAM.y)
;   ULA.Y = 8 * VRAM.x
;-----------------------------------------------------------------------------------------
; Entry: DE = VRAM address %????'??yy'yyyx'xxxx (the top six bits are shifted out and ignored,
;        so any 1 KiB aligned buffer address works)
; Modifies: A, B, DE, flags (DE is destroyed - save it if the VRAM address is needed afterwards)
; Return:
;   - carry set, HL = ULA address (72T)
;   - carry clear - out of range (72T): VRAM.x is 24..31 (ULA.Y >= 192); HL is overwritten
;     with an address >= $5800 and must not be used as pixel address
computeULAFromVRamAddressCW:
    ld      b,3
    bsla    de,b                ; DE = %???y'yyyy'xxxx'x000, E = ULA.Y
    ld      a,d                 ; A = %???y'yyyy
    ld      d,e                 ; D = ULA.Y = 8 * VRAM.x
    cpl                         ; 0x1F - (v & 0x1F) == (~v) & 0x1F (and upper bits are noise)
    add     a,a
    add     a,a
    add     a,a                 ; *8, which also shifts the noise bits out of A
    ld      e,a                 ; E = ULA.X = 8 * (0x1F - VRAM.y)
    pixelad                     ; HL = ULA address (computed even for ULA.Y >= 192)
    ld      a,h                 ; check validity of resulting address, HL >= 0x5800 had Y >= 192
    cp      $58                 ; cf=1 when address is valid
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; calculate ULA attribute address from ULA pixel address
; Entry: HL = ULA pixel address
; Modifies: A, flags
; Return: HL = ULA attribute address (H = $58 + bits 4..3 of the original H, L unchanged)
computeATTRFromULA:
    ld      a,h
    rra
    rra
    rra                         ; original bits 4..3 of H are now bits 1..0 (bit 7 got old carry)
    and     3                   ; keep only those two bits, dropping whatever was rotated in
    or      $58                 ; attribute area starts at $5800
    ld      h,a
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; print ASCII char stored at address DE to ULA address HL with default attribute
; (using ROM font: glyph address = $3C00 + 8*char, i.e. space char is at $3D00)
; Entry: DE = address of the char to print (pointer into VRAM buffer)
;        HL = ULA pixel address of the top pixel line of the target character cell
; Modifies: A, HL, flags (DE is preserved)
printCharFromDeAtHl:
    ; set attribute byte first
    push    hl
    call    computeATTRFromULA
    ld      (hl),$44            ; bright 1, paper 0, ink 4
    pop     hl
    ; print the char itself
    ld      a,(de)              ; char from VRAM to print
    push    de                  ; preserve the VRAM pointer, DE becomes the screen pointer
    ex      de,hl               ; DE = ULA address
    ld      hl,$3C00/8
    add     hl,a                ; HL = char_font_address/8
    .3 add  hl,hl               ; HL *= 8 ; ROM font address of char
    .8 ldws                     ; 8x LDWS will draw the char
    pop     de
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; wait for scanline 192, to time any printing into vertical-retrace period
; Modifies: A, BC, flags
; Side effect: NextReg $1F stays selected on the register-select port $243B
WaitForScanline192:
    ; read NextReg $1F - LSB of current raster line (MSB is always zero for line 192 -> ignored)
    ld      bc,$243B            ; TBBLUE_REGISTER_SELECT_P_243B
    ld      a,$1F               ; VIDEO_LINE_LSB_NR_1F
    out     (c),a               ; select NextReg $1F
    inc     b                   ; BC = TBBLUE_REGISTER_ACCESS_P_253B
    ; if already at scanline 192, then wait extra whole frame (for super-fast game loops)
.cantStartAt192:
    in      a,(c)               ; read the raster line LSB
    cp      192
    jr      z,.cantStartAt192   ; spin until the beam leaves line 192
    ; if not yet at scanline 192, wait for it ... wait for it ...
.waitLoop:
    in      a,(c)               ; read the raster line LSB
    cp      192
    jr      nz,.waitLoop        ; spin until the beam enters line 192
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; generate 16bit pseudo random value
; From http://map.grauw.nl/sources/external/z80bits.html#4.2 (Milos "baze" Bazelides Z80 bits)
; The seed is kept inside the code itself: label `.s` points at the 16bit immediate operand of
; the first `ld de,nn` (hence the `+1` offset), and the new value is written back there on exit.
Rand16:
    ; Out: HL = pseudo-random number, period 65536
    ; modifies: A, DE
.s+1:   ld      de,0            ; seed (self-modified operand)
    ld      a,d
    ld      h,e
    ld      l,253
    or      a                   ; clear carry ahead of the first SBC
    sbc     hl,de
    sbc     a,0
    sbc     hl,de
    ld      d,0
    sbc     a,d
    ld      e,a
    sbc     hl,de
    jr      nc,.storeSeed
    inc     hl
.storeSeed:
    ld      (.s),hl             ; result becomes the seed for the next call
    ret
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; main example code, displaying the 32x32 map rotated by the routine selected
; through the `computeULAFromVRamAddress` EQU (counter-clockwise in the default config)
start:
    ; border is black from NEX file definition, screen should be in ULA mode (by NEX loader reset)
    nextreg 7,3                 ; 28MHz mode
    ; display the initial print of VRAM buffer (do all 32x32 chars, handle invalid ULA addresses)
full_print:
    ld      de,VRAM
.loop:
    push    de                  ; the conversion routine destroys DE
    call    computeULAFromVRamAddress   ; cf=0 when outside of ULA screen, hl=address otherwise
    pop     de                  ; (POP does not change the carry flag)
    call    c,printCharFromDeAtHl       ; print it, if HL is valid ULA address
    inc     de
    bit     2,d                 ; D goes $90..$93 inside the buffer, bit 2 gets set at $94 (needs VRAM = $9000)
    jr      z,.loop             ; loop until VRAM+0x400 is reached (end of VRAM buffer)
    ; now produce random coordinates every frame and change colour of such char and tinker with space chars
main_loop:
    call    WaitForScanline192
.wasSpace:
    call    Rand16
    ld      a,h
    and     $03
    ld      h,a                 ; hl = random 0x000..0x3FF value (random 32x32 coordinates)
    ex      de,hl
    add     de,VRAM             ; de = random address into VRAM buffer
    ld      a,(de)
    cp      ' '
    jr      z,.wasSpace         ; space char picked -> draw another random position in the same frame
    call    computeULAFromVRamAddress   ; HL = ULA address of the random char at DE
    jr      nc,main_loop        ; outside of ULA range, don't change attribute (this frame is skipped)
    call    computeATTRFromULA
    inc     (hl)                ; increment ink
    res     3,(hl)              ; force black paper (ink 7 + 1 carries into paper bit 3, ink wraps to 0)
    jr      main_loop
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
; create NEX file with the example (entry point = start, initial stack pointer = stack_top)
    SAVENEX OPEN "cw32x32.nex", start, stack_top
    SAVENEX AUTO
    SAVENEX CLOSE
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment