Created
March 1, 2022 13:04
-
-
Save ped7g/0c4e94796b474994ed88d0bdd1bf2f25 to your computer and use it in GitHub Desktop.
Z80 asm quicksort of uint16_t array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Author: Ped7g ; (C) 2022 ; license: MIT / GPL / Public Domain (pick whichever fits best for you) | |
; assembled with: https://github.com/z00m128/sjasmplus/ | |
; based on https://github.com/MartinezTorres/z80_babel project, written as comparison | |
; | |
; Notes for myself about debugging/verification: in CSpect debugger `SAVE "data.bin",from,length` | |
; Then `hexdump -v -e '/2 "%u\n"' DATA.BIN > data.txt`. Then compare with `sort -g data.txt` | |
DEFINE ORG_ADR $8080 ; using only uncontended faster memory, but smaller buffers | |
OPT --syntax=abf : CSPECTMAP "qsort.map" | |
DEVICE ZXSPECTRUM48, ORG_ADR-1 | |
ORG ORG_ADR | |
/* // C routine from z80_babel project, used for algorithm in asm code | |
void quicksort_c(uint16_t *A, uint16_t len) { | |
if (len < 2) return; | |
uint16_t pivot = A[len / 2]; | |
uint16_t i, j; | |
for (i = 0, j = len - 1; ; i++, j--) { | |
while (A[i] < pivot) i++; | |
while (A[j] > pivot) j--; | |
if (i >= j) break; | |
uint16_t temp = A[i]; | |
A[i] = A[j]; | |
A[j] = temp; | |
} | |
quicksort_c(A, i); | |
quicksort_c(A + i, len - i); | |
} | |
*/ | |
;-------------------------------------------------------------------------------------------------------------------- | |
; Quicksort, inputs (__sdcccall(1) calling convention): | |
; HL = uint16_t* A (pointer to beginning of array) | |
; DE = uint16_t len (number of word elements in array) | |
; modifies: AF, A'F', BC, DE, HL | |
; WARNING: array can't be aligned to start/end of 64ki address space, like HL == 0x0000, or having last value at 0xFFFE | |
; WARNING: stack space required is on average about 6*log(len) (depending on the data, in extreme case it may be more) | |
quicksort_a: | |
; convert arguments to HL=A.begin(), DE=A.end() and continue with quicksort_a_impl | |
ex de,hl | |
add hl,hl | |
add hl,de | |
ex de,hl | |
; | | |
; fallthrough into implementation | |
; | | |
; v | |
;-------------------------------------------------------------------------------------------------------------------- | |
; Quicksort implementation, inputs: | |
; HL = uint16_t* A.begin() (pointer to beginning of array) | |
; DE = uint16_t* A.end() (pointer beyond array) | |
; modifies: AF, A'F', BC, HL (DE is preserved) | |
quicksort_a_impl: | |
; array must be located within 0x0002..0xFFFD | |
ld c,l | |
ld b,h ; BC = A.begin() | |
; if (len < 2) return; -> if (end <= begin+2) return; | |
inc hl | |
inc hl | |
or a | |
sbc hl,de ; HL = -(2*len-2), len = (2-HL)/2 | |
ret nc ; case: begin+2 >= end <=> (len < 2) | |
push de ; preserve A.end() for recursion | |
push bc ; preserve A.begin() for recursion | |
; uint16_t pivot = A[len / 2]; | |
rr h | |
rr l | |
dec hl | |
res 0,l | |
add hl,de | |
ld a,(hl) | |
inc hl | |
ld l,(hl) | |
ld h,b | |
ld b,l | |
ld l,c | |
ld c,a ; HL = A.begin(), DE = A.end(), BC = pivot | |
; flip HL/DE meaning, it makes simpler the recursive tail and (A[j] > pivot) test | |
ex de,hl ; DE = A.begin(), HL = A.end(), BC = pivot | |
dec de ; but keep "from" address (related to A[i]) at -1 as "default" state | |
; for (i = 0, j = len - 1; ; i++, j--) { ; DE = (A+i-1).hi, HL = A+j+1 | |
.find_next_swap: | |
; while (A[j] > pivot) j--; | |
.find_j: | |
dec hl | |
ld a,b | |
sub (hl) | |
dec hl ; HL = A+j (finally) | |
jr c,.find_j ; if cf=1, A[j].hi > pivot.hi | |
jr nz,.j_found ; if zf=0, A[j].hi < pivot.hi | |
ld a,c ; if (A[j].hi == pivot.hi) then A[j].lo vs pivot.lo is checked | |
sub (hl) | |
jr c,.find_j | |
.j_found: | |
; while (A[i] < pivot) i++; | |
.find_i: | |
inc de | |
ld a,(de) | |
inc de ; DE = (A+i).hi (ahead +0.5 for swap) | |
sub c | |
ld a,(de) | |
sbc a,b | |
jr c,.find_i ; cf=1 -> A[i] < pivot | |
; if (i >= j) break; // DE = (A+i).hi, HL = A+j, BC=pivot | |
sbc hl,de ; cf=0 since `jr c,.find_i` | |
jr c,.swaps_done | |
add hl,de ; DE = (A+i).hi, HL = A+j | |
; swap(A[i], A[j]); | |
inc hl | |
ld a,(de) | |
ldd | |
ex af,af' | |
ld a,(de) | |
ldi | |
ex af,af' | |
ld (hl),a ; Swap(A[i].hi, A[j].hi) done | |
dec hl | |
ex af,af' | |
ld (hl),a ; Swap(A[i].lo, A[j].lo) done | |
inc bc | |
inc bc ; pivot value restored (was -=2 by ldd+ldi) | |
; --j; HL = A+j is A+j+1 for next loop (ready) | |
; ++i; DE = (A+i).hi is (A+i-1).hi for next loop (ready) | |
jp .find_next_swap | |
.swaps_done: | |
; i >= j, all elements were already swapped WRT pivot, call recursively for the two sub-parts | |
dec de ; DE = A+i | |
; quicksort_c(A, i); | |
pop hl ; HL = A | |
call quicksort_a_impl | |
; quicksort_c(A + i, len - i); | |
ex de,hl ; HL = A+i | |
pop de ; DE = end() (and return it preserved) | |
jp quicksort_a_impl | |
DISPLAY "quicksort_a code size: ",/A,$-quicksort_a | |
DISPLAY "quicksort_a_impl code size: ",/A,$-quicksort_a_impl | |
;-------------------------------------------------------------------------------------------------------------------- | |
; test harness to debug and tune the routine | |
test_start: | |
; check empty array and len=1 array (shouldn't touch buffer, just exit) | |
ld hl,test_array | |
ld de,0 | |
call quicksort_a | |
ld hl,test_array | |
ld de,1 | |
call quicksort_a | |
; check simple 1-9 array | |
ld hl,test_array | |
ld de,test_array.count | |
call quicksort_a | |
; sort chunk of ROM and auto-test the result by code | |
ld a,3 | |
out ($FE),a | |
ld hl,time_test_array | |
ld de,test_array | |
ld bc,2048<<1 | |
ldir | |
; sort it | |
ld hl,test_array | |
ld de,2048 | |
call quicksort_a | |
; check it a bit (just order of elements, and xor checksum) | |
ld a,2 | |
out ($FE),a ; also xor-CRC seed | |
di | |
ld (.oldsp+1),sp | |
ld bc,$FF00 | high 2048 ; B = 255, C = 8x256 -> 2047x sbc | |
ld sp,test_array | |
pop de | |
xor e | |
xor d | |
.check_order: | |
ex de,hl | |
pop de | |
xor e | |
xor d | |
scf | |
sbc hl,de | |
jr nc,$ ; bad order, get stuck | |
djnz .check_order | |
dec c | |
jr nz,.check_order | |
; check xor-CRC against ROM | |
ld sp,time_test_array | |
ld bc,high 2048 ; B = 0, C = 8x256 | |
.check_crc: | |
pop de | |
xor e | |
xor d | |
djnz .check_crc | |
dec c | |
jr nz,.check_crc | |
cp 2 | |
jr nz,$ ; bad xor-CRC, get stuck | |
.oldsp: | |
ld sp,0 | |
ld a,4 | |
out ($FE),a | |
timing_check: | |
; sync with interrupt | |
ld a,1 | |
out ($FE),a | |
ei | |
halt | |
; copy shuffled original data - RED border | |
ld a,3 | |
out ($FE),a | |
ld hl,time_test_array | |
ld de,test_array | |
ld bc,time_test_array.count<<1 | |
ldir | |
; run the sort | |
ld a,5 | |
out ($FE),a | |
ld hl,test_array | |
ld de,time_test_array.count | |
call quicksort_a | |
jr timing_check | |
ALIGN 256 | |
test_array: | |
; dw 1,2,3,4,5,6,7,8,9 | |
dw 8,6,4,2,1,3,5,7,9 | |
.count: EQU ($-test_array)/2 | |
; time_test_array: EQU $0000 | |
; .count: EQU 2048 | |
time_test_array: EQU $0000 | |
.count: EQU 74 | |
; ^ max array length sortable within single frame (20ms) in CSpect emulator (with IM1) | |
SAVESNA "qsort.sna", test_start ;; DEBUG snapshot |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment