Created
February 23, 2022 10:41
-
-
Save ped7g/c55bfa0d55ca13ce029549636cdd1de5 to your computer and use it in GitHub Desktop.
Z80 asm Erastothenes sieve to calculate prime numbers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Author: Ped7g ; (C) 2022 ; license: MIT / GPL / Public Domain (pick whichever fits best for you) | |
; assembled with: https://github.com/z00m128/sjasmplus/ | |
; based on https://github.com/MartinezTorres/z80_babel project, written as comparison | |
; DEFINE ORG_ADR $5D01 ; using maximum memory in zx48 snapshot -> prime numbers: 4339, last prime number 41491 | |
; DEFINE DO_MAX_BUFFER_CASE ; (4339 primes was with shorter test-code, not last version) | |
DEFINE ORG_ADR $8080 ; using only uncontended faster memory, but smaller buffers | |
OPT --syntax=abf | |
DEVICE ZXSPECTRUM48,ORG_ADR-1 | |
ORG ORG_ADR | |
test_start: | |
IFDEF DO_MAX_BUFFER_CASE | |
case0: ; pushing the limits sieving maximum primes possible, overlapping primes and work buffer up to $FFFF | |
ld ix,primes | |
ld de,primes+2 | |
ld bc,0x10000-(primes+2) | |
call sieve_a_1 | |
nop | |
ENDIF | |
case1: | |
; test with very low work_size == 3 (early exit with single prime "2") | |
ld ix,primes | |
ld de,work | |
ld bc,3 | |
call sieve_a_1 | |
nop ; expected result HL=1, primes[0] = 2 | |
case2: | |
ld ix,primes | |
ld de,work | |
ld bc,2048 ; workload like the original z80_babel test | |
call sieve_a_1 | |
nop ; expected result HL=0x0135 (309), primes[0] = ..., 0x7F7 (2039) | |
case3: | |
ld ix,primes | |
ld de,work | |
ld bc,work_size ; 26000 to produce 2860 primes | |
call sieve_a_1 | |
nop ; expected result HL=0x0B2C (2860), primes[0] = ..., 0x658F (25999) | |
; check result if it's as expected | |
ld a,6 | |
out (254),a | |
push hl | |
add hl,hl | |
ld bc,hl ; fake | |
ld de,0 ; crc add:xor | |
ld hl,primes | |
.crc | |
ld a,e | |
xor (hl) | |
ld e,a | |
xor d | |
add a,(hl) | |
ld d,a | |
inc hl | |
dec bc | |
ld a,b | |
or c | |
jr nz,.crc | |
ld a,4 | |
ld hl,0x0DEB | |
or a | |
sbc hl,de | |
jr z,.ok0 | |
ld a,2 | |
.ok0: | |
pop hl | |
ld de,0x0B2C | |
or a | |
sbc hl,de | |
jr z,.ok1 | |
ld a,2 | |
.ok1: | |
ld hl,25999 | |
ld de,(ix-2) ; fake | |
or a | |
sbc hl,de | |
jr z,.ok2 | |
ld a,2 | |
.ok2: | |
out (254),a | |
cp 4 | |
jr nz,$ | |
.timeL: | |
ei | |
halt | |
ld a,5 | |
out (254),a | |
ld ix,primes | |
ld de,work | |
ld bc,500 ;DEBUG | |
call sieve_a_1 | |
xor a | |
out (254),a | |
jr .timeL | |
jr $ | |
/* | |
uint16_t sieve_c_1(uint16_t *primes, uint8_t *work, uint16_t work_size ) { | |
for (uint16_t i = 0; i < work_size; i++) { | |
work[i] = 0x00; | |
} | |
uint16_t *p = primes; | |
*p++ = 2; | |
for (uint16_t i = 3; i < work_size; i+=2) { | |
if (work[i]) continue; | |
*p++ = i; | |
for (uint16_t j = i + i + i; j < work_size; j += i + i) | |
work[j] = 1; | |
} | |
return p-primes; | |
} | |
*/ | |
sieve_a_1: | |
; IX = primes | |
; DE = work (can overlap primes starting from primes+2 address) | |
; BC = work_size | |
; 108 bytes | |
; primes[0] = 2; | |
xor a | |
ld (ix+0),2 | |
ld (ix+1),a | |
; if work_size is < 4, return only one prime number 2 | |
ld hl,-4 | |
add hl,bc | |
jr nc,.small_size | |
; 4 <= work_size asserted | |
push ix ; store "primes" into stack for return code, IX becomes "p" | |
inc ix | |
inc ix ; p++; (2 is already written) | |
; clear the work buffer (from 0, like C code, even if starting from 3 is enough) | |
; for (uint16_t i = 0; i < work_size; i++) work[i] = 0x00; | |
ld l,e ; HL = work | |
ld h,d | |
push hl ; preserve original "work" on stack | |
inc de | |
ld (hl),a | |
dec bc | |
ldir | |
; HL = work + work_size - 1 => inverted equals to -(work+work_size) -> useful constant for tests | |
ld a,l | |
cpl | |
ld e,a | |
ld a,h | |
cpl | |
ld d,a ; DE = -(work+work_size) | |
; outer loop checking numbers | |
; for (uint16_t i = 3; i < work_size; i+=2) { | |
ld a,1 ; useful constant to check/fill work buffer | |
ld c,a ; BC = i-2 = 1 (BC was zero from `ldir`) | |
.outer_loop: | |
pop hl | |
push hl | |
inc bc ; i += 2 | |
inc bc | |
add hl,bc ; HL = work+i | |
jr c,.outer_loop_done | |
; if (work[i]) continue; | |
cp (hl) ; set ZF (the HL may be invalid here) | |
; test (i < work_size) <=> work+i < work+work_size <=> -(work+work_size)+work+i < 0 | |
add hl,de ; preserves ZF | |
jr c,.outer_loop_done | |
jr z,.outer_loop ; "continue" part of `if (work[i])` | |
sbc hl,de ; restore HL back to work+i | |
; is prime ; HL = work+i, BC = i, DE = -(work+work_size), IX = p | |
; *p++ = i | |
ld (ix+0),c | |
ld (ix+1),b | |
inc ix | |
inc ix | |
; inner loop setting the sieve | |
; for (uint16_t j = i + i + i; j < work_size; j += i + i) work[j] = 1; | |
push bc | |
push de | |
ex de,hl | |
add hl,bc | |
jr c,.inner_setup_ov | |
add hl,bc | |
jr c,.inner_setup_ov | |
ld c,l | |
ld b,h | |
ex de,hl ; BC = i+i-(work+work_size), HL = work+i | |
pop de ; DE = -(work+work_size) | |
jr .inner_loop_entry | |
.inner_setup_ov: | |
pop de | |
pop bc | |
jr .outer_loop | |
.inner_loop: | |
sbc hl,de ; restore hl to hl+j | |
ld (hl),a ; work[j] = 1 | |
.inner_loop_entry: | |
add hl,bc ; hl += i+i + test for work_size, none of this overflow | |
jr nc,.inner_loop | |
pop bc | |
jr .outer_loop | |
.outer_loop_done: | |
; return p-primes; | |
pop de ; throw away "work" pointer | |
pop hl ; primes | |
ld a,ixl | |
sub l | |
ld l,a | |
ld a,ixh | |
sbc a,h ; cf:A:L = (p - primes) * 2; // technically 17b result | |
rra | |
rr l | |
ld h,a ; HL = number of primes | |
ret | |
.small_size: | |
ld hl,1 | |
ret ; return value 1 (number of primes in `primes`) | |
DISPLAY "sieve_a_1 code size: ",/A,$-sieve_a_1 | |
primes: ds 2*3100,$AA | |
work: ds 26000,$BB ; enough for 2860 primes, ending with 25999 | |
work_end: | |
work_size EQU work_end-work | |
SAVESNA "sieve.sna", test_start ;; DEBUG snapshot |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment