raybellis · October 1, 2020 10:06
diff --git a/div10.s b/div10.s
 ; 16-bit input value x, low byte first
 .exportzp	X0 := $10
 .exportzp	X1 := X0 + 1

 ; 32-bit working result r, low byte first
 ; the final result is read from the upper half, [R3:R2]
 .exportzp	R0 := X0 + 2
 .exportzp	R1 := R0 + 1
 .exportzp	R2 := R1 + 1
 .exportzp	R3 := R2 + 1

 ; 24-bit scratch value y, low byte first
 .exportzp	Y0 := R0 + 4
 .exportzp	Y1 := Y0 + 1
 .exportzp	Y2 := Y1 + 1

 ;
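 ; algorithm is to multiply by 6554 and divide by 65536 (2^16)
 ; 6554 = 0x199A
 ;
 ; nb: 6554 / 65536 is slightly larger than 1 / 10, so the top 16 bits of
 ; the product exceed floor(x / 10) by one for some x >= 16389
 ; (e.g. 16389 * 6554 = 107413506 = 1639 * 65536 + 2)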
 ;
 ; r = (x << 12) + (x << 11) + (x << 8) + (x << 7) + (x << 4) + (x << 3) + (x << 1)
 ;
 ; but noting that there are some consecutive pairs of bits, generate
 ;   y = (x << 4) + (x << 3)
 ; and then use:
 ;   r = (y << 8) + (y << 4) + y + (x << 1)
 ;

 ;
 ; shift_y count
 ;
 ; shifts the 24-bit scratch value [Y2:Y1:Y0] left by `count` bits
 ; the asl/rol/rol triple is unrolled `count` times at assembly time, and
 ; the last bit rotated out of Y2 is left in the carry flag
 ;
 .macro	shift_y	_count
 	.repeat _count
 	asl	Y0		; low byte, bit 7 -> carry
 	rol	Y1		; carry -> middle byte -> carry
 	rol	Y2		; carry -> high byte -> carry
 	.endrep
 .endmacro

 ;
 ; add src, dst
 ;
 ; one byte of a multi-byte addition: dst = src + dst + carry
 ; src may be any operand accepted by lda (including an immediate),
 ; dst must be a memory location
 ; the carry flag is not cleared here, so that consecutive uses chain
 ; the carry from one byte into the next; the caller issues clc first
 ; clobbers A
 ;
 .macro	add	_src, _dst
 	lda	_src
 	adc	_dst
 	sta	_dst
 .endmacro

 ;
 ; test harness: the image is assembled at address 0 so that the data
 ; bytes below line up with the zero page locations used by div10
 ;
 .org	$0
 	jsr	div10		; $00-$02
 	brk			; $03
 	.res	12, 0		; $04-$0F: padding up to X0
 	.word	60000		; $10-$11: [X1:X0], the value to divide
 	.res	14, 0		; $12-$1F: R0-R3, Y0-Y2 and spare bytes, zeroed

 ;
 ; div10 - divide an unsigned 16-bit value by ten
 ;
 ; in:       [X1:X0] = x (left unmodified)
 ; out:      [R3:R2] = floor(x / 10)
 ; clobbers: A, flags, R0, R1 and the scratch bytes Y0-Y2
 ; nb: adc/sbc are used for binary arithmetic, so this assumes decimal
 ;     mode is off - the routine does not execute cld itself
 ;
 ; The quotient is estimated as (x * 6554) >> 16.  6554 / 65536 is slightly
 ; larger than 1 / 10, so the estimate is one too big for some x >= 16389
 ; (16389 itself gives 1639); the final step compares 10 * estimate with x
 ; and decrements the estimate when it overshoots.
 ;
 div10:
 	;
 	; generate x << 1, store in r and y
 	; y = x << 1
 	; r = x << 1
 	;
 	lda	X0		; byte 0
 	asl	A
 	sta	R0
 	sta	Y0
 	lda	X1		; byte 1
 	rol	A
 	sta	R1
 	sta	Y1
 	lda	#0		; clear byte 3 of r for later
 	sta	R3
 	adc	#0		; process carry bit into byte 2
 	sta	R2
 	sta	Y2

 	;
 	; add x to y -> y = (x << 1) + x
 	; and then shift 3 times -> y = (x << 4) + (x << 3)
 	; nb: y has 20 bits of data
 	;
 	clc
 	add	X0, Y0
 	add	X1, Y1
 	add	#0, Y2
 	shift_y	3

 	;
 	; r = r + y -> r = y + (x << 1)
 	; -> R[2:0] = R[2:0] + Y[2:0] (nb: no carry possible into the MSB)
 	;
 	clc
 	add	Y0, R0
 	add	Y1, R1
 	add	Y2, R2

 	;
 	;      r = r + (y << 8)
 	; R[3:1] = R[3:1] + Y[2:0]
 	;
 	; -> r = (y << 8) + y + (x << 1)
 	;
 	clc
 	add	Y0, R1
 	add	Y1, R2
 	add	Y2, R3

 	;
 	; y = y << 4
 	; nb: this puts the top bit of y into the carry flag but there's also
 	; the possibility of a 1 remaining in bit 7, so we can't ignore the LSB
 	;
 	shift_y	4

 	;
 	; r = r + (y << 4)
 	;
 	add	#0, R3	; process bit 24 from the carry
 	clc		; and then the rest of the addition
 	add	Y0, R0
 	add	Y1, R1
 	add	Y2, R2
 	add	#0, R3

 	;
 	; [R3:R2] now holds the estimate q, which is either exact or one
 	; too big.  build y = 10 * q in [Y1:Y0] and compare it with x
 	; nb: q <= 6553 for any 16-bit x, so 10 * q fits in 16 bits
 	;
 	lda	R2		; y = q
 	sta	Y0
 	lda	R3
 	sta	Y1
 	asl	Y0		; y = q << 2
 	rol	Y1
 	asl	Y0
 	rol	Y1
 	clc			; y = (q << 2) + q
 	add	R2, Y0
 	add	R3, Y1
 	asl	Y0		; y = 10 * q
 	rol	Y1

 	lda	X0		; 16-bit compare: carry set if x >= y
 	cmp	Y0
 	lda	X1
 	sbc	Y1
 	bcs	@done		; estimate was exact

 	lda	R2		; estimate overshot: q = q - 1
 	bne	@nodec		; borrow from R3 only when R2 is zero
 	dec	R3
 @nodec:
 	dec	R2
 @done:
 	rts
	; 16-bit input value x, low byte first
	.exportzp X0 := $10
	.exportzp X1 := X0 + 1

	; 32-bit working result r, low byte first
	; the final result is read from the upper half, [R3:R2]
	.exportzp R0 := X0 + 2
	.exportzp R1 := R0 + 1
	.exportzp R2 := R1 + 1
	.exportzp R3 := R2 + 1

	; 24-bit scratch value y, low byte first
	.exportzp Y0 := R0 + 4
	.exportzp Y1 := Y0 + 1
	.exportzp Y2 := Y1 + 1

	;
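	; algorithm is to multiply by 6554 and divide by 65536 (2^16)
	; 6554 = 0x199A
	;
	; nb: 6554 / 65536 is slightly larger than 1 / 10, so the top 16 bits of
	; the product exceed floor(x / 10) by one for some x >= 16389
	; (e.g. 16389 * 6554 = 107413506 = 1639 * 65536 + 2)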
	;
	; r = (x << 12) + (x << 11) + (x << 8) + (x << 7) + (x << 4) + (x << 3) + (x << 1)
	;
	; but noting that there are some consecutive pairs of bits, generate
	; y = (x << 4) + (x << 3)
	; and then use:
	; r = (y << 8) + (y << 4) + y + (x << 1)
	;

	;
	; shift_y count
	;
	; shifts the 24-bit scratch value [Y2:Y1:Y0] left by `count` bits
	; the asl/rol/rol triple is unrolled `count` times at assembly time, and
	; the last bit rotated out of Y2 is left in the carry flag
	;
	.macro shift_y _count
	.repeat _count
	asl Y0 ; low byte, bit 7 -> carry
	rol Y1 ; carry -> middle byte -> carry
	rol Y2 ; carry -> high byte -> carry
	.endrep
	.endmacro

	;
	; add src, dst
	;
	; one byte of a multi-byte addition: dst = src + dst + carry
	; src may be any operand accepted by lda (including an immediate),
	; dst must be a memory location
	; the carry flag is not cleared here, so that consecutive uses chain
	; the carry from one byte into the next; the caller issues clc first
	; clobbers A
	;
	.macro add _src, _dst
	lda _src
	adc _dst
	sta _dst
	.endmacro

	;
	; test harness: the image is assembled at address 0 so that the data
	; bytes below line up with the zero page locations used by div10
	;
	.org $0
	jsr div10 ; $00-$02
	brk ; $03
	.res 12, 0 ; $04-$0F: padding up to X0
	.word 60000 ; $10-$11: [X1:X0], the value to divide
	.res 14, 0 ; $12-$1F: R0-R3, Y0-Y2 and spare bytes, zeroed

	;
	; div10 - divide an unsigned 16-bit value by ten
	;
	; in: [X1:X0] = x (left unmodified)
	; out: [R3:R2] = floor(x / 10)
	; clobbers: A, flags, R0, R1 and the scratch bytes Y0-Y2
	; nb: adc/sbc are used for binary arithmetic, so this assumes decimal
	; mode is off - the routine does not execute cld itself
	;
	; The quotient is estimated as (x * 6554) >> 16. 6554 / 65536 is slightly
	; larger than 1 / 10, so the estimate is one too big for some x >= 16389
	; (16389 itself gives 1639); the final step compares 10 * estimate with x
	; and decrements the estimate when it overshoots.
	;
	div10:
	;
	; generate x << 1, store in r and y
	; y = x << 1
	; r = x << 1
	;
	lda X0 ; byte 0
	asl A
	sta R0
	sta Y0
	lda X1 ; byte 1
	rol A
	sta R1
	sta Y1
	lda #0 ; clear byte 3 of r for later
	sta R3
	adc #0 ; process carry bit into byte 2
	sta R2
	sta Y2

	;
	; add x to y -> y = (x << 1) + x
	; and then shift 3 times -> y = (x << 4) + (x << 3)
	; nb: y has 20 bits of data
	;
	clc
	add X0, Y0
	add X1, Y1
	add #0, Y2
	shift_y 3

	;
	; r = r + y -> r = y + (x << 1)
	; -> R[2:0] = R[2:0] + Y[2:0] (nb: no carry possible into the MSB)
	;
	clc
	add Y0, R0
	add Y1, R1
	add Y2, R2

	;
	; r = r + (y << 8)
	; R[3:1] = R[3:1] + Y[2:0]
	;
	; -> r = (y << 8) + y + (x << 1)
	;
	clc
	add Y0, R1
	add Y1, R2
	add Y2, R3

	;
	; y = y << 4
	; nb: this puts the top bit of y into the carry flag but there's also
	; the possibility of a 1 remaining in bit 7, so we can't ignore the LSB
	;
	shift_y 4

	;
	; r = r + (y << 4)
	;
	add #0, R3 ; process bit 24 from the carry
	clc ; and then the rest of the addition
	add Y0, R0
	add Y1, R1
	add Y2, R2
	add #0, R3

	;
	; [R3:R2] now holds the estimate q, which is either exact or one
	; too big. build y = 10 * q in [Y1:Y0] and compare it with x
	; nb: q <= 6553 for any 16-bit x, so 10 * q fits in 16 bits
	;
	lda R2 ; y = q
	sta Y0
	lda R3
	sta Y1
	asl Y0 ; y = q << 2
	rol Y1
	asl Y0
	rol Y1
	clc ; y = (q << 2) + q
	add R2, Y0
	add R3, Y1
	asl Y0 ; y = 10 * q
	rol Y1

	lda X0 ; 16-bit compare: carry set if x >= y
	cmp Y0
	lda X1
	sbc Y1
	bcs @done ; estimate was exact

	lda R2 ; estimate overshot: q = q - 1
	bne @nodec ; borrow from R3 only when R2 is zero
	dec R3
	@nodec:
	dec R2
	@done:
	rts
No results found