Skip to content

Instantly share code, notes, and snippets.

@wolfmanjm
Created June 19, 2025 18:02
Show Gist options
  • Select an option

  • Save wolfmanjm/b64fb1e3c05b8a0c9261a3ec25dce71b to your computer and use it in GitHub Desktop.

Select an option

Save wolfmanjm/b64fb1e3c05b8a0c9261a3ec25dce71b to your computer and use it in GitHub Desktop.
multicore.s
# -----------------------------------------------------------------------------
  Definition Flag_visible, "execute-coprocessor"
execute_coprocessor: # ( xt -- ) Start the Forth definition xt on core 1
# -----------------------------------------------------------------------------
    # Publish the execution token (currently TOS) in the global variable that
    # coprocessor_trampoline reads once core 1 is running.
    # li rather than la: the address deliberately bypasses the linker facilities.
    li      a4, trampolineaddr
    sw      s0, 0(a4)
    drop                            # remove xt from the data stack

    # Hand over to the launcher, which executes on core 0. It saves and
    # restores ra and x3..x13 itself and returns straight to our caller.
    j       launch_core1
# -----------------------------------------------------------------------------
Definition Flag_visible, "stop-coprocessor"
stop_coprocessor: # ( -- ) Force core 1 to stop and reset
# -----------------------------------------------------------------------------
# This word executes on the calling core (core 0) and only tail-jumps to
# stop_core1. stop_core1 saves and restores ra and x3 to x13 around its work
# and returns directly to our caller.
j stop_core1
# -----------------------------------------------------------------------------
  Definition Flag_visible, "trampoline-xt"
# ( -- addr ) Expose the address of the trampoline global variable to Forth.
# Testing aid only; to be removed later.
# -----------------------------------------------------------------------------
    pushdatos                       # make room: previous TOS goes to the data stack
    li      s0, trampolineaddr      # new TOS = address of the variable (li, no linker)
    ret
# -----------------------------------------------------------------------------
coprocessor_trampoline: # Entry point executed by the second core
# -----------------------------------------------------------------------------
    # Core 1 has no valid Forth context yet, so there is nothing to preserve.
    # Any additional core 1 initialisation belongs right here.

    # Start the cycle counter.
    # MCOUNTINHIBIT (CSR 0x320) = 4: minstret(h) stays stopped, mcycle(h) runs.
    csrwi   0x320, 4

    # Set up the registers the Forth definition expects.
    li      sp, returnstackcore1begin   # return stack
    li      s1, datastackcore1begin     # data stack
    li      s0, 42                      # TOS "stack canary"; not strictly necessary

    # Fetch the entry point stored in the global variable and call it.
    li      a4, trampolineaddr          # li, not la: no linker involved here
    lw      a4, 0(a4)
    jalr    ra, 0(a4)

trampoline_trap:                        # Catch execution if the definition returns.
    j       trampoline_trap
# -----------------------------------------------------------------------------
# x0 zero Hard-wired zero —
# x1 ra Return address Caller
# x2 sp return stack Stack pointer Callee
# x3 gp loop index
# x4 tp loop limit
# x5–7 t0–2 Scratch register, needs to be saved.
# x8 s0/fp TOS
# x9 s1 PSP Set data stack pointer
# x10–13 a0–3 Scratch register, needs to be saved.
# x14–17 a4–7 Free scratch register, not saved across calls.
# x18–27 s2–11 Free scratch register, not saved across calls.
# x28–31 t3–6 Free scratch register, not saved across calls.
# Registers saved by pushregs and restored by popregs (ra is saved as well):
# x3=gp, x4=tp, x5=t0, x6=t1, x7=t2, x8=s0, x9=s1, x10=a0, x11=a1, x12=a2, x13=a3
# -----------------------------------------------------------------------------
# pushregs: open a 48-byte frame on the stack addressed by sp and store ra
# together with x3..x13 (gp, tp, t0-t2, s0, s1, a0-a3) in it.
# The frame layout is the one popregs reads back.
.macro pushregs
    addi    sp, sp, -48
    sw      ra,  0(sp)
    sw      gp,  4(sp)          # x3
    sw      tp,  8(sp)          # x4
    sw      t0, 12(sp)          # x5
    sw      t1, 16(sp)          # x6
    sw      t2, 20(sp)          # x7
    sw      s0, 24(sp)          # x8
    sw      s1, 28(sp)          # x9
    sw      a0, 32(sp)          # x10
    sw      a1, 36(sp)          # x11
    sw      a2, 40(sp)          # x12
    sw      a3, 44(sp)          # x13
.endm
# popregs: reload ra and x3..x13 (gp, tp, t0-t2, s0, s1, a0-a3) from the
# 48-byte frame written by pushregs, then release the frame.
.macro popregs
    lw      ra,  0(sp)
    lw      gp,  4(sp)          # x3
    lw      tp,  8(sp)          # x4
    lw      t0, 12(sp)          # x5
    lw      t1, 16(sp)          # x6
    lw      t2, 20(sp)          # x7
    lw      s0, 24(sp)          # x8
    lw      s1, 28(sp)          # x9
    lw      a0, 32(sp)          # x10
    lw      a1, 36(sp)          # x11
    lw      a2, 40(sp)          # x12
    lw      a3, 44(sp)          # x13
    addi    sp, sp, 48
.endm
# Data handed off to the bootrom in order to start core 1.
# launch_core1 walks the words from cmd_sequence up to (not including)
# cmd_sequence_end and sends them one by one through the SIO FIFO.
.p2align 4
# 16-byte aligned, 256-byte area used as the initial stack of core 1.
# coprocessor_trampoline replaces it with returnstackcore1begin.
core1_sp:
.dcb.b 256
# End of the initial stack area; this address is the stack pointer value
# sent to core 1 in the sequence below.
core1_sp_end:
# NOTE(review): purpose of this extra word is not visible here; presumably padding.
.word 0
# Launch sequence: 0, 0, 1, vector table address, stack pointer, entry point.
cmd_sequence:
.word 0
.word 0
.word 1
.word __VECTOR_TABLE
.word core1_sp_end
# Patched at run time by launch_core1 with the address of coprocessor_trampoline.
core1_entry:
.word 0 # entry
# Marks the end of the sequence; the word below is not sent.
cmd_sequence_end:
.word 0
# The bootrom seems to need a vector table for core 1 (maybe not, there is no way to tell).
# The table address is sent to core 1 as part of cmd_sequence.
.p2align 6
# 64-byte aligned table of jump instructions.
__VECTOR_TABLE:
# Hardware vector table for standard RISC-V interrupts, indicated by `mtvec`.
# norvc keeps every `j` at 4 bytes so that, together with the zero filler
# words, the jumps sit in word slots 0, 3, 7 and 11 of the table.
# norelax keeps the instructions from being altered by relaxation.
.option push
.option norvc
.option norelax
j isr_riscv_machine_exception
.word 0
.word 0
j isr_riscv_machine_soft_irq
.word 0
.word 0
.word 0
j isr_riscv_machine_timer
.word 0
.word 0
.word 0
j isr_riscv_machine_external_irq
.option pop
# Default handlers: each one is an endless loop jumping to itself.
isr_riscv_machine_exception: j isr_riscv_machine_exception
isr_riscv_machine_soft_irq: j isr_riscv_machine_soft_irq
isr_riscv_machine_timer: j isr_riscv_machine_timer
isr_riscv_machine_external_irq: j isr_riscv_machine_external_irq
# Register definitions used below
# SIO inter-core FIFO: register offsets are added to SIO_BASE.
.equ SIO_BASE, 0xd0000000
.equ _SIO_FIFO_ST, 0x050
.equ _SIO_FIFO_WR, 0x054
.equ _SIO_FIFO_RD, 0x058
# FIFO status bits: VLD is tested before reading, RDY before writing.
.equ SIO_FIFO_ST_VLD_BITS, 0x00000001
.equ SIO_FIFO_ST_RDY_BITS, 0x00000002
# PSM block: register offsets (leading underscore), bit masks (b_) and bit
# numbers (o_) for processor 1.
# _FRCE_ON, b_FRCE_ON_PROC1, _DONE and b_DONE_PROC1 are not referenced in the
# code visible here.
.equ PSM_BASE, 0x40018000
.equ _FRCE_ON, 0x00000000
.equ b_FRCE_ON_PROC1, 1<<24
.equ _FRCE_OFF, 0x00000004
.equ b_FRCE_OFF_PROC1, 1<<24
.equ o_FRCE_OFF_PROC1, 24
.equ _DONE, 0x0000000c
.equ b_DONE_PROC1, 1<<24
# Address aliases OR-ed into a peripheral base address to select the write mode.
# Only WRITE_SET and WRITE_CLR are used in the code visible here.
.equ WRITE_NORMAL, (0x0000) # Normal read write access
.equ WRITE_XOR , (0x1000) # Atomic XOR on write
.equ WRITE_SET , (0x2000) # Atomic bitmask set on write
.equ WRITE_CLR , (0x3000) # Atomic bitmask clear on write
# Interrupt number passed to enable_irq in a0.
.equ SIO_IRQ_FIFO, 25 # Select SIO's IRQ_FIFO output
# CSR numbers used directly as the CSR operand of csrs/csrc in enable_irq.
.equ RVCSR_MEIEA_OFFSET, 0x00000be0
.equ RVCSR_MEIFA_OFFSET, 0x00000be2
# Not referenced in the code visible here.
.equ RVCSR_MIE_MEIE_BITS, 0x00000800
.equ RVCSR_MSTATUS_MIE_BITS, 0x00000008
# enable_irq: enable or disable one interrupt in the Hazard3 IRQ arrays.
#   In:     a0 = interrupt number
#           a1 = 0 to disable, non-zero to enable
#   Clobb:  t0, t1, t2
#
# Assembly rendering of
#   irq_set_mask_n_enabled(num / 32, 1u << (num % 32), enabled);
# The 32-bit mask is split into two 16-bit halves that live in windows 2*n and
# 2*n+1. Each CSR operand carries the half-mask in its upper 16 bits and the
# window index in its low bits. Access order per half:
#   enable : clear MEIFA, then set MEIEA
#   disable: clear MEIEA
enable_irq:
    srli    t0, a0, 5                   # n = num / 32
    slli    t0, t0, 1                   # window of the low half = 2*n
    andi    t1, a0, 31                  # bit position = num % 32
    bset    t1, zero, t1                # mask = 1 << bit position

    slli    t2, t1, 16                  # (mask & 0xffff) << 16
    or      t2, t2, t0                  # t2 = operand for window 2*n

    srli    t1, t1, 16                  # mask >> 16
    slli    t1, t1, 16                  # ... moved into the upper 16 bits
    addi    t0, t0, 1                   # window of the high half = 2*n + 1
    or      t1, t1, t0                  # t1 = operand for window 2*n + 1

    beqz    a1, 1f

    # Enable path
    csrc    RVCSR_MEIFA_OFFSET, t2
    csrs    RVCSR_MEIEA_OFFSET, t2
    csrc    RVCSR_MEIFA_OFFSET, t1
    csrs    RVCSR_MEIEA_OFFSET, t1
    ret

    # Disable path
1:  csrc    RVCSR_MEIEA_OFFSET, t2
    csrc    RVCSR_MEIEA_OFFSET, t1
    ret
# launch_core1: feeds the SIO FIFO with cmd_sequence to get the bootrom to start up core1.
# Takes no arguments. ra and x3..x13 are preserved via pushregs/popregs;
# t3 and t4 are clobbered. The FIFO IRQ is disabled and left disabled.
# There is no timeout: the routine loops until every word has been echoed back.
# Register use: t0 = pointer into cmd_sequence, t2 = word being sent,
#               t3 = SIO_BASE, t4 = scratch (status / response).
launch_core1:
pushregs
# core1 will run the coprocessor_trampoline code above
# and initially use the stack above but trampoline will fix that
# Patch the entry-point slot of the command sequence.
la t0, core1_entry
la t1, coprocessor_trampoline
sw t1, 0(t0)
# disable FIFO IRQ (enable_irq with a1 = 0)
# NOTE(review): the numeric label on the next line is not referenced anywhere.
1: li a0, SIO_IRQ_FIFO
li a1, 0
call enable_irq
li t3, SIO_BASE
# send sequence to core1; `ta` is the restart point after a wrong response
ta: la t0, cmd_sequence
# per-word loop: fetch the next command word
1: lw t2, 0(t0)
# non-zero words are sent right away, zero words drain the read FIFO first
bnez t2, 3f
# drain fifo: read and discard words while the status reports valid data
2: lw t4, _SIO_FIFO_ST(t3)
andi t4, t4, SIO_FIFO_ST_VLD_BITS
beqz t4, 3f
lw t4, _SIO_FIFO_RD(t3)
slt x0, x0, x1 # SEV h3.unblock
j 2b
# wait for room in FIFO (busy-wait on the RDY status bit)
3: lw t4, _SIO_FIFO_ST(t3)
andi t4, t4, SIO_FIFO_ST_RDY_BITS
beqz t4, 3b
# write cmd to core1 fifo
sw t2, _SIO_FIFO_WR(t3)
slt x0, x0, x1 # SEV h3.unblock
# wait for response: block until the status reports valid data
4: lw t4, _SIO_FIFO_ST(t3)
andi t4, t4, SIO_FIFO_ST_VLD_BITS
bnez t4, 5f
slt x0, x0, x0 # WFE h3.block
j 4b
# read response and compare with what we sent
5: lw t4, _SIO_FIFO_RD(t3)
bne t4, t2, ta # move to next state on correct response (echoed value), otherwise start over
addi t0, t0, 4 # seq+=4
# loop until the pointer reaches the end of the sequence
la t4, cmd_sequence_end
bne t0, t4, 1b
# we are done and core1 should be running now
popregs
ret
# stop_core1: this will force core1 to stop, and reset.
# Takes no arguments. ra and x3..x13 (including the t0/t1/a0/a1 used here) are
# preserved via pushregs/popregs. The FIFO IRQ is disabled and left disabled.
# There is no timeout on either wait loop.
stop_core1:
pushregs
# Set the PROC1 bit in FRCE_OFF through the atomic-set alias.
li t0, PSM_BASE|WRITE_SET
li t1, b_FRCE_OFF_PROC1
sw t1, _FRCE_OFF(t0)
# Read FRCE_OFF back through the normal address until the PROC1 bit reads as set.
li t0, PSM_BASE
1: lw t1, _FRCE_OFF(t0)
bexti t1, t1, o_FRCE_OFF_PROC1
beqz t1, 1b
# disable FIFO IRQ (enable_irq with a1 = 0)
li a0, SIO_IRQ_FIFO
li a1, 0
call enable_irq
# Clear the PROC1 bit in FRCE_OFF again through the atomic-clear alias.
li t0, PSM_BASE|WRITE_CLR
li t1, b_FRCE_OFF_PROC1
sw t1, _FRCE_OFF(t0)
# wait for response: block until the FIFO status reports valid data
li t0, SIO_BASE
2: lw t1, _SIO_FIFO_ST(t0)
andi t1, t1, SIO_FIFO_ST_VLD_BITS
bnez t1, 3f
slt x0, x0, x0 # WFE h3.block
j 2b
# read response and check it is zero
3: lw t1, _SIO_FIFO_RD(t0)
beqz t1, 4f
# should have read zero here; a non-zero value is ignored (placeholder nop only)
nop
4:
popregs
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment