Skip to content

Instantly share code, notes, and snippets.

@wolfmanjm
Created June 18, 2025 22:19
Show Gist options
  • Save wolfmanjm/ee06c5b92ec3b0b066771a522c498b20 to your computer and use it in GitHub Desktop.
Save wolfmanjm/ee06c5b92ec3b0b066771a522c498b20 to your computer and use it in GitHub Desktop.
multicore
# -----------------------------------------------------------------------------
# Initialize register file
# li x1, 0 # Return address register, holds link back and is also used to compose long calls with auipc and jalr
# laf x2, returnstackanfang # Set return stack pointer (sp)
# li x3, 0 # Loop index
# li x4, 0 # Loop limit
# li x5, 0 # Scratch register, needs to be saved.
# li x6, 0 # Scratch register, needs to be saved.
# li x7, 0 # Scratch register, needs to be saved.
# li x8, 42 # TOS
# laf x9, datenstackanfang # PSP Set data stack pointer
# li x10, 0 # Scratch register, needs to be saved.
# li x11, 0 # Scratch register, needs to be saved.
# li x12, 0 # Scratch register, needs to be saved.
# li x13, 0 # Scratch register, needs to be saved.
# li x14, 0 # Free scratch register, not saved across calls.
# li x15, 0 # Free scratch register, not saved across calls.
# -----------------------------------------------------------------------------
# Standard RISC-V ABI register usage:
# Register  ABI name  Description                        Saver
# x0        zero      Hard-wired zero                    —
# x1        ra        Return address                     Caller
# x2        sp        Stack pointer                      Callee
# x3        gp        Global pointer                     —
# x4        tp        Thread pointer                     —
# x5–7      t0–2      Temporaries                        Caller
# x8        s0/fp     Saved register/frame pointer       Callee
# x9        s1        Saved register                     Callee
# x10–11    a0–1      Function arguments/return values   Caller
# x12–17    a2–7      Function arguments                 Caller
# x18–27    s2–11     Saved registers                    Callee
# x28–31    t3–6      Temporaries                        Caller
# pushregs/popregs save and restore ra plus these registers:
# x3=gp, x4=tp, x5=t0, x6=t1, x7=t2, x8=s0, x9=s1, x10=a0, x11=a1, x12=a2, x13=a3
# pushregs: open a 48-byte (12-word) frame on sp and store ra and x3..x13 in it.
# Word k of the frame lives at offset 4*k; popregs reads the same layout back.
.macro pushregs
        addi    sp, sp, -48
        sw      ra,  0(sp)              # x1
        sw      gp,  4(sp)              # x3
        sw      tp,  8(sp)              # x4
        sw      t0, 12(sp)              # x5
        sw      t1, 16(sp)              # x6
        sw      t2, 20(sp)              # x7
        sw      s0, 24(sp)              # x8
        sw      s1, 28(sp)              # x9
        sw      a0, 32(sp)              # x10
        sw      a1, 36(sp)              # x11
        sw      a2, 40(sp)              # x12
        sw      a3, 44(sp)              # x13
.endm
# popregs: reload ra and x3..x13 from the 48-byte frame at sp (the layout that
# pushregs writes), then release the frame.
.macro popregs
        lw      ra,  0(sp)              # x1
        lw      gp,  4(sp)              # x3
        lw      tp,  8(sp)              # x4
        lw      t0, 12(sp)              # x5
        lw      t1, 16(sp)              # x6
        lw      t2, 20(sp)              # x7
        lw      s0, 24(sp)              # x8
        lw      s1, 28(sp)              # x9
        lw      a0, 32(sp)              # x10
        lw      a1, 36(sp)              # x11
        lw      a2, 40(sp)              # x12
        lw      a3, 44(sp)              # x13
        addi    sp, sp, 48
.endm
# Data used by launch_core1: the built-in core 1 stack and the handshake words.
        .p2align 4
core1_sp:                               # lowest address of the built-in stack
        .space  1024                    # 1024 zero bytes
core1_sp_end:                           # first address past the built-in stack
        .word   0

# Words sent to core 1 through the SIO FIFO, in this order. The layout is
# relied upon: launch_core1 addresses the stack word as -4(core1_entry).
cmd_sequence:
        .word   0, 0, 1                 # a zero word makes launch_core1 drain the FIFO before sending
        .word   __VECTOR_TABLE          # vector table address
        .word   core1_sp_end            # stack pointer word (launch_core1 may overwrite it)
core1_entry:
        .word   0                       # entry address, written by launch_core1
cmd_sequence_end:                       # loop bound for launch_core1
        .word   0
# Trap vector table: twelve 4-byte slots, aligned to 64 bytes. Its address is
# also one of the words in cmd_sequence that launch_core1 sends to core 1.
# NOTE(review): the populated slots (0, 3, 7, 11) match per-cause vectoring;
# whether core 1 actually runs in vectored mode depends on how this address is
# installed in mtvec, which is not visible in this file — confirm.
.p2align 6
__VECTOR_TABLE:
# Hardware vector table for standard RISC-V interrupts, indicated by `mtvec`.
# norvc/norelax keep every jump below a full 4-byte instruction, so each entry
# occupies exactly one slot; the previous options are restored by the pop.
.option push
.option norvc
.option norelax
j isr_riscv_machine_exception # slot 0
.word 0 # slot 1: zero filler
.word 0 # slot 2: zero filler
j isr_riscv_machine_soft_irq # slot 3
.word 0 # slot 4: zero filler
.word 0 # slot 5: zero filler
.word 0 # slot 6: zero filler
j isr_riscv_machine_timer # slot 7
.word 0 # slot 8: zero filler
.word 0 # slot 9: zero filler
.word 0 # slot 10: zero filler
j isr_riscv_machine_external_irq # slot 11
.option pop
# Default handlers: each one is a jump to itself, so execution stays there.
isr_riscv_machine_exception: j isr_riscv_machine_exception
isr_riscv_machine_soft_irq: j isr_riscv_machine_soft_irq
isr_riscv_machine_timer: j isr_riscv_machine_timer
isr_riscv_machine_external_irq: j isr_riscv_machine_external_irq
# --- SIO inter-core FIFO (register offsets are relative to SIO_BASE) ---------
        .equ    SIO_BASE,               0xd0000000
        .equ    _SIO_FIFO_ST,           0x050           # status
        .equ    _SIO_FIFO_WR,           0x054           # write port
        .equ    _SIO_FIFO_RD,           0x058           # read port
        .equ    SIO_FIFO_ST_VLD_BITS,   0x00000001      # tested before reading _SIO_FIFO_RD
        .equ    SIO_FIFO_ST_RDY_BITS,   0x00000002      # tested before writing _SIO_FIFO_WR

# --- PSM (register offsets are relative to PSM_BASE) -------------------------
# b_ names are bit masks, o_ names are bit numbers.
        .equ    PSM_BASE,               0x40018000
        .equ    _FRCE_ON,               0x00
        .equ    b_FRCE_ON_PROC1,        (1 << 24)
        .equ    _FRCE_OFF,              0x04
        .equ    o_FRCE_OFF_PROC1,       24
        .equ    b_FRCE_OFF_PROC1,       (1 << 24)
        .equ    _DONE,                  0x0c
        .equ    b_DONE_PROC1,           (1 << 24)

# --- Access aliases, OR-ed into a peripheral base address --------------------
        .equ    WRITE_NORMAL,           0x0000          # Normal read write access
        .equ    WRITE_XOR,              0x1000          # Atomic XOR on write
        .equ    WRITE_SET,              0x2000          # Atomic bitmask set on write
        .equ    WRITE_CLR,              0x3000          # Atomic bitmask clear on write

# --- CSR numbers and bit masks -----------------------------------------------
        .equ    RVCSR_MEIEA_OFFSET,     0xbe0
        .equ    RVCSR_MEIFA_OFFSET,     0xbe2
        .equ    RVCSR_MIE_MEIE_BITS,    0x00000800
        .equ    RVCSR_MSTATUS_MIE_BITS, 0x00000008
# -----------------------------------------------------------------------------
# enable_irq: enable or disable the interrupt whose number is in a0.
#   a0 = irq number
#   a1 = 0 to disable, any other value to enable
# Clobbers t0, t1, t2; a0 and a1 are left unchanged.
#
# With n = a0 / 32 and mask = 1 << (a0 % 32), two words are written to the
# interrupt-array CSRs, each packed as (16 mask bits << 16) | window index:
#   low  word: window 2*n,     mask bits 15..0
#   high word: window 2*n + 1, mask bits 31..16
# This follows the C sequence irq_set_mask_n_enabled(num / 32, 1u << (num % 32),
# enabled), which uses hazard3_irqarray_clear/_set on MEIFA and MEIEA.
# Enabling clears the word's bits in MEIFA and then sets them in MEIEA;
# disabling only clears them in MEIEA.
# -----------------------------------------------------------------------------
enable_irq:
        andi    t1, a0, 31
        bset    t1, zero, t1            # t1 = mask
        srli    t0, a0, 5
        slli    t0, t0, 1               # t0 = 2*n
        slli    t2, t1, 16              # mask bits 15..0 moved to bits 31..16
        or      t2, t2, t0              # t2 = low word
        srli    t1, t1, 16
        slli    t1, t1, 16              # mask bits 31..16, low half cleared
        addi    t0, t0, 1               # t0 = 2*n + 1
        or      t1, t1, t0              # t1 = high word
        beqz    a1, .Lenable_irq_disable

        csrc    RVCSR_MEIFA_OFFSET, t2
        csrs    RVCSR_MEIEA_OFFSET, t2  # enable, low word
        csrc    RVCSR_MEIFA_OFFSET, t1
        csrs    RVCSR_MEIEA_OFFSET, t1  # enable, high word
        ret

.Lenable_irq_disable:
        csrc    RVCSR_MEIEA_OFFSET, t2  # disable, low word
        csrc    RVCSR_MEIEA_OFFSET, t1  # disable, high word
        ret
# -----------------------------------------------------------------------------
# launch_core1: start a function on core 1 by sending cmd_sequence through the
# SIO FIFO.
#   a0 = address of the function to run on core 1
#   a1 = stack pointer for core 1, or 0 to use the built-in 1024-byte stack
#        (core1_sp_end)
# Clobbers a0, a1, t0-t4. Uses one word of the stack at sp to keep ra.
# Returns only after every word of cmd_sequence has been echoed back; a wrong
# echo restarts the sequence from its first word.
# SIO_IRQ_FIFO is expected to be defined elsewhere in the build.
# -----------------------------------------------------------------------------
launch_core1:
        addi    sp, sp, -4
        sw      ra, 0(sp)               # ra is overwritten by the call below

        la      t0, core1_entry
        sw      a0, 0(t0)               # entry word of cmd_sequence
        # Always rewrite the stack word, so that a1 == 0 selects the built-in
        # stack even when an earlier launch stored a caller-supplied pointer.
        bnez    a1, .Llaunch_core1_store_sp
        la      a1, core1_sp_end
.Llaunch_core1_store_sp:
        sw      a1, -4(t0)              # stack word sits directly below core1_entry

        # disable FIFO IRQ
        li      a0, SIO_IRQ_FIFO
        li      a1, 0
        call    enable_irq

        li      t3, SIO_BASE
        # send sequence to core1
.Llaunch_core1_restart:
        la      t0, cmd_sequence        # t0 = address of the next word to send
.Llaunch_core1_next:
        lw      t2, 0(t0)               # t2 = word to send
        bnez    t2, .Llaunch_core1_wait_room
        # a zero word: drain our receive FIFO before sending it
.Llaunch_core1_drain:
        lw      t4, _SIO_FIFO_ST(t3)
        andi    t4, t4, SIO_FIFO_ST_VLD_BITS
        beqz    t4, .Llaunch_core1_wait_room
        lw      t4, _SIO_FIFO_RD(t3)    # value is discarded
        slt     x0, x0, x1              # SEV h3.unblock
        j       .Llaunch_core1_drain
        # wait for room in FIFO
.Llaunch_core1_wait_room:
        lw      t4, _SIO_FIFO_ST(t3)
        andi    t4, t4, SIO_FIFO_ST_RDY_BITS
        beqz    t4, .Llaunch_core1_wait_room
        # write cmd to core1 fifo
        sw      t2, _SIO_FIFO_WR(t3)
        slt     x0, x0, x1              # SEV h3.unblock
        # wait for response
.Llaunch_core1_wait_reply:
        lw      t4, _SIO_FIFO_ST(t3)
        andi    t4, t4, SIO_FIFO_ST_VLD_BITS
        bnez    t4, .Llaunch_core1_reply
        slt     x0, x0, x0              # WFE h3.block
        j       .Llaunch_core1_wait_reply
        # read response and compare with what we sent
.Llaunch_core1_reply:
        lw      t4, _SIO_FIFO_RD(t3)
        bne     t4, t2, .Llaunch_core1_restart  # wrong echo: start over
        addi    t0, t0, 4               # correct echo: advance to the next word
        la      t4, cmd_sequence_end
        bne     t0, t4, .Llaunch_core1_next

        lw      ra, 0(sp)
        addi    sp, sp, 4
        ret
# -----------------------------------------------------------------------------
# stop_core1: set the PROC1 bit in the PSM FRCE_OFF register, wait until it
# reads back as set, clear it again, then wait for one word from the SIO FIFO.
# No arguments. Clobbers a0, a1, t0, t1, t2 (t2 through enable_irq).
# Uses one word of the stack at sp to keep ra.
# SIO_IRQ_FIFO is expected to be defined elsewhere in the build.
# -----------------------------------------------------------------------------
stop_core1:
        addi    sp, sp, -4
        sw      ra, 0(sp)               # ra is overwritten by the call below

        # set the PROC1 force-off bit through the atomic-set alias
        li      t0, PSM_BASE|WRITE_SET
        li      t1, b_FRCE_OFF_PROC1
        sw      t1, _FRCE_OFF(t0)

        # poll the plain register until the bit reads back as 1
        li      t0, PSM_BASE
.Lstop_core1_wait_off:
        lw      t1, _FRCE_OFF(t0)
        bexti   t1, t1, o_FRCE_OFF_PROC1
        beqz    t1, .Lstop_core1_wait_off

        # disable FIFO IRQ
        li      a1, 0
        li      a0, SIO_IRQ_FIFO
        call    enable_irq

        # clear the PROC1 force-off bit through the atomic-clear alias
        li      t0, PSM_BASE|WRITE_CLR
        li      t1, b_FRCE_OFF_PROC1
        sw      t1, _FRCE_OFF(t0)

        # wait for response
        li      t0, SIO_BASE
        j       .Lstop_core1_poll
.Lstop_core1_sleep:
        slt     x0, x0, x0              # WFE h3.block
.Lstop_core1_poll:
        lw      t1, _SIO_FIFO_ST(t0)
        andi    t1, t1, SIO_FIFO_ST_VLD_BITS
        beqz    t1, .Lstop_core1_sleep

        # read response and check it is zero
        lw      t1, _SIO_FIFO_RD(t0)
        beqz    t1, .Lstop_core1_done
        nop                             # should have read zero here; nothing is done about it
.Lstop_core1_done:
        lw      ra, 0(sp)
        addi    sp, sp, 4
        ret
# -----------------------------------------------------------------------------
Definition Flag_visible, "start-multicore"
# -----------------------------------------------------------------------------
# Calls launch_core1 with blink_test as the entry point and the built-in core 1
# stack. ra and x3..x13 are saved before the call and restored after it.
        pushregs
        li      a1, 0                   # 0 = use the built-in stack
        la      a0, blink_test          # function to run on core 1
        call    launch_core1
        popregs
        ret
# -----------------------------------------------------------------------------
Definition Flag_visible, "stop-multicore"
# -----------------------------------------------------------------------------
# Calls stop_core1. ra and x3..x13 are saved before the call and restored
# after it, so the caller sees them unchanged.
pushregs
call stop_core1
popregs
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment