Skip to content

Instantly share code, notes, and snippets.

@andressbarajas
Created October 30, 2023 00:40
Show Gist options
  • Save andressbarajas/b044066121ea5c8812612149a9d018e1 to your computer and use it in GitHub Desktop.
Save andressbarajas/b044066121ea5c8812612149a9d018e1 to your computer and use it in GitHub Desktop.
Functions to be used inside sep_data
!-----------------------------------------------------------------------
! void snd_pcm16_split_bbh(uint32_t *data, uint32_t *left,
!                          uint32_t *right, uint32_t size);
!
! De-interleaves 16-bit stereo PCM (L0 R0 L1 R1 ...) into one buffer
! per channel. Fully unrolled: each pass of the loop reads 64 source
! bytes and writes one 32-byte line to each destination.
!
! Target: SH-4, GNU as syntax, little-endian sample layout assumed.
!
! In:    r4 = data   interleaved source (4-byte aligned at least)
!        r5 = left   left channel destination  (32-byte aligned)
!        r6 = right  right channel destination (32-byte aligned)
!        r7 = size   bytes written to EACH destination. Must be a
!                    multiple of 32: any remainder is dropped and a
!                    size below 32 writes nothing.
! Out:   nothing
! Clobb: r0-r3 scratch, r4-r7 advanced/consumed, T bit
! Stack: unused
!
! Notation in the comments below: "A:B" is a 32-bit register holding
! sample A in bits 31..16 and sample B in bits 15..0. A little-endian
! mov.l store therefore writes B at the lower address, so the earlier
! sample of a pair has to sit in the LOW half of the stored word.
!-----------------------------------------------------------------------

! Loads two stereo frames from @r4 (post-increment) and splits them.
! Result: r0 = L1:L0 (left word), r2 = R1:R0 (right word).
! Clobbers r1 and r3.
        .macro  pcm16_bbh_pair
        mov.l   @r4+, r2                ! r2 = R0:L0
        mov.l   @r4+, r3                ! r3 = R1:L1
        swap.w  r2, r0                  ! r0 = L0:R0
        xtrct   r3, r0                  ! r0 = (r3 << 16) | (r0 >> 16) = L1:L0
        swap.w  r3, r1                  ! r1 = L1:R1
        xtrct   r1, r2                  ! r2 = (r1 << 16) | (r2 >> 16) = R1:R0
        .endm

_snd_pcm16_split_bbh:
        mov     #-5, r1
        shld    r1, r7                  ! r7 = size / 32, logical shift since size is unsigned
        tst     r7, r7
        bt      .early_exit             ! no complete 32-byte line to produce
.loop:
        ! Prefetch the source cache line 32 bytes ahead of the read pointer.
        mov     r4, r0
        add     #32, r0
        pref    @r0

        ! First pair of the pass. The first store into each destination
        ! line uses movca.l, which allocates the cache line without
        ! reading it from memory first. That is only safe because the
        ! remaining 28 bytes of the line are overwritten below, hence
        ! the 32-byte alignment and size requirements.
        pcm16_bbh_pair
        movca.l r0, @r5                 ! movca.l can only store from r0
        mov     r2, r0
        add     #4, r5
        movca.l r0, @r6
        add     #4, r6

        ! Remaining seven pairs fill the rest of both lines.
        .rept   7
        pcm16_bbh_pair
        mov.l   r0, @r5
        add     #4, r5
        mov.l   r2, @r6
        add     #4, r6
        .endr

        dt      r7                      ! one 32-byte line per channel done
        bf      .loop
.early_exit:
        rts
        nop
!-----------------------------------------------------------------------
! void snd_pcm16_split_bbh2(uint32_t *data, uint32_t *left,
!                           uint32_t *right, uint32_t size);
!
! De-interleaves 16-bit stereo PCM (L0 R0 L1 R1 ...) into one buffer
! per channel. Compact variant: the outer loop runs once per 32-byte
! destination line, the inner loop handles the pairs that follow the
! first store into that line.
!
! Target: SH-4, GNU as syntax, little-endian sample layout assumed.
!
! In:    r4 = data   interleaved source (4-byte aligned at least)
!        r5 = left   left channel destination  (32-byte aligned)
!        r6 = right  right channel destination (32-byte aligned)
!        r7 = size   bytes written to EACH destination. Must be a
!                    multiple of 32: any remainder is dropped and a
!                    size below 32 writes nothing.
! Out:   nothing
! Clobb: r0-r3 scratch, r4-r7 advanced/consumed, T bit
! Stack: unused (only caller-saved registers are touched)
!
! Notation in the comments below: "A:B" is a 32-bit register holding
! sample A in bits 31..16 and sample B in bits 15..0. A little-endian
! mov.l store therefore writes B at the lower address, so the earlier
! sample of a pair has to sit in the LOW half of the stored word.
!-----------------------------------------------------------------------
_snd_pcm16_split_bbh2:
        mov     #-5, r1
        shld    r1, r7                  ! r7 = size / 32, logical shift since size is unsigned
        tst     r7, r7
        bt      .exit_early             ! no complete 32-byte line to produce
.outer_loop:
        ! Prefetch the source cache line 32 bytes ahead of the read pointer.
        mov     r4, r0
        add     #32, r0
        pref    @r0

        ! First pair of the line.
        mov.l   @r4+, r2                ! r2 = R0:L0
        mov.l   @r4+, r3                ! r3 = R1:L1
        swap.w  r2, r0                  ! r0 = L0:R0
        xtrct   r3, r0                  ! r0 = (r3 << 16) | (r0 >> 16) = L1:L0
        swap.w  r3, r3                  ! r3 = L1:R1
        xtrct   r3, r2                  ! r2 = (r3 << 16) | (r2 >> 16) = R1:R0

        ! movca.l allocates the destination cache line without reading
        ! it from memory. That is only safe because the inner loop
        ! overwrites the remaining 28 bytes of the line, hence the
        ! 32-byte alignment and size requirements.
        movca.l r0, @r5                 ! movca.l can only store from r0
        add     #4, r5
        mov     r2, r0
        movca.l r0, @r6
        add     #4, r6

        mov     #7, r1                  ! seven more pairs complete the line
.inner_loop:
        mov.l   @r4+, r2                ! r2 = R0:L0
        mov.l   @r4+, r3                ! r3 = R1:L1
        swap.w  r2, r0                  ! r0 = L0:R0
        xtrct   r3, r0                  ! r0 = L1:L0
        swap.w  r3, r3                  ! r3 = L1:R1
        xtrct   r3, r2                  ! r2 = R1:R0
        mov.l   r0, @r5
        add     #4, r5
        mov.l   r2, @r6
        add     #4, r6
        dt      r1
        bf      .inner_loop

        dt      r7                      ! one 32-byte line per channel done
        bf      .outer_loop
.exit_early:
        rts
        nop
@andressbarajas
Copy link
Author

snd_pcm16_split_bbh trades size for speed. Example of usage:

/* Fills sep_buffer[] from `buffer`: a stereo stream is split into its two
 * channels, a mono stream is copied once and shared by both entries. */
static void sep_data(void *buffer, int len, int stereo) {
    if(!stereo) {
        /* Mono: a single copy, then the second entry is set to the first. */
        memcpy(sep_buffer[0], buffer, len);
        sep_buffer[1] = sep_buffer[0];
        return;
    }

    /* Stereo: unrolled splitter. snd_pcm16_split_bbh2 takes the same
     * arguments and can be substituted here. */
    snd_pcm16_split_bbh(buffer, sep_buffer[0], sep_buffer[1], len);
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment