Skip to content

Instantly share code, notes, and snippets.

@feliwir
Created January 17, 2023 14:16
Show Gist options
  • Save feliwir/34bb234c4ef0a43c857bfd6968ad2682 to your computer and use it in GitHub Desktop.
Save feliwir/34bb234c4ef0a43c857bfd6968ad2682 to your computer and use it in GitHub Desktop.
;-----------------------------------------------------------------------
; dotranspose (8-operand form)
;
; Transposes an 8x8 block of 16-bit elements held two rows per register.
; "rc" below denotes the element at row r, column c.
;
; In:     %1 = (row 0 | row 4)      %2 = (row 1 | row 5)
;         %3 = (row 2 | row 6)      %4 = (row 3 | row 7)
;         (low 128-bit lane | high 128-bit lane)
; Out:    %1 = (col 1 | col 0)      %2 = (col 3 | col 2)
;         %3 = (col 4 | col 5)      %4 = (col 6 | col 7)
; Clobb:  %5, %6, %7, %8 (left holding the stage-1 intermediates)
;
; The instructions within each stage read operands that an earlier
; instruction of the macro may already have written if registers are
; shared, so pass eight distinct registers.
;-----------------------------------------------------------------------
%macro dotranspose 8
    ; Stage 1: interleave 16-bit elements of adjacent rows, per lane.
    vpunpcklwd  %5, %1, %2          ; %5 = (00 10 01 11 02 12 03 13 | 40 50 41 51 42 52 43 53)
    vpunpckhwd  %6, %1, %2          ; %6 = (04 14 05 15 06 16 07 17 | 44 54 45 55 46 56 47 57)
    vpunpcklwd  %7, %3, %4          ; %7 = (20 30 21 31 22 32 23 33 | 60 70 61 71 62 72 63 73)
    vpunpckhwd  %8, %3, %4          ; %8 = (24 34 25 35 26 36 27 37 | 64 74 65 75 66 76 67 77)

    ; Stage 2: interleave the 32-bit pairs, giving four-element column
    ; fragments per quadword.
    vpunpckldq  %1, %5, %7          ; %1 = (00 10 20 30 01 11 21 31 | 40 50 60 70 41 51 61 71)
    vpunpckhdq  %2, %5, %7          ; %2 = (02 12 22 32 03 13 23 33 | 42 52 62 72 43 53 63 73)
    vpunpckldq  %3, %6, %8          ; %3 = (04 14 24 34 05 15 25 35 | 44 54 64 74 45 55 65 75)
    vpunpckhdq  %4, %6, %8          ; %4 = (06 16 26 36 07 17 27 37 | 46 56 66 76 47 57 67 77)

    ; Stage 3: reorder quadwords across lanes so each 128-bit half is one
    ; complete column. The immediate is four 2-bit source-quadword
    ; selectors, written here from destination q3 down to q0.
    vpermq      %1, %1, 10_00_11_01b ; q1,q3,q0,q2 -> %1 = (01 11 21 31 41 51 61 71 | 00 10 20 30 40 50 60 70)
    vpermq      %2, %2, 10_00_11_01b ; q1,q3,q0,q2 -> %2 = (03 13 23 33 43 53 63 73 | 02 12 22 32 42 52 62 72)
    vpermq      %3, %3, 11_01_10_00b ; q0,q2,q1,q3 -> %3 = (04 14 24 34 44 54 64 74 | 05 15 25 35 45 55 65 75)
    vpermq      %4, %4, 11_01_10_00b ; q0,q2,q1,q3 -> %4 = (06 16 26 36 46 56 66 76 | 07 17 27 37 47 57 67 77)
%endmacro
;-----------------------------------------------------------------------
; dotranspose (4-operand form)
;
; In:     %1 = A, %2 = B            (each viewed as eight 32-bit dwords)
; Out:    %1 = (A0 A2 B0 B2  A1 A3 B1 B3)
;         %2 = (A4 A6 B4 B6  A5 A7 B5 B7)
; Clobb:  %3 = copy of incoming A, %4 = copy of incoming B
;
; All four operands must be distinct registers in ymm0-ymm15: vperm2i128
; only has a VEX encoding. Requires AVX2; no memory operands are used.
;
; NOTE(review): the final step was originally written as
;     vpermd %1, %1, [0, 2, 4, 6, 1, 3, 5, 7]
; which does not assemble (a bracketed operand is an effective address,
; not an inline vector) and also has vpermd's operands in the wrong
; roles (vpermd takes dst, index-register, data). This version assumes
; the list meant dst[i] = src[list[i]] -- confirm against the caller.
;-----------------------------------------------------------------------
%macro dotranspose 4
    ; Keep copies of both inputs so each output can draw on both.
    ; Plain VEX vmovdqa is used: vmovdqa32 is an EVEX (AVX-512) encoding
    ; and would be the only instruction here needing more than AVX2.
    vmovdqa     %3, %1
    vmovdqa     %4, %2

    vperm2i128  %1, %3, %4, 0x20    ; %1 = (A0 A1 A2 A3 | B0 B1 B2 B3)
    vperm2i128  %2, %3, %4, 0x31    ; %2 = (A4 A5 A6 A7 | B4 B5 B6 B7)

    ; Dword order [0 2 4 6 1 3 5 7] in two immediate shuffles:
    ;   vpshufd 0xD8 : per lane (d0 d1 d2 d3) -> (d0 d2 d1 d3)
    ;   vpermq  0xD8 : quadwords (q0 q1 q2 q3) -> (q0 q2 q1 q3)
    vpshufd     %1, %1, 0xD8
    vpermq      %1, %1, 0xD8
    vpshufd     %2, %2, 0xD8
    vpermq      %2, %2, 0xD8
%endmacro
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment