Skip to content

Instantly share code, notes, and snippets.

@Terminus-IMRC
Last active January 21, 2018 15:40
Show Gist options
  • Save Terminus-IMRC/a4f187149f1d51208c0eb2b6f3978d26 to your computer and use it in GitHub Desktop.
Save Terminus-IMRC/a4f187149f1d51208c0eb2b6f3978d26 to your computer and use it in GitHub Desktop.
QPU benchmarks descriptions

QPU benchmarks descriptions

Inst

QI3

rep NNOPS {
	nop
}
alu.pe
mov host_int, 1
nop

QI4

Aborted.

mov host_int, 1
alu.pe
nop
nop
nop

QI5

rep N {
	sem15 inc
}
rep {
	sem15 inc
	sem15 dec
}

QI6

rep {
	mutex_acquire
	mutex_release
}

QI7

rep {
	uniforms_address <- addr
}

QI8

http://imrc.noip.me/blog/vc4/QI8/

QI9

Reading qpu_number.

QI10

n Thread 0 Thread 1
1 nop nop
2 sem0 inc nop
3 nop sem0 dec
4 nop nop
5 nop sem1 inc
6 sem1 dec nop
7 nop nop

QI11

n Thread 0 Thread 1
1 nop nop
2 sem0 dec nop
3 nop sem0 inc
4 nop nop
5 nop sem1 dec
6 sem1 inc nop
7 nop nop

QI12

n Thread 0 Thread 1
1 nop nop
2 nop sem0 dec
3 sem0 inc nop
4 nop nop

QI13

rep {
	sem0 inc
}

QI14

rep {
	sem0 dec
}

QI15

n Thread 0 Thread 1
1 sem0 inc sem0 dec
2 nop X
3 nop (bra) nop
4 nop nop (bra)
5 sem1 dec sem0 inc
6 X nop
7 nop nop

X: expected stall caused by a semaphore instruction.

QI16

n Thread 0 Thread 1
1 sem0 dec nop
2 nop sem0 inc
3 nop nop
4 nop nop
5 sem1 inc sem1 dec
6 nop (bra) nop (bra)
7 nop nop
8 nop nop
9 nop nop

QI17

n Thread 0 Thread 1
1 sem0 dec nop
2 X sem0 inc
3 X nop
4 nop nop
5 nop sem1 dec
6 sem1 inc X
7 nop X
8 nop (bra) nop (bra)
9 nop nop
10 nop nop
11 nop nop

QI18

n Thread 0 Thread 1 Thread 2
1 sem0 inc nop nop
2 nop sem0 dec nop
3 nop sem1 inc nop
4 nop nop sem1 dec
5 nop nop sem2 inc
6 sem2 dec nop nop
7 nop (bra) nop (bra) nop (bra)
8 nop nop nop
9 nop nop nop
10 nop nop nop

QI19

n Thread 0 Thread 1 Thread 2 Thread 3 Thread 4
1 sem0 inc nop nop nop nop
2 nop sem0 dec nop nop nop
3 nop sem1 inc nop nop nop
4 nop nop sem1 dec nop nop
5 nop nop sem2 inc nop nop
6 nop nop nop sem2 dec nop
7 nop nop nop sem3 inc nop
8 nop nop nop nop sem3 dec
9 nop nop nop nop sem4 inc
10 sem4 dec nop nop nop nop
11 nop (bra) nop (bra) nop (bra) nop (bra) nop (bra)
12 nop nop nop nop nop
13 nop nop nop nop nop
14 nop nop nop nop nop

QI20

n Thread 0 Thread 1 Thread 2 Thread 3 Thread 4
1 sem0 inc nop nop nop nop
2 nop sem0 dec nop nop nop
3 nop sem1 inc nop nop nop
4 nop nop sem1 dec nop nop
5 nop nop sem2 inc nop nop
6 nop nop nop sem2 dec nop
7 nop nop nop sem3 inc nop
8 nop nop nop nop sem3 dec
9 nop nop nop nop sem4 inc
10 sem4 dec nop nop nop nop
11 sem5 inc sem5 inc sem5 inc sem5 inc sem5 inc
12 sem5 dec sem5 dec sem5 dec sem5 dec sem5 dec
13 nop (bra) nop (bra) nop (bra) nop (bra) nop (bra)
14 nop nop nop nop nop
15 nop nop nop nop nop
16 nop nop nop nop nop

QI21

n Thread 0 Thread 1
1 nop nop
2 sem0 dec nop
3 nop sem0 inc
4 sem2 inc sem2 inc
5 nop nop
6 sem2 dec sem1 dec
7 sem1 inc nop
8 nop sem2 dec

QI22-31

sem

TMU

  • QT8
rep {
	tmu0 with stride=4
	delays (implemented with ALU instructions)
	sig tmu0
}
  • QT9
rep {
	tmu0 with stride=4
	delays (implemented with branch)
	sig tmu0
}
  • QT10
rep {
	tmu0 with addr + 4*element_number, stride=4*16
	delays (implemented with branch)
	sig tmu0
}
  • QT11 -- The same as QT10. Aborted.

  • QT12

tmu0 <- addr + ELEM_STEP * element_number
delays
sig tmu0
  • QT13
tmu0 with +ftoi(unif*itof(element_number))
delays
sig tmu0
  • QT14 -- extends QT10.
rep {
	tmu0 with stride=128
	delays
	sig tmu0
}
  • QT15 -- extends QT10.
rep {
	tmu0 with stride=64
	tmu0 with stride=64
	delays
	sig tmu0
	sig tmu0
}
  • QT16 -- extends QT15
rep {
	tmu0 with stride=64
	tmu1 with stride=64
	delays
	sig tmu0
	sig tmu1
}
  • QT17 -- extends QT15
rep {
	tmu0 with stride=64
	tmu0 with stride=64
	delays
	sig tmu0
	20 nops
	sig tmu0
}
  • QT18 -- extends QT17
addr += e * 4
rep {
	tmu0 = addr
	tmu0 = addr + 64
	tmu0 = addr + 128
	delays
	sig tmu0
	20 nops
	sig tmu0
	20 nops
	sig tmu0
	addr += 172 (TODO confirm: the loads at addr, addr + 64 and addr + 128 suggest 192)
}
  • QT19 -- extends QT18
addr += e * 4
rep {
	tmu0 = addr
	tmu0 = addr
	tmu0 = addr
	delays
	sig tmu0
	20 nops
	sig tmu0
	20 nops
	sig tmu0
	addr += 64
}
  • QT20 -- extends QT19
addr += e * 4
rep {
	tmu0 = addr
	tmu0 = addr
	tmu0 = addr
	tmu0 = addr
	delays
	sig tmu0
	20 nops
	sig tmu0
	20 nops
	sig tmu0
	20 nops
	sig tmu0
	addr += 64
}
  • QT21 -- extends QT15
rep {
	tmu0 with stride=64
	tmu0 with stride=64
	tmu0 with stride=64
	delays
	sig tmu0
	sig tmu0
	sig tmu0
}
  • QT22 -- extends QT15
rep {
	tmu0 with stride=64
	tmu0 with stride=64
	tmu0 with stride=64
	tmu0 with stride=64
	delays
	sig tmu0
	sig tmu0
	sig tmu0
	sig tmu0
}
@Terminus-IMRC
Copy link
Author

Todo: Checklist

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment