Suppose we have a nested Python dictionary like this:
# Example input: a nested dictionary whose leaves are tuples of ints
# (they look like parameter shapes). Insertion order matters, because
# it determines the node numbering in the generated graph.
d = {
    'fc1': dict(kernel=(768, 3072), bias=(3072,)),
    'fc2': dict(kernel=(3072, 768), bias=(768,)),
    'self_attn': {
        proj: dict(kernel=(768, 768), bias=(768,))
        for proj in ('k_proj', 'q_proj', 'v_proj', 'out_proj')
    },
    'self_attn_layer_norm': dict(scale=(768,), bias=(768,)),
    'encoder_attn': {
        proj: dict(kernel=(768, 768), bias=(768,))
        for proj in ('k_proj', 'q_proj', 'v_proj', 'out_proj')
    },
    'encoder_attn_layer_norm': dict(scale=(768,), bias=(768,)),
    'final_layer_norm': dict(scale=(768,), bias=(768,)),
}
We can convert it to a Graphviz (DOT) representation with the following function:
def dict2dot(d, root_name):
    """Render a nested dictionary as the source of a Graphviz DOT digraph.

    Every key becomes a node. A value that is itself a ``dict`` becomes an
    inner node whose children are its own keys; any other value makes the
    key a leaf whose label shows the key and the value on separate lines.
    Nodes are numbered in depth-first pre-order, the root being node 0.

    Args:
        d: The (possibly nested) dictionary to render.
        root_name: Label of the root node.

    Returns:
        The DOT source as one newline-joined string.
    """

    def escape(text):
        """Return ``text`` as a string safe inside a double-quoted DOT label."""
        # Backslashes are escaped first so that the backslash added in front
        # of each double quote is not doubled afterwards.
        return str(text).replace('\\', '\\\\').replace('"', '\\"')

    xs = ['digraph A {', f' 0 [label="{escape(root_name)}"]']

    def dfs(d, parent=0):
        """Emit nodes and edges for the children of ``parent``.

        Returns the next unused node id.
        """
        current = parent + 1
        for k, v in d.items():
            if isinstance(v, dict):
                xs.append(f' {current} [label="{escape(k)}"]')
                xs.append(f' {parent} -> {current}')
                # The subtree takes the ids after `current`; resume numbering
                # from whatever id it leaves unused.
                current = dfs(v, current)
            else:  # leaf
                # '\\n' writes a literal backslash-n into the label, which
                # separates the key from the value.
                xs.append(f' {current} [label="{escape(k)}\\n{escape(v)}"]')
                xs.append(f' {parent} -> {current}')
                current += 1
        return current

    dfs(d)
    xs.append('}')
    return '\n'.join(xs)
# Write the DOT source for the example dictionary to standard output.
print(dict2dot(d, root_name='params'))
The output looks like this:
digraph A {
0 [label="params"]
1 [label="fc1"]
0 -> 1
2 [label="kernel\n(768, 3072)"]
1 -> 2
3 [label="bias\n(3072,)"]
1 -> 3
4 [label="fc2"]
0 -> 4
5 [label="kernel\n(3072, 768)"]
4 -> 5
6 [label="bias\n(768,)"]
4 -> 6
7 [label="self_attn"]
0 -> 7
8 [label="k_proj"]
7 -> 8
9 [label="kernel\n(768, 768)"]
8 -> 9
10 [label="bias\n(768,)"]
8 -> 10
11 [label="q_proj"]
7 -> 11
12 [label="kernel\n(768, 768)"]
11 -> 12
13 [label="bias\n(768,)"]
11 -> 13
14 [label="v_proj"]
7 -> 14
15 [label="kernel\n(768, 768)"]
14 -> 15
16 [label="bias\n(768,)"]
14 -> 16
17 [label="out_proj"]
7 -> 17
18 [label="kernel\n(768, 768)"]
17 -> 18
19 [label="bias\n(768,)"]
17 -> 19
20 [label="self_attn_layer_norm"]
0 -> 20
21 [label="scale\n(768,)"]
20 -> 21
22 [label="bias\n(768,)"]
20 -> 22
23 [label="encoder_attn"]
0 -> 23
24 [label="k_proj"]
23 -> 24
25 [label="kernel\n(768, 768)"]
24 -> 25
26 [label="bias\n(768,)"]
24 -> 26
27 [label="q_proj"]
23 -> 27
28 [label="kernel\n(768, 768)"]
27 -> 28
29 [label="bias\n(768,)"]
27 -> 29
30 [label="v_proj"]
23 -> 30
31 [label="kernel\n(768, 768)"]
30 -> 31
32 [label="bias\n(768,)"]
30 -> 32
33 [label="out_proj"]
23 -> 33
34 [label="kernel\n(768, 768)"]
33 -> 34
35 [label="bias\n(768,)"]
33 -> 35
36 [label="encoder_attn_layer_norm"]
0 -> 36
37 [label="scale\n(768,)"]
36 -> 37
38 [label="bias\n(768,)"]
36 -> 38
39 [label="final_layer_norm"]
0 -> 39
40 [label="scale\n(768,)"]
39 -> 40
41 [label="bias\n(768,)"]
39 -> 41
}
To render the graph, save the printed output to a file named test.dot and run Graphviz's dot command to generate a PNG file:
dot -Tpng test.dot -o result.png
License: CC0 1.0