Last active
August 29, 2015 13:55
-
-
Save brendano/8760267 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
****** Number of unique edge labels per formalism ****** | |
% for f in dm pas pcedt; { echo === $f; cat $f.sdp | grep -v '^#'|cut --complement -f1-6|tr '\t' '\n'|grep -Pv "^(_|)$" | dedup|wc -l }
=== dm | |
51 | |
=== pas | |
42 | |
=== pcedt | |
68 | |
****** Edge label counts per formalism ****** | |
% for f in dm pas pcedt; { echo === $f; cat $f.sdp | grep -v '^#'|cut --complement -f1-6|tr '\t' '\n'|grep -Pv "^(_|)$" | count|sort -rg }
=== dm | |
222649 ARG1 | |
135634 ARG2 | |
64691 compound | |
61085 BV | |
13090 poss | |
11890 _and_c | |
8101 loc | |
7094 ARG3 | |
5089 times | |
5005 mwe | |
4244 appos | |
3378 conj | |
2764 neg | |
2538 subord | |
1793 _or_c | |
1165 _but_c | |
840 plus | |
832 of | |
328 measure | |
285 parenthetical | |
247 temp | |
172 ARG4 | |
160 unspec_manner | |
154 discourse | |
88 _as+well+as_c | |
76 _not_c | |
62 _rather+than_c | |
42 _and+then_c | |
36 _nor_c | |
28 _then_c | |
28 _plus_c | |
25 _but+not_c | |
22 _versus_c | |
21 _instead+of_c | |
15 _but+also_c | |
10 _yet_c | |
10 _even_c | |
10 _and+thus_c | |
9 _and+so_c | |
8 _and+not_c | |
7 _if+not_c | |
5 _and+also_c | |
4 _minus_c | |
3 _much+less_c | |
3 _and+yet_c | |
3 _after_c | |
2 _though_c | |
2 _not+to+mention_c | |
2 _formerly_c | |
1 _except_c | |
1 _except+that_c | |
=== pas | |
102162 adj_ARG1 | |
72416 noun_ARG1 | |
72219 prep_ARG2 | |
71155 prep_ARG1 | |
70869 verb_ARG2 | |
70026 verb_ARG1 | |
69291 det_ARG1 | |
39723 punct_ARG1 | |
23213 aux_ARG2 | |
23148 aux_ARG1 | |
17814 coord_ARG2 | |
17803 coord_ARG1 | |
14018 comp_ARG1 | |
9114 conj_ARG1 | |
6755 poss_ARG2 | |
6467 poss_ARG1 | |
5812 conj_ARG2 | |
5298 verb_ARG3 | |
5211 app_ARG2 | |
5211 app_ARG1 | |
4697 relative_ARG1 | |
3174 comp_MOD | |
1861 lgs_ARG2 | |
1583 verb_MOD | |
1127 adj_ARG2 | |
816 lparen_ARG3 | |
815 lparen_ARG2 | |
815 lparen_ARG1 | |
637 noun_ARG2 | |
411 it_ARG1 | |
235 prep_ARG3 | |
203 dtv_ARG2 | |
199 quote_ARG2 | |
125 comp_ARG2 | |
121 relative_ARG2 | |
121 conj_ARG3 | |
119 quote_ARG3 | |
99 adj_MOD | |
89 aux_MOD | |
21 verb_ARG4 | |
12 prep_MOD | |
10 quote_ARG1 | |
=== pcedt | |
103347 RSTR | |
76457 PAT | |
69476 ACT | |
28248 CONJ.member | |
22086 NE | |
21543 APP | |
16468 TWHEN | |
16019 LOC | |
13273 EFF | |
12018 APPS.member | |
7957 REG | |
7291 EXT | |
6843 RHEM | |
4800 MANN | |
4761 ADDR | |
4088 PREC | |
3269 AIM | |
3266 MAT | |
3143 DIFF | |
2701 DIR3 | |
2650 DESCR | |
2622 ADVS.member | |
2382 ORIG | |
2355 DISJ.member | |
2288 BEN | |
2242 DIR1 | |
2223 COMPL | |
2103 ACMP | |
2096 CAUS | |
1944 CPHR | |
1661 CPR | |
1583 PAR | |
1569 CM | |
1567 THL | |
1386 COND | |
1376 MEANS | |
1276 THO | |
1127 ID | |
1076 OPER.member | |
1012 CRIT | |
989 TPAR | |
967 ATT | |
792 TFHL | |
778 TTILL | |
750 CNCS | |
620 TSIN | |
578 DPHR | |
486 MOD | |
411 RESTR | |
384 CONTRD | |
383 RESL | |
348 SM | |
261 GRAD.member | |
254 CSQ.member | |
243 TFRWH | |
243 AUTH | |
190 SUBS | |
177 DIR2 | |
82 INTT | |
79 CONFR.member | |
78 REAS.member | |
77 TOWH | |
48 CONTRA.member | |
36 PARTL | |
18 NE.member | |
11 HER | |
9 VOCAT | |
6 INTF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment