Created
October 19, 2011 11:44
-
-
Save dedan/1298058 to your computer and use it in GitHub Desktop.
problems with mate-tools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 My my my PRP$ PRP$ _ _ 0 0 NMOD NMOD _ _ | |
1 brother brother brother NN NN _ _ 0 0 ROOT ROOT _ _ | |
1 has have have VBZ VBZ _ _ 0 0 ROOT ROOT _ _ | |
1 a a a DT DT _ _ 0 0 ROOT ROOT _ _ | |
1 dog dog dog NN NN _ _ 0 0 ROOT ROOT _ _ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 My _ _ _ _ _ _ _ _ _ _ _ _ _ | |
1 brother _ _ _ _ _ _ _ _ _ _ _ _ _ | |
1 has _ _ _ _ _ _ _ _ _ _ _ _ _ | |
1 a _ _ _ _ _ _ _ _ _ _ _ _ _ | |
1 dog _ _ _ _ _ _ _ _ _ _ _ _ _ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Runs the Java class se.lth.cs.srl.CompletePipeline on an already
# segmented/tokenized input file and writes the result to $OUTPUT.
#
# Settings are edited in sections (1) and (2) below. The following variables
# are not assigned by this script and are therefore taken from the environment
# when present:
#   NOPI, RERANKER    extra arguments inserted right after the language code
#   TOKENIZER_MODEL   adds "-token <model>" when non-empty
#   MORPH_MODEL       adds "-morph <model>" when non-empty
#
# Exit status: that of the Java process; 127 if the Java launcher cannot be
# found; 1 if the input file is not readable.

set -u

##################################################
## (1) The following needs to be set appropriately
##################################################
##TOKENIZER_MODEL="models/eng/EnglishTok.bin.gz" #This is not used here anyway. The input is assumed to be segmented/tokenized already.
#MORPH_MODEL="models/ger/morph-ger.model" #Morphological tagger is not applicable to English. Fix the path and uncomment if you are running German.
INPUT="one-word-per-line.txt"
# Deliberately not named LANG: LANG is the POSIX locale variable, and assigning
# to it when it is already exported would hand the JVM an invalid locale ("eng").
SRL_LANG="eng"
LEMMATIZER_MODEL="models/eng/lemma-eng.model"
POS_MODEL="models/eng/tag-eng.model"
PARSER_MODEL="models/eng/prs-eng.model"
SRL_MODEL="models/eng/srl-eng.model"
OUTPUT="$SRL_LANG.out"

##################################################
## (2) These ones may need to be changed
##################################################
JAVA="java" #Edit this if you want to use a specific JRE.
MEM="4g" #Memory for the JVM, might need to be increased for large corpora.
CP="srl.jar:lib/anna.jar:lib/liblinear-1.51-with-deps.jar:lib/opennlp-tools-1.4.3.jar:lib/maxent-2.5.2.jar:lib/trove.jar:lib/seg.jar"

# Fail early with a clear message instead of a late error from the JVM.
if ! command -v "$JAVA" >/dev/null 2>&1; then
  echo "Error: Java launcher not found: $JAVA" >&2
  exit 127
fi
if [ ! -r "$INPUT" ]; then
  echo "Error: cannot read input file: $INPUT" >&2
  exit 1
fi

# Build the argument list in the positional parameters (POSIX sh has no
# arrays) so that every path stays a single argument even if it contains
# whitespace or glob characters.
set -- -cp "$CP" "-Xmx$MEM" se.lth.cs.srl.CompletePipeline "$SRL_LANG"

# NOPI and RERANKER are left unquoted on purpose: when empty or unset they must
# vanish, and a value holding several flags must split into separate words.
# shellcheck disable=SC2086
set -- "$@" ${NOPI-} ${RERANKER-}

set -- "$@" -tagger "$POS_MODEL" -parser "$PARSER_MODEL" -srl "$SRL_MODEL" \
  -test "$INPUT" -out "$OUTPUT"

# Optional models are appended only when configured.
if [ -n "${TOKENIZER_MODEL:-}" ]; then
  set -- "$@" -token "$TOKENIZER_MODEL"
fi
if [ -n "${LEMMATIZER_MODEL:-}" ]; then
  set -- "$@" -lemma "$LEMMATIZER_MODEL"
fi
if [ -n "${MORPH_MODEL:-}" ]; then
  set -- "$@" -morph "$MORPH_MODEL"
fi

echo "Executing: $JAVA $*"
# Last command: its exit status becomes the exit status of the script.
"$JAVA" "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment