dedan · October 19, 2011 11:44
diff --git a/eng.out b/eng.out
 1	My	my	my	PRP$	PRP$	_	_	0	0	NMOD	NMOD	_	_

 1	brother	brother	brother	NN	NN	_	_	0	0	ROOT	ROOT	_	_

 1	has	have	have	VBZ	VBZ	_	_	0	0	ROOT	ROOT	_	_

 1	a	a	a	DT	DT	_	_	0	0	ROOT	ROOT	_	_

 1	dog	dog	dog	NN	NN	_	_	0	0	ROOT	ROOT	_	_
diff --git a/one-word-per-line.txt b/one-word-per-line.txt
 1	My	_	_	_	_	_	_	_	_	_	_	_	_	_

 1	brother	_	_	_	_	_	_	_	_	_	_	_	_	_

 1	has	_	_	_	_	_	_	_	_	_	_	_	_	_

 1	a	_	_	_	_	_	_	_	_	_	_	_	_	_

 1	dog	_	_	_	_	_	_	_	_	_	_	_	_	_
diff --git a/parse_full.sh b/parse_full.sh
 #!/bin/sh
 #
 # Builds and runs a java command line that invokes
 # se.lth.cs.srl.CompletePipeline with the models configured below, reading
 # $INPUT (passed as -test) and naming $OUTPUT as the -out target.
 #
 # NOPI, RERANKER, TOKENIZER_MODEL and MORPH_MODEL are not assigned by this
 # script (the latter two are commented out below); if they are set in the
 # calling environment their values are picked up as-is.

 ##################################################
 ## (1) The following needs to be set appropriately
 ##################################################
 ##TOKENIZER_MODEL="models/eng/EnglishTok.bin.gz" #This is not used here anyway. The input is assumed to be segmented/tokenized already.
 #MORPH_MODEL="models/ger/morph-ger.model" #Morphological tagger is not applicable to English. Fix the path and uncomment if you are running German.

 INPUT="one-word-per-line.txt"
 # Language code handed to the pipeline. Deliberately NOT called LANG: LANG is
 # the POSIX locale variable and is normally exported, so overwriting it with
 # "eng" would pass an invalid locale to this shell and to the JVM it starts.
 SRL_LANG="eng"
 LEMMATIZER_MODEL="models/eng/lemma-eng.model"
 POS_MODEL="models/eng/tag-eng.model"
 PARSER_MODEL="models/eng/prs-eng.model"
 SRL_MODEL="models/eng/srl-eng.model"
 OUTPUT="$SRL_LANG.out"

 ##################################################
 ## (2) These ones may need to be changed
 ##################################################
 JAVA="java" #Edit this if you want to use a specific JRE.
 MEM="4g" #Memory for the JVM, might need to be increased for large corpora.
 CP="srl.jar:lib/anna.jar:lib/liblinear-1.51-with-deps.jar:lib/opennlp-tools-1.4.3.jar:lib/maxent-2.5.2.jar:lib/trove.jar:lib/seg.jar"
 JVM_ARGS="-cp $CP -Xmx$MEM"


 # The command is assembled as a single string; $NOPI and $RERANKER expand to
 # nothing when unset, which leaves extra spaces that word splitting discards.
 CMD="$JAVA $JVM_ARGS se.lth.cs.srl.CompletePipeline $SRL_LANG $NOPI $RERANKER -tagger $POS_MODEL -parser $PARSER_MODEL -srl $SRL_MODEL -test $INPUT -out $OUTPUT"

 # Optional stages: each flag is appended only when its model path is non-empty.
 if [ -n "$TOKENIZER_MODEL" ]; then
   CMD="$CMD -token $TOKENIZER_MODEL"
 fi

 if [ -n "$LEMMATIZER_MODEL" ]; then
   CMD="$CMD -lemma $LEMMATIZER_MODEL"
 fi

 if [ -n "$MORPH_MODEL" ]; then
   CMD="$CMD -morph $MORPH_MODEL"
 fi

 echo "Executing: $CMD"

 # Intentionally unquoted: the string must be word-split into the program name
 # and its arguments. Consequently none of the paths above may contain spaces.
 $CMD
	1 My my my PRP$ PRP$ _ _ 0 0 NMOD NMOD _ _

	1 brother brother brother NN NN _ _ 0 0 ROOT ROOT _ _

	1 has have have VBZ VBZ _ _ 0 0 ROOT ROOT _ _

	1 a a a DT DT _ _ 0 0 ROOT ROOT _ _

	1 dog dog dog NN NN _ _ 0 0 ROOT ROOT _ _
	1 My _ _ _ _ _ _ _ _ _ _ _ _ _

	1 brother _ _ _ _ _ _ _ _ _ _ _ _ _

	1 has _ _ _ _ _ _ _ _ _ _ _ _ _

	1 a _ _ _ _ _ _ _ _ _ _ _ _ _

	1 dog _ _ _ _ _ _ _ _ _ _ _ _ _
	#!/bin/sh
	#
	# Builds and runs a java command line that invokes
	# se.lth.cs.srl.CompletePipeline with the models configured below, reading
	# $INPUT (passed as -test) and naming $OUTPUT as the -out target.
	#
	# NOPI, RERANKER, TOKENIZER_MODEL and MORPH_MODEL are not assigned by this
	# script (the latter two are commented out below); if they are set in the
	# calling environment their values are picked up as-is.

	##################################################
	## (1) The following needs to be set appropriately
	##################################################
	##TOKENIZER_MODEL="models/eng/EnglishTok.bin.gz" #This is not used here anyway. The input is assumed to be segmented/tokenized already.
	#MORPH_MODEL="models/ger/morph-ger.model" #Morphological tagger is not applicable to English. Fix the path and uncomment if you are running German.

	INPUT="one-word-per-line.txt"
	# Language code handed to the pipeline. Deliberately NOT called LANG: LANG is
	# the POSIX locale variable and is normally exported, so overwriting it with
	# "eng" would pass an invalid locale to this shell and to the JVM it starts.
	SRL_LANG="eng"
	LEMMATIZER_MODEL="models/eng/lemma-eng.model"
	POS_MODEL="models/eng/tag-eng.model"
	PARSER_MODEL="models/eng/prs-eng.model"
	SRL_MODEL="models/eng/srl-eng.model"
	OUTPUT="$SRL_LANG.out"

	##################################################
	## (2) These ones may need to be changed
	##################################################
	JAVA="java" #Edit this if you want to use a specific JRE.
	MEM="4g" #Memory for the JVM, might need to be increased for large corpora.
	CP="srl.jar:lib/anna.jar:lib/liblinear-1.51-with-deps.jar:lib/opennlp-tools-1.4.3.jar:lib/maxent-2.5.2.jar:lib/trove.jar:lib/seg.jar"
	JVM_ARGS="-cp $CP -Xmx$MEM"


	# The command is assembled as a single string; $NOPI and $RERANKER expand to
	# nothing when unset, which leaves extra spaces that word splitting discards.
	CMD="$JAVA $JVM_ARGS se.lth.cs.srl.CompletePipeline $SRL_LANG $NOPI $RERANKER -tagger $POS_MODEL -parser $PARSER_MODEL -srl $SRL_MODEL -test $INPUT -out $OUTPUT"

	# Optional stages: each flag is appended only when its model path is non-empty.
	if [ -n "$TOKENIZER_MODEL" ]; then
	  CMD="$CMD -token $TOKENIZER_MODEL"
	fi

	if [ -n "$LEMMATIZER_MODEL" ]; then
	  CMD="$CMD -lemma $LEMMATIZER_MODEL"
	fi

	if [ -n "$MORPH_MODEL" ]; then
	  CMD="$CMD -morph $MORPH_MODEL"
	fi

	echo "Executing: $CMD"

	# Intentionally unquoted: the string must be word-split into the program name
	# and its arguments. Consequently none of the paths above may contain spaces.
	$CMD