Last active
December 22, 2016 01:03
-
-
Save njtierney/d0f2bdf37edf6746cc06eee6ee0f4171 to your computer and use it in GitHub Desktop.
Using openNLP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
``` r | |
# Reprex: constituency parsing of a short text with openNLP.
#
# Requires package 'openNLPmodels.en' from the repository at
# <http://datacube.wu.ac.at>.
# install.packages("openNLPmodels.en",
#                  repos = "http://datacube.wu.ac.at/",
#                  type = "source")

# Raise the JVM heap limit. This must be set BEFORE rJava starts the JVM
# (i.e. before openNLP is loaded); changing it afterwards has no effect until
# R is restarted. Without it, Parse_Annotator() failed in the original run
# with:
#   java.lang.OutOfMemoryError: Java heap space
# and every later step then failed because `parse_annotator`, `p`, `ptexts`
# and `ptrees` were never created.
# NOTE(review): 2 GB is a judgement call; raise it (e.g. "-Xmx4g") if the
# parser model still does not fit.
options(java.parameters = "-Xmx2g")

# library() stops with an error if a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library("NLP")
library("openNLP")

## Some text.
s <- paste0(
  "Pierre Vinken, 61 years old, will join the board as a ",
  "nonexecutive director Nov. 29.\n",
  "Mr. Vinken is chairman of Elsevier N.V., ",
  "the Dutch publishing group."
)
s <- as.String(s)

## Need sentence and word token annotations.
sent_token_annotator <- Maxent_Sent_Token_Annotator()
word_token_annotator <- Maxent_Word_Token_Annotator()
a2 <- annotate(s, list(sent_token_annotator, word_token_annotator))

## Build the parser (loads the parser model into the JVM).
parse_annotator <- Parse_Annotator()

## Compute the parse annotations only.
p <- parse_annotator(s, a2)

## Extract the formatted parse trees.
## vapply() pins the result to one character string per annotation.
ptexts <- vapply(p$features, `[[`, character(1), "parse")
ptexts

## Read into NLP Tree objects.
ptrees <- lapply(ptexts, Tree_parse)
ptrees
session_info() | |
#> Session info ------------------------------------------------------------- | |
#> setting value | |
#> version R version 3.3.2 (2016-10-31) | |
#> system x86_64, darwin13.4.0 | |
#> ui RStudio (1.0.44) | |
#> language (EN) | |
#> collate en_AU.UTF-8 | |
#> tz Australia/Brisbane | |
#> date 2016-12-22 | |
#> Packages ----------------------------------------------------------------- | |
#> package * version date source | |
#> backports 1.0.4 2016-10-24 cran (@1.0.4) | |
#> clipr 0.3.0 2016-11-19 cran (@0.3.0) | |
#> crayon 1.3.2 2016-06-28 cran (@1.3.2) | |
#> devtools * 1.12.0.9000 2016-12-18 Github (hadley/devtools@1ce84b0) | |
#> digest 0.6.10 2016-08-02 cran (@0.6.10) | |
#> evaluate 0.10 2016-10-11 cran (@0.10) | |
#> htmltools 0.3.5 2016-03-21 CRAN (R 3.3.0) | |
#> knitr 1.15.6 2016-12-18 Github (yihui/knitr@849f2d0) | |
#> magrittr 1.5 2014-11-22 CRAN (R 3.3.0) | |
#> memoise 1.0.0.9001 2016-12-18 Github (hadley/memoise@884d565) | |
#> NLP * 0.1-9 2016-02-18 CRAN (R 3.3.0) | |
#> openNLP * 0.2-6 2016-02-18 CRAN (R 3.3.0) | |
#> openNLPdata 1.5.3-2 2015-06-24 CRAN (R 3.3.0) | |
#> pkgbuild 0.0.0.9000 2016-12-18 Github (r-pkgs/pkgbuild@65eace0) | |
#> pkgload 0.0.0.9000 2016-12-18 Github (r-pkgs/pkgload@def2b10) | |
#> R6 2.2.0 2016-10-05 cran (@2.2.0) | |
#> Rcpp 0.12.8.2 2016-12-18 Github (RcppCore/Rcpp@e4ca728) | |
#> reprex 0.0.0.9001 2016-09-11 Github (jennybc/reprex@826ddf4) | |
#> rJava 0.9-8 2016-01-07 CRAN (R 3.3.0) | |
#> rmarkdown 1.2.9000 2016-12-18 Github (rstudio/rmarkdown@f94f2f7) | |
#> rprojroot 1.1 2016-10-29 cran (@1.1) | |
#> stringi 1.1.2 2016-10-01 cran (@1.1.2) | |
#> stringr 1.1.0 2016-08-19 CRAN (R 3.3.0) | |
#> testthat * 1.0.2.9000 2016-12-18 Github (hadley/testthat@49e68d1) | |
#> withr 1.0.2 2016-06-20 cran (@1.0.2) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment