Hi all,
In my data frame df, I would like to add a new column containing a string
that lists all the verbs found in each row of df$Message.
> library(openNLP)
> library(NLP)
> dput(df)
structure(list(DocumentID = c(478920L, 510133L, 499497L, 930234L
), Message = structure(c(4L, 2L, 3L, 1L), .Label = c("Thank you very much
for your nice feedback.\n",
"THank you, added it", "Thanks for the well explained
article.",
"The solution has been updated"), class = "factor")), class
= "data.frame", row.names = c(NA,
-4L))
tagPOS <- function(x, ...) {
  # Part-of-speech tag a piece of text with the openNLP Maxent models.
  #
  # Args:
  #   x:   Text to tag; it is coerced with as.String().
  #   ...: Accepted for call compatibility; not used in the body.
  #
  # Returns:
  #   A list with two elements:
  #     POStagged - one string of "token/TAG" pairs separated by spaces.
  #     POStags   - the POS tags, one per word token.
  text <- as.String(x)

  # Treat the whole input as a single sentence span so the word tokenizer
  # has a sentence annotation to work from.
  whole_text <- Annotation(1L, "sentence", 1L, nchar(text))
  tokens <- annotate(text, Maxent_Word_Token_Annotator(), whole_text)
  tagged <- annotate(text, Maxent_POS_Tag_Annotator(), tokens)

  # Keep only the word-level annotations (drops the sentence span).
  words <- tagged[tagged$type == "word"]
  tags <- unlist(lapply(words$features, function(feature) feature[["POS"]]))

  # Subsetting the String by the word annotations yields the token texts.
  pairs <- sprintf("%s/%s", text[words], tags)

  list(
    POStagged = paste(pairs, collapse = " "),
    POStags = tags
  )
}
Any help would be appreciated.
Thanks in advance!
Elahe