diff --git a/DESCRIPTION b/DESCRIPTION index 5e4ad71..567c9a9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: sentimentBR Title: Tools for Sentiment Analysis in English and Portuguese -Version: 0.3.0 +Version: 0.3.1 Authors@R: person("Jodavid", "Ferreira", email = "jdaf1@de.ufpe.br",role = c("aut","cre"), comment = c(ORCID = "0000-0002-2131-6464")) Description: based on the sentiment package (https://cran.r-project.org/web/packages/sentiment/index.html) diff --git a/R/classify_emotion.R b/R/classify_emotion.R index e1a9907..78ee653 100644 --- a/R/classify_emotion.R +++ b/R/classify_emotion.R @@ -16,6 +16,10 @@ #' \item{joy}{The absolute log likelihood of the document expressing a joyous sentiment.} #' \item{sadness}{The absolute log likelihood of the document expressing a sad sentiment.} #' \item{surprise}{The absolute log likelihood of the document expressing a surprised sentiment.} +#' \item{trust}{The absolute log likelihood of the document expressing a trust sentiment.} +#' \item{negative}{The absolute log likelihood of the document expressing a negative sentiment.} +#' \item{positive}{The absolute log likelihood of the document expressing a positive sentiment.} +#' \item{anticipation}{The absolute log likelihood of the document expressing an anticipation sentiment.} #' \item{best_fit}{The most likely sentiment category (e.g. anger, disgust, fear, joy, sadness, surprise) for the given text.} #' #' @author Timothy P. Jurka and @@ -30,26 +34,50 @@ #' # CLASSIFY EMOTIONS #' classify_emotion(documents,algorithm="bayes",verbose=TRUE, lang = "en") #' +#' # pt-BR +#' documentos <- c("Estou muito feliz, animado e otimista.", +#' "Estou muito assustado e irritado.") +#' +#' # CLASSIFY EMOTIONS +#' classify_emotion(documentos,algorithm="bayes",verbose=TRUE, lang = "pt") +#' #' classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FALSE,lang = "en",...) { matrix <- create_matrix(textColumns,...) 
if(lang == "en"){ - lexicon <- read.csv(system.file("data/emotions.csv.gz",package="sentimentBR"),header=FALSE) - counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),total=nrow(lexicon)) + lexicon <- read.csv(system.file("data/emotions.csv.gz",package="sentimentBR"),header=FALSE, sep=",") + # --------- + lexicon[,1] <- rm_accent(lexicon[,1]) + # --------- + counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")), + fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")), + sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")), + trust=length(which(lexicon[,2]=="trust")),positive=length(which(lexicon[,2]=="positive")), + negative=length(which(lexicon[,2]=="negative")),anticipation=length(which(lexicon[,2]=="anticipation")), + total=nrow(lexicon)) }else if(lang == "pt"){ - lexicon <- read.csv(system.file("data/emotionspt.csv.gz",package="sentimentBR"),header=FALSE) - counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),total=nrow(lexicon)) + lexicon <- read.csv(system.file("data/emotionspt.csv.gz",package="sentimentBR"),header=FALSE, + quote = "", sep=",", row.names = NULL) + # --------- + lexicon[,1] <- rm_accent(lexicon[,1]) + # --------- + counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")), + fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")), + sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")), + 
trust=length(which(lexicon[,2]=="trust")),positive=length(which(lexicon[,2]=="positive")), + negative=length(which(lexicon[,2]=="negative")),anticipation=length(which(lexicon[,2]=="anticipation")), + total=nrow(lexicon)) } # ---------------- - lexicon[,1] <- rm_accent(lexicon[,1]) + #lexicon[,1] <- rm_accent(lexicon[,1]) documents <- c() # ---------------- for (i in 1:nrow(matrix)) { if (verbose) print(paste("DOCUMENT",i)) - scores <- list(anger=0,disgust=0,fear=0,joy=0,sadness=0,surprise=0) + scores <- list(anger=0,disgust=0,fear=0,joy=0,sadness=0,surprise=0, trust=0, positive=0,negative=0,anticipation=0) doc <- matrix[i,] words <- findFreqTerms(doc,lowfreq=1) @@ -94,11 +122,15 @@ classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FAL best_fit <- names(scores)[which.max(unlist(scores))] if (best_fit == "disgust" && as.numeric(unlist(scores[2]))-3.09234 < .01) best_fit <- NA - documents <- rbind(documents,c(scores$anger,scores$disgust,scores$fear,scores$joy,scores$sadness,scores$surprise,best_fit)) + documents <- rbind(documents,c(scores$anger,scores$disgust,scores$fear,scores$joy, + scores$sadness,scores$surprise,scores$trust,scores$positive, + scores$negative, scores$anticipation, + best_fit)) } if(lang == "en"){ - colnames(documents) <- c("ANGER","DISGUST","FEAR","JOY","SADNESS","SURPRISE","BEST_FIT") + colnames(documents) <- c("ANGER","DISGUST","FEAR","JOY","SADNESS","SURPRISE", + "TRUST", "POSITIVE", "NEGATIVE", "ANTICIPATION","BEST_FIT") }else if(lang == "pt"){ #------------------------- class <- function(x){ @@ -112,6 +144,10 @@ classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FAL "joy" = "alegria", "sandness" = "triteza", "surprise" = "surpresa", + "trust" = "confiança", + "positive" = "positiva", + "negative" = "negativa", + "anticipation" = "antecipação", "NA" = NA ) } @@ -119,8 +155,9 @@ classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FAL } #------------------------- - 
colnames(documents) <- c("RAIVA","DESGOSTO","MEDO","ALEGRIA","TRISTEZA","SURPRESA","BEST_FIT") - documents[,7] <- class(documents[,7]) + colnames(documents) <- c("RAIVA","DESGOSTO","MEDO","ALEGRIA","TRISTEZA","SURPRESA", + "CONFIANÇA", "POSITIVA", "NEGATIVA", "ANTECIPAÇÃO","BEST_FIT") + documents[,11] <- class(documents[,11]) } return(documents) } diff --git a/README.Rmd b/README.Rmd index b08da51..69d591a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -20,6 +20,10 @@ knitr::opts_chunk$set( +Última Atualização: `r format(Sys.Date(),"%d-%m-%Y")` + + + ## Visão geral No CRAN do R, existe um pacote arquivado denominado @@ -49,7 +53,9 @@ library(sentimentBR) # Texto a ser classficado documento <- c("A alegria que se tem em pensar e aprender faz-nos pensar e aprender ainda mais.", - "Um pouco de desprezo economiza bastante ódio.") + "Um pouco de desprezo economiza bastante ódio.", + "Não crie limites para si mesmo. Você deve ir tão longe quanto sua mente permitir. O que você mais quer pode ser conquistado.", + "Pessoas vencedoras não são aquelas que não falham, são aquelas que não desistem") # Classificando Emoções classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt") @@ -58,5 +64,13 @@ classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt") classify_polarity(documento,algorithm="bayes",verbose=FALSE, lang = "pt") ``` + +## Atualizações: + +* **05.06.2021**: Primeiro envio com as funções iniciais. + +* **12.06.2021**: Aumento do dicionário para emoções e inclusão de quatro novas classes de classificação. 
+ + Um post iniciando com Scraping e concluíndo com alguns gráficos para análisar os sentimentos de textos pode ser encontrado no meu blog: [https://jodavid.github.io/post/](https://jodavid.github.io/post/) diff --git a/README.md b/README.md index f699168..25ee945 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ version](https://www.r-pkg.org/badges/version/sentimentBR)](https://cran.r-proje Download](https://cranlogs.r-pkg.org/badges/grand-total/sentimentBR)](https://cran.r-project.org/package=sentimentBR) +Última Atualização: 12-06-2021 + ## Visão geral No CRAN do R, existe um pacote arquivado denominado @@ -46,24 +48,49 @@ library(sentimentBR) # Texto a ser classficado documento <- c("A alegria que se tem em pensar e aprender faz-nos pensar e aprender ainda mais.", - "Um pouco de desprezo economiza bastante ódio.") + "Um pouco de desprezo economiza bastante ódio.", + "Não crie limites para si mesmo. Você deve ir tão longe quanto sua mente permitir. O que você mais quer pode ser conquistado.", + "Pessoas vencedoras não são aquelas que não falham, são aquelas que não desistem") # Classificando Emoções classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt") #> RAIVA DESGOSTO MEDO -#> [1,] "1.46871776464786" "3.09234031207392" "2.06783599555953" -#> [2,] "13.2129533435987" "3.09234031207392" "2.06783599555953" -#> ALEGRIA TRISTEZA SURPRESA BEST_FIT -#> [1,] "7.34083555412328" "1.7277074477352" "7.34083555412327" "alegria" -#> [2,] "1.02547755260094" "1.7277074477352" "2.78695866252273" "raiva" +#> [1,] "9.6244348067824" "2.61502587407376" "17.0176979015462" +#> [2,] "16.953528543029" "9.6244348067824" "17.0176979015462" +#> [3,] "16.953528543029" "2.61502587407376" "9.6244348067824" +#> [4,] "2.29534107053581" "2.61502587407376" "2.23117171201856" +#> ALEGRIA TRISTEZA SURPRESA +#> [1,] "23.7177293624799" "16.8636497805622" "16.0460570745889" +#> [2,] "9.6244348067824" "9.6244348067824" "9.6244348067824" +#> [3,] "16.6710820846312" 
"24.102864754342" "9.6244348067824" +#> [4,] "9.6244348067824" "2.3852198330026" "3.20281253897588" +#> CONFIANÇA POSITIVA NEGATIVA +#> [1,] "16.7400169329669" "17.3703030365747" "9.6244348067824" +#> [2,] "2.50885268059795" "1.87856657699013" "17.7333589627577" +#> [3,] "9.6244348067824" "17.3703030365747" "25.8422831187331" +#> [4,] "2.50885268059795" "1.87856657699013" "9.6244348067824" +#> ANTECIPAÇÃO BEST_FIT +#> [1,] "23.0888562197168" "alegria" +#> [2,] "9.6244348067824" "negativa" +#> [3,] "36.5532776326512" "antecipação" +#> [4,] "9.6244348067824" "alegria" # Classificando Polaridade classify_polarity(documento,algorithm="bayes",verbose=FALSE, lang = "pt") #> POS NEG POS/NEG BEST_FIT #> [1,] "9.47547003995745" "0.445453222112551" "21.2715265477714" "positive" #> [2,] "9.47547003995745" "27.5355036756473" "0.344118275502535" "negative" +#> [3,] "17.2265151579293" "8.78232285939751" "1.96149873259283" "neutral" +#> [4,] "9.47547003995745" "0.445453222112551" "21.2715265477714" "positive" ``` +## Atualizações: + +- **05.06.2021**: Primeiro envio com as funções iniciais. + +- **12.06.2021**: Aumento do dicionário para emoções e inclusão de + quatro novas classes de classificação. 
+ Um post iniciando com Scraping e concluíndo com alguns gráficos para análisar os sentimentos de textos pode ser encontrado no meu blog: diff --git a/data/emotions.csv.gz b/data/emotions.csv.gz index 9336dbb..9ba69fa 100644 Binary files a/data/emotions.csv.gz and b/data/emotions.csv.gz differ diff --git a/data/emotionspt.csv.gz b/data/emotionspt.csv.gz index 7c9bacd..b42d0ec 100644 Binary files a/data/emotionspt.csv.gz and b/data/emotionspt.csv.gz differ diff --git a/man/classify_emotion.Rd b/man/classify_emotion.Rd index f2da898..10f63e8 100644 --- a/man/classify_emotion.Rd +++ b/man/classify_emotion.Rd @@ -34,6 +34,10 @@ Returns an object of class \code{data.frame} with seven columns and one row for \item{joy}{The absolute log likelihood of the document expressing a joyous sentiment.} \item{sadness}{The absolute log likelihood of the document expressing a sad sentiment.} \item{surprise}{The absolute log likelihood of the document expressing a surprised sentiment.} + \item{trust}{The absolute log likelihood of the document expressing a trust sentiment.} + \item{negative}{The absolute log likelihood of the document expressing a negative sentiment.} + \item{positive}{The absolute log likelihood of the document expressing a positive sentiment.} + \item{anticipation}{The absolute log likelihood of the document expressing an anticipation sentiment.} \item{best_fit}{The most likely sentiment category (e.g. anger, disgust, fear, joy, sadness, surprise) for the given text.} } \description{ @@ -47,6 +51,13 @@ documents <- c("I am very happy, excited, and optimistic.", # CLASSIFY EMOTIONS classify_emotion(documents,algorithm="bayes",verbose=TRUE, lang = "en") +# pt-BR +documentos <- c("Estou muito feliz, animado e otimista.", + "Estou muito assustado e irritado.") + +# CLASSIFY EMOTIONS +classify_emotion(documentos,algorithm="bayes",verbose=TRUE, lang = "pt") + } \author{