Update emotions classify

Jodavid · Jun 12, 2021 · 7d6ca94 · 7d6ca94
1 parent f8a9da2
commit 7d6ca94
Show file tree

Hide file tree

Showing 7 changed files with 107 additions and 18 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: sentimentBR
 Title: Tools for Sentiment Analysis in English and Portuguese
-Version: 0.3.0
+Version: 0.3.1
 Authors@R: 
       person("Jodavid", "Ferreira", email = "[email protected]",role = c("aut","cre"), comment = c(ORCID = "0000-0002-2131-6464"))
 Description: based on the sentiment package (https://cran.r-project.org/web/packages/sentiment/index.html)

diff --git a/R/classify_emotion.R b/R/classify_emotion.R
@@ -16,6 +16,10 @@
 #'        \item{joy}{The absolute log likelihood of the document expressing a joyous sentiment.}
 #'        \item{sadness}{The absolute log likelihood of the document expressing a sad sentiment.}
 #'        \item{surprise}{The absolute log likelihood of the document expressing a surprised sentiment.}
+#'        \item{trust}{The absolute log likelihood of the document expressing a trust sentiment.}
+#'        \item{negative}{The absolute log likelihood of the document expressing a negative sentiment.}
+#'        \item{positive}{The absolute log likelihood of the document expressing a positive sentiment.}
+#'        \item{anticipation}{The absolute log likelihood of the document expressing an anticipation sentiment.}
 #'        \item{best_fit}{The most likely sentiment category (e.g. anger, disgust, fear, joy, sadness, surprise, trust, positive, negative, anticipation) for the given text.}
 #'
 #' @author Timothy P. Jurka <tpjurka@@ucdavis.edu> and
@@ -30,26 +34,50 @@
 #' # CLASSIFY EMOTIONS
 #' classify_emotion(documents,algorithm="bayes",verbose=TRUE, lang = "en")
 #'
+#' # pt-BR
+#' documentos <- c("Estou muito feliz, animado e otimista.",
+#'                "Estou muito assustado e irritado.")
+#'
+#' # CLASSIFY EMOTIONS
+#' classify_emotion(documentos,algorithm="bayes",verbose=TRUE, lang = "pt")
+#'
 #'
 classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FALSE,lang = "en",...) {
 	matrix <- create_matrix(textColumns,...)
 
 	if(lang == "en"){
-	lexicon <- read.csv(system.file("data/emotions.csv.gz",package="sentimentBR"),header=FALSE)
-	counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),total=nrow(lexicon))
+	lexicon <- read.csv(system.file("data/emotions.csv.gz",package="sentimentBR"),header=FALSE, sep=",")
+	# ---------
+	lexicon[,1] <- rm_accent(lexicon[,1])
+	# ---------
+	counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),
+	               fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),
+	               sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),
+	               trust=length(which(lexicon[,2]=="trust")),positive=length(which(lexicon[,2]=="positive")),
+	               negative=length(which(lexicon[,2]=="negative")),anticipation=length(which(lexicon[,2]=="anticipation")),
+	               total=nrow(lexicon))
 	}else if(lang == "pt"){
-	  lexicon <- read.csv(system.file("data/emotionspt.csv.gz",package="sentimentBR"),header=FALSE)
-	  counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),total=nrow(lexicon))
+	  lexicon <- read.csv(system.file("data/emotionspt.csv.gz",package="sentimentBR"),header=FALSE,
+	                      quote = "", sep=",", row.names = NULL)
+	  # ---------
+	  lexicon[,1] <- rm_accent(lexicon[,1])
+	  # ---------
+	  counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),
+	                 fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),
+	                 sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),
+	                 trust=length(which(lexicon[,2]=="trust")),positive=length(which(lexicon[,2]=="positive")),
+	                 negative=length(which(lexicon[,2]=="negative")),anticipation=length(which(lexicon[,2]=="anticipation")),
+	                 total=nrow(lexicon))
 	}
 
 	# ----------------
-	lexicon[,1] <- rm_accent(lexicon[,1])
+	#lexicon[,1] <- rm_accent(lexicon[,1])
 	documents <- c()
 	# ----------------
 
 	for (i in 1:nrow(matrix)) {
 		if (verbose) print(paste("DOCUMENT",i))
-		scores <- list(anger=0,disgust=0,fear=0,joy=0,sadness=0,surprise=0)
+		scores <- list(anger=0,disgust=0,fear=0,joy=0,sadness=0,surprise=0, trust=0, positive=0,negative=0,anticipation=0)
 		doc <- matrix[i,]
 		words <- findFreqTerms(doc,lowfreq=1)
 
@@ -94,11 +122,15 @@ classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FAL
 
         best_fit <- names(scores)[which.max(unlist(scores))]
         if (best_fit == "disgust" && as.numeric(unlist(scores[2]))-3.09234 < .01) best_fit <- NA
-		documents <- rbind(documents,c(scores$anger,scores$disgust,scores$fear,scores$joy,scores$sadness,scores$surprise,best_fit))
+		documents <- rbind(documents,c(scores$anger,scores$disgust,scores$fear,scores$joy,
+		                               scores$sadness,scores$surprise,scores$trust,scores$positive,
+		                               scores$negative, scores$anticipation,
+		                               best_fit))
 	}
 
 	if(lang == "en"){
-	  colnames(documents) <- c("ANGER","DISGUST","FEAR","JOY","SADNESS","SURPRISE","BEST_FIT")
+	  colnames(documents) <- c("ANGER","DISGUST","FEAR","JOY","SADNESS","SURPRISE",
+	                           "TRUST", "POSITIVE", "NEGATIVE", "ANTICIPATION","BEST_FIT")
 	}else if(lang == "pt"){
 	  #-------------------------
 	  class <- function(x){
@@ -112,15 +144,20 @@ classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FAL
 	                          "joy" = "alegria",
 	                          "sandness" = "triteza",
 	                          "surprise" = "surpresa",
+	                          "trust" = "confiança",
+	                          "positive" = "positiva",
+	                          "negative" = "negativa",
+	                          "anticipation" = "antecipação",
 	                          "NA" = NA
 	      )
 	    }
 	    return(vetor)
 
 	  }
 	  #-------------------------
-	  colnames(documents) <- c("RAIVA","DESGOSTO","MEDO","ALEGRIA","TRISTEZA","SURPRESA","BEST_FIT")
-	  documents[,7] <- class(documents[,7])
+	  colnames(documents) <- c("RAIVA","DESGOSTO","MEDO","ALEGRIA","TRISTEZA","SURPRESA",
+	                           "CONFIANÇA", "POSITIVA", "NEGATIVA", "ANTECIPAÇÃO","BEST_FIT")
+	  documents[,11] <- class(documents[,11])
   }
 	return(documents)
 }
diff --git a/README.Rmd b/README.Rmd
@@ -20,6 +20,10 @@ knitr::opts_chunk$set(
 <!-- badges: end -->
 
 
+Última Atualização: `r format(Sys.Date(),"%d-%m-%Y")`
+
+
+
 ## Visão geral
 
 No CRAN do R, existe um pacote arquivado denominado
@@ -49,7 +53,9 @@ library(sentimentBR)
 
 # Texto a ser classificado
 documento <- c("A alegria que se tem em pensar e aprender faz-nos pensar e aprender ainda mais.",
-               "Um pouco de desprezo economiza bastante ódio.")
+               "Um pouco de desprezo economiza bastante ódio.",
+               "Não crie limites para si mesmo. Você deve ir tão longe quanto sua mente permitir. O que você mais quer pode ser conquistado.",
+               "Pessoas vencedoras não são aquelas que não falham, são aquelas que não desistem")
 
 # Classificando Emoções
 classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
@@ -58,5 +64,13 @@ classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
 classify_polarity(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
 ```
 
+
+## Atualizações:
+
+* **05.06.2021**: Primeiro envio com as funções iniciais.
+
+* **12.06.2021**: Aumento do dicionário para emoções e inclusão de quatro novas classes de classificação.
+
+
 Um post iniciando com Scraping e concluindo com alguns gráficos para analisar os sentimentos de textos pode ser encontrado no meu blog:
 [https://jodavid.github.io/post/](https://jodavid.github.io/post/)
diff --git a/README.md b/README.md
@@ -9,6 +9,8 @@ version](https://www.r-pkg.org/badges/version/sentimentBR)](https://cran.r-proje
 Download](https://cranlogs.r-pkg.org/badges/grand-total/sentimentBR)](https://cran.r-project.org/package=sentimentBR)
 <!-- badges: end -->
 
+Última Atualização: 12-06-2021
+
 ## Visão geral
 
 No CRAN do R, existe um pacote arquivado denominado
@@ -46,24 +48,49 @@ library(sentimentBR)
 
 # Texto a ser classificado
 documento <- c("A alegria que se tem em pensar e aprender faz-nos pensar e aprender ainda mais.",
-               "Um pouco de desprezo economiza bastante ódio.")
+               "Um pouco de desprezo economiza bastante ódio.",
+               "Não crie limites para si mesmo. Você deve ir tão longe quanto sua mente permitir. O que você mais quer pode ser conquistado.",
+               "Pessoas vencedoras não são aquelas que não falham, são aquelas que não desistem")
 
 # Classificando Emoções
 classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
 #>      RAIVA              DESGOSTO           MEDO              
-#> [1,] "1.46871776464786" "3.09234031207392" "2.06783599555953"
-#> [2,] "13.2129533435987" "3.09234031207392" "2.06783599555953"
-#>      ALEGRIA            TRISTEZA          SURPRESA           BEST_FIT 
-#> [1,] "7.34083555412328" "1.7277074477352" "7.34083555412327" "alegria"
-#> [2,] "1.02547755260094" "1.7277074477352" "2.78695866252273" "raiva"
+#> [1,] "9.6244348067824"  "2.61502587407376" "17.0176979015462"
+#> [2,] "16.953528543029"  "9.6244348067824"  "17.0176979015462"
+#> [3,] "16.953528543029"  "2.61502587407376" "9.6244348067824" 
+#> [4,] "2.29534107053581" "2.61502587407376" "2.23117171201856"
+#>      ALEGRIA            TRISTEZA           SURPRESA          
+#> [1,] "23.7177293624799" "16.8636497805622" "16.0460570745889"
+#> [2,] "9.6244348067824"  "9.6244348067824"  "9.6244348067824" 
+#> [3,] "16.6710820846312" "24.102864754342"  "9.6244348067824" 
+#> [4,] "9.6244348067824"  "2.3852198330026"  "3.20281253897588"
+#>      CONFIANÇA          POSITIVA           NEGATIVA          
+#> [1,] "16.7400169329669" "17.3703030365747" "9.6244348067824" 
+#> [2,] "2.50885268059795" "1.87856657699013" "17.7333589627577"
+#> [3,] "9.6244348067824"  "17.3703030365747" "25.8422831187331"
+#> [4,] "2.50885268059795" "1.87856657699013" "9.6244348067824" 
+#>      ANTECIPAÇÃO        BEST_FIT     
+#> [1,] "23.0888562197168" "alegria"    
+#> [2,] "9.6244348067824"  "negativa"   
+#> [3,] "36.5532776326512" "antecipação"
+#> [4,] "9.6244348067824"  "alegria"
 
 # Classificando Polaridade
 classify_polarity(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
 #>      POS                NEG                 POS/NEG             BEST_FIT  
 #> [1,] "9.47547003995745" "0.445453222112551" "21.2715265477714"  "positive"
 #> [2,] "9.47547003995745" "27.5355036756473"  "0.344118275502535" "negative"
+#> [3,] "17.2265151579293" "8.78232285939751"  "1.96149873259283"  "neutral" 
+#> [4,] "9.47547003995745" "0.445453222112551" "21.2715265477714"  "positive"
 ```
 
+## Atualizações:
+
+-   **05.06.2021**: Primeiro envio com as funções iniciais.
+
+-   **12.06.2021**: Aumento do dicionário para emoções e inclusão de
+    quatro novas classes de classificação.
+
 Um post iniciando com Scraping e concluindo com alguns gráficos para
 analisar os sentimentos de textos pode ser encontrado no meu blog:
 <https://jodavid.github.io/post/>
diff --git a/data/emotions.csv.gz b/data/emotions.csv.gz
diff --git a/data/emotionspt.csv.gz b/data/emotionspt.csv.gz
diff --git a/man/classify_emotion.Rd b/man/classify_emotion.Rd