Skip to content

Commit

Permalink
Update emotions classify
Browse files Browse the repository at this point in the history
  • Loading branch information
Jodavid committed Jun 12, 2021
1 parent f8a9da2 commit 7d6ca94
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 18 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sentimentBR
Title: Tools for Sentiment Analysis in English and Portuguese
Version: 0.3.0
Version: 0.3.1
Authors@R:
person("Jodavid", "Ferreira", email = "[email protected]",role = c("aut","cre"), comment = c(ORCID = "0000-0002-2131-6464"))
Description: based on the sentiment package (https://cran.r-project.org/web/packages/sentiment/index.html)
Expand Down
57 changes: 47 additions & 10 deletions R/classify_emotion.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
#' \item{joy}{The absolute log likelihood of the document expressing a joyous sentiment.}
#' \item{sadness}{The absolute log likelihood of the document expressing a sad sentiment.}
#' \item{surprise}{The absolute log likelihood of the document expressing a surprised sentiment.}
#' \item{trust}{The absolute log likelihood of the document expressing a trust sentiment.}
#' \item{negative}{The absolute log likelihood of the document expressing a negative sentiment.}
#' \item{positive}{The absolute log likelihood of the document expressing a positive sentiment.}
#' \item{anticipation}{The absolute log likelihood of the document expressing an anticipation sentiment.}
#' \item{best_fit}{The most likely sentiment category (e.g. anger, disgust, fear, joy, sadness, surprise, trust, positive, negative, anticipation) for the given text.}
#'
#' @author Timothy P. Jurka <tpjurka@@ucdavis.edu> and
Expand All @@ -30,26 +34,50 @@
#' # CLASSIFY EMOTIONS
#' classify_emotion(documents,algorithm="bayes",verbose=TRUE, lang = "en")
#'
#' # pt-BR
#' documentos <- c("Estou muito feliz, animado e otimista.",
#' "Estou muito assustado e irritado.")
#'
#' # CLASSIFY EMOTIONS
#' classify_emotion(documentos,algorithm="bayes",verbose=TRUE, lang = "pt")
#'
#'
classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FALSE,lang = "en",...) {
matrix <- create_matrix(textColumns,...)

if(lang == "en"){
lexicon <- read.csv(system.file("data/emotions.csv.gz",package="sentimentBR"),header=FALSE)
counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),total=nrow(lexicon))
lexicon <- read.csv(system.file("data/emotions.csv.gz",package="sentimentBR"),header=FALSE, sep=",")
# ---------
lexicon[,1] <- rm_accent(lexicon[,1])
# ---------
counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),
fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),
sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),
trust=length(which(lexicon[,2]=="trust")),positive=length(which(lexicon[,2]=="positive")),
negative=length(which(lexicon[,2]=="negative")),anticipation=length(which(lexicon[,2]=="anticipation")),
total=nrow(lexicon))
}else if(lang == "pt"){
lexicon <- read.csv(system.file("data/emotionspt.csv.gz",package="sentimentBR"),header=FALSE)
counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),total=nrow(lexicon))
lexicon <- read.csv(system.file("data/emotionspt.csv.gz",package="sentimentBR"),header=FALSE,
quote = "", sep=",", row.names = NULL)
# ---------
lexicon[,1] <- rm_accent(lexicon[,1])
# ---------
counts <- list(anger=length(which(lexicon[,2]=="anger")),disgust=length(which(lexicon[,2]=="disgust")),
fear=length(which(lexicon[,2]=="fear")),joy=length(which(lexicon[,2]=="joy")),
sadness=length(which(lexicon[,2]=="sadness")),surprise=length(which(lexicon[,2]=="surprise")),
trust=length(which(lexicon[,2]=="trust")),positive=length(which(lexicon[,2]=="positive")),
negative=length(which(lexicon[,2]=="negative")),anticipation=length(which(lexicon[,2]=="anticipation")),
total=nrow(lexicon))
}

# ----------------
lexicon[,1] <- rm_accent(lexicon[,1])
#lexicon[,1] <- rm_accent(lexicon[,1])
documents <- c()
# ----------------

for (i in 1:nrow(matrix)) {
if (verbose) print(paste("DOCUMENT",i))
scores <- list(anger=0,disgust=0,fear=0,joy=0,sadness=0,surprise=0)
scores <- list(anger=0,disgust=0,fear=0,joy=0,sadness=0,surprise=0, trust=0, positive=0,negative=0,anticipation=0)
doc <- matrix[i,]
words <- findFreqTerms(doc,lowfreq=1)

Expand Down Expand Up @@ -94,11 +122,15 @@ classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FAL

best_fit <- names(scores)[which.max(unlist(scores))]
if (best_fit == "disgust" && as.numeric(unlist(scores[2]))-3.09234 < .01) best_fit <- NA
documents <- rbind(documents,c(scores$anger,scores$disgust,scores$fear,scores$joy,scores$sadness,scores$surprise,best_fit))
documents <- rbind(documents,c(scores$anger,scores$disgust,scores$fear,scores$joy,
scores$sadness,scores$surprise,scores$trust,scores$positive,
scores$negative, scores$anticipation,
best_fit))
}

if(lang == "en"){
colnames(documents) <- c("ANGER","DISGUST","FEAR","JOY","SADNESS","SURPRISE","BEST_FIT")
colnames(documents) <- c("ANGER","DISGUST","FEAR","JOY","SADNESS","SURPRISE",
"TRUST", "POSITIVE", "NEGATIVE", "ANTICIPATION","BEST_FIT")
}else if(lang == "pt"){
#-------------------------
class <- function(x){
Expand All @@ -112,15 +144,20 @@ classify_emotion <- function(textColumns,algorithm="bayes",prior=1.0,verbose=FAL
"joy" = "alegria",
"sandness" = "triteza",
"surprise" = "surpresa",
"trust" = "confiança",
"positive" = "positiva",
"negative" = "negativa",
"anticipation" = "antecipação",
"NA" = NA
)
}
return(vetor)

}
#-------------------------
colnames(documents) <- c("RAIVA","DESGOSTO","MEDO","ALEGRIA","TRISTEZA","SURPRESA","BEST_FIT")
documents[,7] <- class(documents[,7])
colnames(documents) <- c("RAIVA","DESGOSTO","MEDO","ALEGRIA","TRISTEZA","SURPRESA",
"CONFIANÇA", "POSITIVA", "NEGATIVA", "ANTECIPAÇÃO","BEST_FIT")
documents[,11] <- class(documents[,11])
}
return(documents)
}
16 changes: 15 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ knitr::opts_chunk$set(
<!-- badges: end -->


Última Atualização: `r format(Sys.Date(),"%d-%m-%Y")`



## Visão geral

No CRAN do R, existe um pacote arquivado denominado
Expand Down Expand Up @@ -49,7 +53,9 @@ library(sentimentBR)
# Texto a ser classificado
documento <- c("A alegria que se tem em pensar e aprender faz-nos pensar e aprender ainda mais.",
"Um pouco de desprezo economiza bastante ódio.")
"Um pouco de desprezo economiza bastante ódio.",
"Não crie limites para si mesmo. Você deve ir tão longe quanto sua mente permitir. O que você mais quer pode ser conquistado.",
"Pessoas vencedoras não são aquelas que não falham, são aquelas que não desistem")
# Classificando Emoções
classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
Expand All @@ -58,5 +64,13 @@ classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
classify_polarity(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
```


## Atualizações:

* **05.06.2021**: Primeiro envio com as funções iniciais.

* **12.06.2021**: Aumento do dicionário para emoções e inclusão de quatro novas classes de classificação.


Um post iniciando com Scraping e concluindo com alguns gráficos para analisar os sentimentos de textos pode ser encontrado no meu blog:
[https://jodavid.github.io/post/](https://jodavid.github.io/post/)
39 changes: 33 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ version](https://www.r-pkg.org/badges/version/sentimentBR)](https://cran.r-proje
Download](https://cranlogs.r-pkg.org/badges/grand-total/sentimentBR)](https://cran.r-project.org/package=sentimentBR)
<!-- badges: end -->

Última Atualização: 12-06-2021

## Visão geral

No CRAN do R, existe um pacote arquivado denominado
Expand Down Expand Up @@ -46,24 +48,49 @@ library(sentimentBR)

# Texto a ser classificado
documento <- c("A alegria que se tem em pensar e aprender faz-nos pensar e aprender ainda mais.",
"Um pouco de desprezo economiza bastante ódio.")
"Um pouco de desprezo economiza bastante ódio.",
"Não crie limites para si mesmo. Você deve ir tão longe quanto sua mente permitir. O que você mais quer pode ser conquistado.",
"Pessoas vencedoras não são aquelas que não falham, são aquelas que não desistem")

# Classificando Emoções
classify_emotion(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
#> RAIVA DESGOSTO MEDO
#> [1,] "1.46871776464786" "3.09234031207392" "2.06783599555953"
#> [2,] "13.2129533435987" "3.09234031207392" "2.06783599555953"
#> ALEGRIA TRISTEZA SURPRESA BEST_FIT
#> [1,] "7.34083555412328" "1.7277074477352" "7.34083555412327" "alegria"
#> [2,] "1.02547755260094" "1.7277074477352" "2.78695866252273" "raiva"
#> [1,] "9.6244348067824" "2.61502587407376" "17.0176979015462"
#> [2,] "16.953528543029" "9.6244348067824" "17.0176979015462"
#> [3,] "16.953528543029" "2.61502587407376" "9.6244348067824"
#> [4,] "2.29534107053581" "2.61502587407376" "2.23117171201856"
#> ALEGRIA TRISTEZA SURPRESA
#> [1,] "23.7177293624799" "16.8636497805622" "16.0460570745889"
#> [2,] "9.6244348067824" "9.6244348067824" "9.6244348067824"
#> [3,] "16.6710820846312" "24.102864754342" "9.6244348067824"
#> [4,] "9.6244348067824" "2.3852198330026" "3.20281253897588"
#> CONFIANÇA POSITIVA NEGATIVA
#> [1,] "16.7400169329669" "17.3703030365747" "9.6244348067824"
#> [2,] "2.50885268059795" "1.87856657699013" "17.7333589627577"
#> [3,] "9.6244348067824" "17.3703030365747" "25.8422831187331"
#> [4,] "2.50885268059795" "1.87856657699013" "9.6244348067824"
#> ANTECIPAÇÃO BEST_FIT
#> [1,] "23.0888562197168" "alegria"
#> [2,] "9.6244348067824" "negativa"
#> [3,] "36.5532776326512" "antecipação"
#> [4,] "9.6244348067824" "alegria"

# Classificando Polaridade
classify_polarity(documento,algorithm="bayes",verbose=FALSE, lang = "pt")
#> POS NEG POS/NEG BEST_FIT
#> [1,] "9.47547003995745" "0.445453222112551" "21.2715265477714" "positive"
#> [2,] "9.47547003995745" "27.5355036756473" "0.344118275502535" "negative"
#> [3,] "17.2265151579293" "8.78232285939751" "1.96149873259283" "neutral"
#> [4,] "9.47547003995745" "0.445453222112551" "21.2715265477714" "positive"
```

## Atualizações:

- **05.06.2021**: Primeiro envio com as funções iniciais.

- **12.06.2021**: Aumento do dicionário para emoções e inclusão de
quatro novas classes de classificação.

Um post iniciando com Scraping e concluindo com alguns gráficos para
analisar os sentimentos de textos pode ser encontrado no meu blog:
<https://jodavid.github.io/post/>
Binary file modified data/emotions.csv.gz
Binary file not shown.
Binary file modified data/emotionspt.csv.gz
Binary file not shown.
11 changes: 11 additions & 0 deletions man/classify_emotion.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7d6ca94

Please sign in to comment.