-
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathw33_psychometrics.Rmd
executable file
·126 lines (109 loc) · 3.66 KB
/
w33_psychometrics.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
---
title: "w33 Psychometrics"
author: "Federica Gazzelloni"
date: "8/19/2022"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
```
```{r}
psych_stats <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-08-16/psych_stats.csv")
```
```{r}
psych_stats%>%head
```
```{r}
my_df <- psych_stats%>%
arrange(rank)%>%
select(char_name,uni_name,personality,avg_rating,rank,rating_sd) %>%
filter(uni_name=="Friends")
my_df
```
Use {tidytext} to select the personality variables to be used in the visualization. In this case a list of encoded variables, such as */U000..* were filtered out, to leave just words in the vector.
```{r}
library(tidytext)
my_df1 <- my_df %>%
mutate(personality=gsub("[^A-z]","unknown",personality))%>%
filter(!personality=="unknown") %>%
#count(personality) %>%
unnest_tokens(word, personality) %>%
inner_join(get_sentiments("bing")) %>%
distinct(char_name,word,sentiment,avg_rating)
```
Further wrangling activity on the dataset is done to select only the personality words which are in common and with highest avg rating values for all of the protagonists in **Friends** TV show.
```{r}
by_names <- my_df1%>%
group_by(word)%>%
summarize(char_name,avg_rating=mean(avg_rating),.groups="drop")%>%
ungroup()%>%
pivot_wider(names_from=char_name,values_from=word)%>%
drop_na()%>%
pivot_longer(cols=2:7,names_to="names",values_to="values")%>%
unnest(values)%>%
arrange(values) %>%
count(values) %>%
group_by(values) %>%
filter(!n<6 & !n>6) %>%
ungroup() %>%
left_join(my_df3,by=c("values"="word"))%>%
select(-n) %>%
pivot_wider(names_from=char_name,values_from=values) %>%
pivot_longer(cols=3:8,names_to="names",values_to="word")%>%
distinct()%>%
drop_na()%>%
mutate(word=str_to_title(word)) %>%
mutate(id_sentiment=ifelse(sentiment=="positive",1,0))
```
```{r}
library(extrafont)
# loadfonts()
```
```{r}
p <-by_names%>%
ggplot(aes(x=avg_rating,y=fct_reorder(word,-avg_rating)))+
geom_col(aes(fill=names), position = position_fill(),color="black")+
ggthemes::scale_fill_tableau()+
guides(fill=guide_legend(nrow = 1,reverse = T,keywidth = 0))+
labs(fill="",
subtitle="\nordered by common high-rating personality",
caption="DataSource: Open Source Psychometrics | #TidyTuesday 2022 week33\nDataViz: Federica Gazzelloni (@fgazzelloni)",
title="Friends: positive and negative personality ratings")+
ggthemes::theme_fivethirtyeight()+
theme(text=element_text(color="grey90",family="Public Sans Medium"),
plot.title = element_text(size=22),
legend.position = "top",
legend.background = element_rect(fill="black",color="black"),
legend.text = element_text(size=12),
strip.background = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_text(size=12),
panel.grid = element_line(size=3),
plot.background = element_rect(fill="black",color="black"),
panel.background = element_rect(fill="black",color="black"))
```
```{r}
library(cowplot)
ggdraw(p)+
draw_image("logo.png",scale=0.25,
x=-0.35,
y=0.45)
ggsave("w33_psychometrics.png",
dpi=320,
height = 7,
width = 9)
```
Other visualization not to be used.
```{r}
by_names %>%
ggplot(aes(fct_reorder(word,avg_rating),avg_rating,
fill=sentiment,color=sentiment))+
geom_point()+
geom_text(aes(label=word),size=3)+
# ggimage::geom_image(x=0.2,y=0.2,image=image)+
geom_line(aes(group=sentiment))+
#geom_col()+
facet_wrap(~fct_reorder(names,-avg_rating),scale="free")+
ggthemes::scale_color_tableau()
```