forked from rfordatascience/tidytuesday
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheconomist-mistakes.R
124 lines (88 loc) · 3.36 KB
/
economist-mistakes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
library(tidyverse)
library(here)
library(janitor)
### Brexit Raw ----
# Read the Economist Brexit export, promote its third row to column names,
# normalise those names with janitor, and write the remaining rows out.
brexit_raw <- read_csv(here("2019", "2019-04-16", "Economist_brexit.csv"))

brexit_clean <- brexit_raw %>%
  # Row 3 is used as the header. Coerce it explicitly so `nm` is a plain
  # character vector rather than a one-row tibble.
  set_names(nm = as.character(.[3, ])) %>%
  clean_names() %>%
  # Drop rows 1-3 (everything up to and including the header row). A negative
  # index is used instead of `4:nrow(.)`, which would count backwards (4, 3)
  # and keep the header row if the input ever had fewer than 4 rows.
  slice(-(1:3))

brexit_clean %>% write_csv(here("2019", "2019-04-16", "brexit.csv"))
### Corbyn ----
# Read the Economist Corbyn export, apply tidy column names, drop rows that
# contain any NA, and write the result.
corbyn_raw <- read_csv(here("2019", "2019-04-16", "Economist_corbyn.csv"))

corbyn_clean <- corbyn_raw %>%
  # Supply both names as one vector, as the other sections do. Passing the
  # second name as a separate positional argument only worked because
  # set_names() concatenates `...` onto `nm`.
  set_names(nm = c("political_group", "avg_facebook_likes")) %>%
  na.omit()

corbyn_clean %>% write_csv(here("2019", "2019-04-16", "corbyn.csv"))
### Dogs ----
# Load the Economist dogs export, discard any row containing an NA, then label
# the three columns before saving.
dogs_raw <- read_csv(here("2019", "2019-04-16", "Economist_dogs.csv"))

dogs_clean <- set_names(
  na.omit(dogs_raw),
  nm = c("year", "avg_weight", "avg_neck")
)

write_csv(dogs_clean, here("2019", "2019-04-16", "dogs.csv"))
### EU Balance ----
# Reshape the Economist EU-balance export into long form: one row per
# country / account type ("current" or "budget") / year.
eu_balance_raw <- read_csv(
  here("2019", "2019-04-16", "Economist_eu-balance.csv")
)

# First row of the parsed data, as text. The `country != "Country"` filter
# below implies this row carries the file's own column labels; it is kept as
# a reference for the hand-written names that follow.
names_eu <- as.character(eu_balance_raw[1, ])

# The names below were drafted interactively with datapasta. That helper is
# deliberately not called here: it emits text into the editor/console on every
# run and would require a package this script never loads.
clean_names_eu <- c(
  "country",
  "current_2009",
  "current_2010",
  "current_2011",
  "current_2012",
  "current_2013",
  "current_2014",
  "current_2015",
  "budget_2009",
  "budget_2010",
  "budget_2011",
  "budget_2012",
  "budget_2013",
  "budget_2014",
  "budget_2015"
)

# Fail early with a clear message if the raw file's column count ever stops
# matching the hand-written names.
stopifnot(length(names_eu) == length(clean_names_eu))

# Current-account columns -> long form; the budget columns are dropped.
eu_current <- eu_balance_raw %>%
  set_names(nm = clean_names_eu) %>%
  filter(country != "Country") %>% # remove the embedded header row
  gather(year, value, starts_with("current")) %>%
  select(-starts_with("budget")) %>%
  # "current_2009" -> account_type = "current", year = "2009"
  separate(year, into = c("account_type", "year"), sep = "_")

# Budget columns -> long form; the current-account columns are dropped.
eu_budget <- eu_balance_raw %>%
  set_names(nm = clean_names_eu) %>%
  filter(country != "Country") %>% # remove the embedded header row
  gather(year, value, starts_with("budget")) %>%
  select(-starts_with("current")) %>%
  # "budget_2009" -> account_type = "budget", year = "2009"
  separate(year, into = c("account_type", "year"), sep = "_")

# Stack both account types: current-account rows first, then budget rows.
eu_balance_clean <- bind_rows(eu_current, eu_budget)

eu_balance_clean %>% write_csv(here("2019", "2019-04-16", "eu_balance.csv"))
### Pensions ----
# Load the Economist pensions export, remove rows with any NA, name the three
# columns, and save the cleaned table.
pensions_raw <- read_csv(here("2019", "2019-04-16", "Economist_pensions.csv"))

pensions_clean <- set_names(
  na.omit(pensions_raw),
  nm = c("country", "pop_65_percent", "gov_spend_percent_gdp")
)

write_csv(pensions_clean, here("2019", "2019-04-16", "pensions.csv"))
### Trade ----
# Load the Economist US trade/manufacturing export, name the columns, rescale
# the two measures, and drop rows with any NA before saving.
trade_raw <- read_csv(
  here("2019", "2019-04-16", "Economist_us-trade-manufacturing.csv")
)

trade_clean <- na.omit(
  mutate(
    set_names(
      trade_raw,
      nm = c("year", "trade_deficit", "manufacture_employment")
    ),
    # Scale factors as in the raw-to-clean conversion: deficit x 1e9 and
    # employment x 1e6 (presumably billions and millions in the source;
    # confirm against the raw file).
    trade_deficit = trade_deficit * 1e9,
    manufacture_employment = manufacture_employment * 1e6
  )
)

write_csv(trade_clean, here("2019", "2019-04-16", "trade.csv"))
### Women ----
# Reshape the Economist women-in-research export into long form: one row per
# country / field with the corresponding value in `percent_women`.
women_research_raw <- read_csv(
  here("2019", "2019-04-16", "Economist_women-research.csv")
)

# The names below were drafted interactively by pasting the first raw row with
# datapasta. That helper is deliberately not called here: it emits text into
# the editor/console on every run and would require a package this script
# never loads.
# NOTE(review): "Women inventores" looks like a typo for "Women inventors".
# It is left unchanged because the label is written into women_research.csv
# as a `field` value and downstream consumers may match on it; confirm before
# renaming.
research_names <- c(
  "country",
  "Health sciences",
  "Physical sciences",
  "Engineering",
  "Computer science, maths",
  "Women inventores"
)

women_research_clean <- women_research_raw %>%
  na.omit() %>%
  set_names(nm = research_names) %>%
  filter(country != "Country") %>% # remove the embedded header row
  # Stack the five field columns into field / percent_women pairs.
  gather(field, percent_women, `Health sciences`:`Women inventores`)

women_research_clean %>%
  write_csv(here("2019", "2019-04-16", "women_research.csv"))