-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.Rmd
210 lines (163 loc) · 8.82 KB
/
index.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
---
title: "Visualize the output dataset for COVerAGE-DB"
---
```{r setup, include=FALSE}
# Document-wide chunk defaults: hide code, warnings and messages, and use
# 8 x 6 figures unless a chunk overrides the size.
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  message = FALSE,
  fig.width = 8,
  fig.height = 6
)
```
```{r, include = FALSE}
# Helpers and packages ----
# Source every script found in the project's R/ directory.
invisible(
  lapply(
    list.files(here::here("R"), full.names = TRUE),
    source
  )
)

suppressPackageStartupMessages({
  library("data.table")
  library("ggplot2")
  library("readr")
  library("cowplot")
  library("osfr")
  library("countrycode")
})

# Citation string for the data source.
source_1 <- "COVerAGE-DB: A database of COVID-19 cases and deaths by age (https://osf.io/mpwjq/)"

# Saved objects read from data/ (the file names suggest world-map geometry in
# the Robinson projection -- TODO confirm).
WorldRobinson <- readRDS(here::here("data/NEWorldRobinson.rds"))
geo_df <- readRDS(here::here("data/WorldRobinson_df.rds"))

# JHU daily data: fetched, then processed.
dtJHU <- get.JHU.daily()
dtJHU2 <- process_dtJHU(dtJHU)

# Output database ----
# Original note: the database is downloaded as a zip file and read in, and
# `load_fresh` saves a local copy. The object is then replaced by the CSV read
# back from data_backup/.
dt5_ori <- load_fresh(backup = TRUE)
dt5_ori <- fread(here::here("data_backup/dt5_ori.csv"))
dt5 <- clean_outputDB(dt5_ori) # original note: dt5 has only latest day

# Countries with at least one row dated after 2020-10-01.
countries_w_new_update <- dt5[Date > "2020-10-01", unique(Country)]
all_countries <- get_cnames(dt5) # original note: all countries in the datasets

# Distinct Country / max_date pairs (range of `max_date`).
dt_date <- unique(dt5[, .(Country, max_date)])

# Country-level table ----
# Original note: dt5_country (MPIDR by country, age and sex) is made for
# Tableau; it has all the countries, with sex pooled into a total and NA
# values removed.
dt5_country <- get_dt5_pool_sex(dt5)
# New is 1 for countries listed in `countries_w_new_update`, otherwise 0.
dt5_country[, New := ifelse(Country %in% countries_w_new_update, 1, 0)]
dt5_country <- join_region(dt5_country) # original note: joined to region
dt5_country <- join_wpp_pop_dth(dt5_country)
# Earlier variant, kept for reference:
# dt5_country[, update_date:= format.Date(dt5_ori$update_date[1], "%d %B %Y")]
dt5_country[
  ,
  update_date := as.Date(dt5_ori$update_date[1], format = "%d %B %Y")
]

# Apply the column order stored in data/.
dt5_country_colorder <- readRDS(here::here("data/dt5_country_colorder.rds"))
setcolorder(dt5_country, dt5_country_colorder)
dt5_country[, .N] # row count (original note: 2/11: 8383)
# Number of MPIDR countries by WB group:
# unique(dt5_country[,.(Measure, Sex2, WBRegion4, nc_MPIDR_WB)])
# Aggregate country and age to get totals:
# dt5_total <- get_sum_country_age(dt5_country)

# Summary tables ----
dt5_summary_simple <- get_summary_table(dt5_country)
# Percentage of the 0-19 sum falling in ages 0-9 and in ages 10-19.
dt5_summary_simple[
  ,
  Pcent_0_9 := (`Age 0-4` + `Age 5-9`) / Sum0_19 * 100
]
dt5_summary_simple[
  ,
  Pcent_10_19 := (`Age 10-14` + `Age 15-19`) / Sum0_19 * 100
]

# Original note: aggregated by WB region.
dt5_summary_region <- get_summary_table_region(dt5_country, dtJHU2)
dt5_summary_region[1:6, ]

# Original note: aggregated at country level with JHU country data.
dt5_summary <- get_summary_table_region2(dt5_country, dtJHU2)
dt5_summary[, .N] # row count (original note: 744)
dt5_summary_simple
```
```{r}
# Plots: data availability ----
# Work on a copy of dt5_country. Within each Sex x Country group, Measure is
# replaced by the sorted, underscore-joined set of measures present, so it
# shows whether a group has only cases, only deaths, or both.
dt_ava <- copy(dt5_country)
dt_ava[
  ,
  Measure := paste(sort(unique(Measure)), collapse = "_"),
  by = .(Sex, Country)
]
# Every sex other than "Both" is relabelled "Sex-specific".
dt_ava[Sex != "Both", Sex := "Sex-specific"]
# Distinct Country / Sex / Measure rows; used for the map, `plot_ava_map2`.
dt_ava2 <- unique(dt_ava[, .(Country, Sex, Measure)])
# dt_ava2[Sex=="Both"&grepl("Cases", Measure) ,.N]

# Distinct Country / Sex / Date rows with a "YYYY-MM" month label.
dt_date <- unique(dt5[, .(Country, Sex, Date)])
dt_date[, Month := format(Date, "%Y-%m")]

# Text for: There are `r NC_New` countries updated after October 2020:
# the number of countries with a month of 2020-10 or later, followed in
# parentheses by its rounded percentage of all countries in dt_date.
NC_New <- local({
  n_recent <- dt_date[Month >= "2020-10", uniqueN(Country)]
  n_total <- dt_date[, uniqueN(Country)]
  paste0(n_recent, " (", round(n_recent / n_total * 100), "%)")
})
# Totals ----
# Original note: aggregate over countries to get the total by age ...
dt5_total_age <- get_sum_country(dt5_country)
# ... then the regional summary (both total and sex-specific).
dt5_total_region <- get_sum_country_region(dt5_country)

# Bar charts by age group from `dt5_total_age`, restricted to Sex == "Both":
# one for cases, one for deaths, combined into a single grid.
bar_total_age1 <- make_barchart2(
  dt5_total_age[Sex == "Both"],
  Measure0 = "Cases"
)
bar_total_age2 <- make_barchart2(
  dt5_total_age[Sex == "Both"],
  Measure0 = "Deaths"
)
bar_total_age <- cowplot::plot_grid(bar_total_age1, bar_total_age2)

# Bar chart by WB region, again for Sex == "Both".
bar_total_age_region <- make_barchart2(
  dt5_total_region[Sex == "Both"],
  label = FALSE
)

# Age pyramids for cases and deaths ----
# Fix the level order of Sex before the sex-specific rows are plotted.
dt5_total_age$Sex <- factor(
  dt5_total_age$Sex,
  levels = c("Both", "Male", "Female")
)
# Only the deaths panel keeps its legend.
plots_by_measure1 <- plot.measure(
  data = dt5_total_age[Sex != "Both"],
  Measure0 = "Cases",
  big_plot = TRUE,
  hide_legend = TRUE
)
plots_by_measure2 <- plot.measure(
  data = dt5_total_age[Sex != "Both"],
  Measure0 = "Deaths",
  big_plot = TRUE,
  hide_legend = FALSE
)
pyramid_age_sex <- cowplot::plot_grid(plots_by_measure1, plots_by_measure2)
# Age pyramids for cases and deaths, one panel per WB income group ----
rcs <- c(
  "Low income",
  "Lower middle income",
  "Upper middle income",
  "High income"
)
# Every income group x measure combination. expand.grid() varies its first
# argument fastest and, with its defaults, returns factor columns.
grid0 <- expand.grid(rcs, c("Cases", "Deaths"))

# Draw one sex-specific panel (rows with Sex != "Both") for the given income
# group and measure, with legend and text hidden.
plot.all <- function(region0, measure0) {
  plot.measure(
    dt5_total_region[Sex != "Both"],
    measure0,
    region0,
    big_plot = TRUE,
    hide_legend = TRUE,
    hide_text = TRUE
  )
}

# One panel per row of grid0, in grid0's row order.
plots_by_region_sex <- Map(
  plot.all,
  region0 = grid0$Var1,
  measure0 = grid0$Var2
)
# NOTE(review): the list holds the four income groups for Cases followed by
# the four for Deaths. With ncol = 2 and plot_grid's default row-wise fill,
# each row pairs two income groups of the same measure rather than Cases with
# Deaths for one income group -- confirm this is the intended layout.
pyramid_age_sex_region <- cowplot::plot_grid(
  plotlist = plots_by_region_sex,
  ncol = 2
)
#
# Country lists and availability maps ----
# Original note: in the `dt5_country` dataset, countries with single/both sex
# always have a total.

# Countries with rows for each measure, for Sex == "Both" ...
c_case <- dt5_country[Measure == "Cases" & Sex == "Both", unique(Country)]
c_death <- dt5_country[Measure == "Deaths" & Sex == "Both", unique(Country)]
# ... and for any other value of Sex.
c_case_sex <- dt5_country[Measure == "Cases" & Sex != "Both", unique(Country)]
c_death_sex <- dt5_country[Measure == "Deaths" & Sex != "Both", unique(Country)]

# One map each: totals, then sex-specific data.
map_total <- plot_ava_map2(dt_ava2[Sex == "Both"])
map_sex_specific <- plot_ava_map2(dt_ava2[Sex == "Sex-specific"])
# Aggregated plots for all countries ----
all_countries <- get_cnames(dt5)

# Original note: `get_dt_for_total` runs a cleaning function and checks if CFR
# could be obtained. It is applied to each country and the per-country
# results are stacked. First pass: get_f_m = FALSE.
data_total1 <- rbindlist(
  lapply(
    all_countries,
    get_dt_for_total,
    data = dt5,
    target_interval = seq(0, 90, by = 5),
    get_f_m = FALSE
  )
)

# Original note: for the 0-19 group total. Deaths value divided by cases
# value, as a percentage rounded to 3 decimals.
# NOTE(review): calculate.CFR() is evaluated twice on the same input; if it
# has no side effects a single result could be reused -- confirm.
CFR_0_19 <- local({
  deaths_value <- calculate.CFR(data_total1)[Measure == "Deaths", Value]
  cases_value <- calculate.CFR(data_total1)[Measure == "Cases", Value]
  round(deaths_value / cases_value * 100, 3)
})

# Second pass: same call with get_f_m = TRUE.
data_total2 <- rbindlist(
  lapply(
    all_countries,
    get_dt_for_total,
    data = dt5,
    target_interval = seq(0, 90, by = 5),
    get_f_m = TRUE
  )
)

# CFR plot ----
data_total1 <- revise.data.total(data_total1) # original note: calculate CFR
data_total2 <- revise.data.total(data_total2)

# Total panel (legend hidden) next to the sex-specific panel (legend shown).
CFR_total <- plot.measure(
  data_total1,
  Measure0 = "Case fatality rate",
  big_plot = TRUE,
  hide_legend = TRUE
)
CFR_sex <- plot.measure(
  data_total2,
  Measure0 = "Case fatality rate",
  big_plot = TRUE,
  hide_legend = FALSE
)
bar_CFR <- cowplot::plot_grid(CFR_total, CFR_sex)
```
**This page uses the harmonized dataset with 5-year age intervals.**
COVerAGE-DB is an open-access database including cumulative counts of confirmed COVID-19 cases, deaths, and tests by age and sex. Original data and sources are provided alongside data and measures in standardized and age-harmonized formats. The data were extracted from reports published by official governmental institutions in a variety of formats.
The [project page](https://github.com/timriffe/covid_age) and the [medRxiv paper](https://www.medrxiv.org/content/10.1101/2020.09.18.20197228v2) contain the technical details.
The database is still in development. On `r Sys.Date()` it includes `r dt5_ori[, uniqueN(Country)]` countries and `r dt5_ori[Region!="All", uniqueN(Region)]` subnational areas.
**Data Availability**
There are `r length(c_case)` countries with national data on counts of COVID-19 cases and `r length(c_death)` countries with national data on counts of COVID-19 deaths.
There are `r length(c_case_sex)` countries with national data on sex-specific counts of COVID-19 cases and `r length(c_death_sex)` countries with national data on sex-specific counts of COVID-19 deaths.
```{r, fig.width=8, fig.height=4}
# Availability maps: Sex == "Both" first, then the sex-specific data.
map_total
map_sex_specific
```
**Age distribution of global COVID-19 cases and deaths**
```{r, fig.width=10, fig.height=4}
# Combined grid of the cases and deaths bar charts by age (Sex == "Both").
bar_total_age
```
**Age distribution of global COVID-19 cases and deaths by sex**
```{r, fig.width=10, fig.height=4}
# Combined grid of the sex-specific cases and deaths panels.
pyramid_age_sex
```
**Case fatality rate (CFR), with 5-year age intervals.**
CFR is calculated by dividing death counts by case counts for each age group. In calculating CFR, only countries with both case and death data are counted.
```{r, fig.width=10, fig.height=4}
# Combined grid of the total and sex-specific case fatality rate panels.
bar_CFR
```
**Age distribution of COVID-19 cases and deaths by income group**
```{r, fig.width=10, fig.height=10}
# Bar chart built from the regional totals (Sex == "Both").
bar_total_age_region
```
**Age distribution of global COVID-19 cases and deaths by sex and income group**
```{r, fig.width=10, fig.height=14}
# Two-column grid of sex-specific panels, one per income group and measure.
pyramid_age_sex_region
```
## Reference
Riffe, T., Acosta, E., and The COVerAGE-DB team (2020). COVerAGE-DB: A database of COVID-19 cases and deaths by age. medRxiv https://doi.org/10.1101/2020.09.18.20197228
Dong E, Du H, Gardner L. An interactive web-based dashboard to track COVID-19 in real time. Lancet Infect Dis. 2020;20(5):533-534. https://doi.org/10.1016/S1473-3099(20)30120-1