-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdenominators.Rmd
282 lines (217 loc) · 7.92 KB
/
denominators.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
---
title: "denominators"
author: "FD"
date: "10/10/2021"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# Initializations
```{r}
library("colorspace")
pal1 <- colorspace::qualitative_hcl(4, palette = "Set 2")
names(pal1) <- c("SPF", "insee2021", "AmeliAssMaladie", "AmeliINSEE2020")
pal1
```
# Load and manage data
## Download datasets
```{r downloadDatasets}
dlData <- FALSE
if(dlData){
# Amelia data
# source("0_loadAmeliData.R")
# SPF data
URL.SPF <- "https://www.data.gouv.fr/en/datasets/r/54dd5f8d-1e2e-4ccb-8fb8-eac68245befd"
system(paste0("wget -O data/vaccSPF.csv ", URL.SPF))
URL.SPF2 <- "https://www.data.gouv.fr/en/datasets/r/dc103057-d933-4e4b-bdbf-36d312af9ca9"
system(paste0("wget -O data/vaccSPF_tot.csv ", URL.SPF2))
}
```
## Load datasets
```{r loadDatasets}
# Ameli
## By EPCI
vaccEPCI <- read.csv("data/vaccEPCI.csv", sep = ";")
# head(vaccEPCI)
# Find final time
finalDate <- max(vaccEPCI$date)
finalDate
# Subset the data to only keep the most recent
vaccEPCI <- vaccEPCI[which(vaccEPCI$date == finalDate), ]
# Check total populations size
sum(vaccEPCI[vaccEPCI$classe_age == "TOUT_AGE", "population_carto"])
## By Dep
vaccDep <- read.csv("data/vaccDep.csv", sep = ";")
# head(vaccDep)
# Check that final dates are compatible
stopifnot(max(vaccDep$date) == finalDate)
# Subset the data to only keep the most recent, and remove detail to avoid duplicated
vaccDep <- vaccDep[vaccDep$date == finalDate & vaccDep$type_vaccin == "Tout vaccin" & vaccDep$departement_residence == "Tout département" & vaccDep$region_residence == "Toute région", ]
# Check total populations size
sum(vaccDep[vaccDep$classe_age == "TOUT_AGE", "population_insee"], na.rm = TRUE)
## SPF
vaccSPF <- read.csv("data/vaccSPF.csv", sep = ";")
head(vaccSPF)
# Select values of the same date as the Ameli data
vaccSPF <- vaccSPF[vaccSPF$jour == finalDate, ]
sort(unique(vaccSPF$clage_vacsi))
# SPF, final, which contains population values
tmp <- read.csv("data/vaccSPF_tot.csv", sep = ";")
# Add this info to the SPF dataset
vaccSPF <- merge(vaccSPF, tmp[, c("clage_vacsi", "pop")], by = "clage_vacsi", all = TRUE)
```
## Add INSEE 2021 Demographic data
```{r loadDatasets}
# Load demographic data
# Sources: Population par classe d'age
# https://www.insee.fr/fr/statistiques/1893198
#
# Population par année d'âge (par milliers)
# https://www.insee.fr/fr/statistiques/5347620#tableau-figure6_radio1
#
demoAmeli <- read.csv("data/demographic/insee_2021_Ameli.csv")
demoAmeli
demoSPF <- read.csv("data/demographic/insee_2021_SPF.csv")
demoSPF
# Add new population data to SPF
vaccSPF <- merge(vaccSPF, demoSPF, "clage_vacsi")
vaccSPF
# Compute new rates
vaccSPF$couv_tot_dose1_2021 <- 100 * vaccSPF$n_cum_dose1 / vaccSPF$pop2021
vaccSPF$couv_tot_complet_2021 <- 100 * vaccSPF$n_cum_complet / vaccSPF$pop2021
# Add population data to the other Ameli dataset
vaccDep
names(demoAmeli) <- c("classe_age", "pop2021")
vaccDep <- merge(vaccDep, demoAmeli, by = "classe_age")
# Sort tables
vaccDep <- vaccDep[order(vaccDep$classe_age), ]
```
## Compute national data
```{r}
# Need to remove whole lines where there are NAs
linesOK <- which(!is.na(vaccEPCI$effectif_cumu_1_inj) & !is.na(vaccEPCI$effectif_cumu_termine), !is.na(vaccEPCI$population_carto))
vaccEPCI <- vaccEPCI[linesOK, ]
# Compute national data
# Initialize dataset
vaccAmeli <- data.frame(classe_age = sort(unique(vaccEPCI$classe_age)))
vaccAmeli$n_tot_dose1 <- NA
vaccAmeli$n_tot_complet <- NA
vaccAmeli$pop <- NA
# Sum values by age class
for(ag in vaccAmeli$classe_age){
subdat <- vaccEPCI[vaccEPCI$classe_age == ag,]
tmp <- colSums(subdat[, c("effectif_cumu_1_inj", "effectif_cumu_termine", "population_carto")], na.rm = TRUE)
vaccAmeli[vaccAmeli$classe_age == ag, 2:4] <- tmp
}
# Compute couverture
vaccAmeli$couv_dose1 <- 100 * vaccAmeli$n_tot_dose1 / vaccAmeli$pop
vaccAmeli$couv_complet <- 100 * vaccAmeli$n_tot_complet / vaccAmeli$pop
# Sort by age class
vaccAmeli <- vaccAmeli[order(vaccAmeli$classe_age), ]
```
## Compute Ameli age classes on SPF data
```{r}
# Same age classes:
vaccSPF2 <- data.frame(classe_age = c("65-74", "75 et +", "TOUT_AGE"))
# SPF age class codes contributing to each Ameli age class
inds <- list(c(69, 74), c(79, 80), c(0))
out <- matrix(0, ncol = 4, nrow = 3)
for(i in 1:3){
# Sum data within each age class
out[i, ] <- colSums(vaccSPF[is.element(vaccSPF$clage_vacsi, inds[[i]]), c("n_cum_dose1", "n_cum_complet", "pop", "pop2021")])
}
# Fill in new dataset
vaccSPF2$n_tot_dose1 <- out[, 1]
vaccSPF2$n_tot_complet <- out[, 2]
vaccSPF2$pop <- out[, 3]
vaccSPF2$pop2021 <- out[, 4]
vaccSPF2$couv_dose1 <- 100 * vaccSPF2$n_tot_dose1 / vaccSPF2$pop
vaccSPF2$couv_complet <- 100 * vaccSPF2$n_tot_complet / vaccSPF2$pop
vaccSPF2$couv_dose1_2021 <- 100 * vaccSPF2$n_tot_dose1 / vaccSPF2$pop2021
vaccSPF2$couv_complet_2021 <- 100 * vaccSPF2$n_tot_complet / vaccSPF2$pop2021
vaccSPF2
```
```{r}
# Names of the age classes of SPF
ageClasses <- sort(unique(vaccSPF$clage_vacsi))
names(ageClasses) <- c("Tous âges", "00-04 ans", "05-09 ans", "10-11 ans", "12-17 ans", "18-24 ans", "25-29 ans", "30-39 ans", "40-49 ans", "50-59 ans", "60-64 ans", "65-69 ans", "70-74 ans", "75-79 ans", "80 ans et +")
ageClasses
# Reverse dictionary
ac2 <- names(ageClasses)
names(ac2) <- ageClasses
vaccSPF$clage_name <- ac2[as.character(vaccSPF$clage_vacsi)]
# Sort
vaccSPF <- vaccSPF[order(vaccSPF$clage_name), ]
vaccSPF
```
# Plotting
## SPF age classes
```{r}
dx <- 0.25
par(las = 1, xpd = TRUE, mgp = c(2, 0.5, 0), tck = -0.01)
xx <- seq_len(nrow(vaccSPF))
# Initialize plot
plot(xx, vaccSPF$couv_dose1,
ylim = c(0, 100),
xaxs = "i", yaxs = "i",
xlim = c(1-dx, nrow(vaccSPF)+dx),
frame.plot = FALSE, axes = FALSE,
type = "n",
xlab = "", ylab = "Couverture au moins une dose")
par(xpd = FALSE)
# Horiz lines
for(i in seq(0, 100, by = 10)){
abline(h = i, col = gray(0.9))
}
par(xpd = TRUE)
points(xx, vaccSPF$couv_dose1,
col = pal1["SPF"], pch = 16, cex = 1.5)
points(seq_len(nrow(vaccSPF)), vaccSPF$couv_tot_dose1_2021, col = pal1["insee2021"], pch = 0, cex = 1.2, lwd = 2)
axis(1, at = seq_len(nrow(vaccSPF)), labels = vaccSPF$clage_name, las = 3, cex.lab = 0.6, lwd = 0, lwd.ticks = 0)
axis(2)
legend("bottomleft", legend = c("couv. SPF", "denom. INSEE 2021"),
col = c(pal1[c("SPF", "insee2021")]), pch = c(16, 0), lwd = c(1, 2),
lty = 0, # To avoid plotting a line in spite of lwd
pt.cex = c(1.5, 1.2),
inset = c(0, 1), xpd = TRUE, horiz = TRUE, bty = "n"
)
vaccSPF
```
## Ameli classes d'age
```{r}
# Initialize plot
par(las = 1, xpd = TRUE, mgp = c(2, 0.5, 0), tck = -0.01)
xx <- seq_len(nrow(vaccAmeli))
# Initialize plot
plot(xx, vaccAmeli$couv_dose1,
ylim = c(0, 100),
xaxs = "i", yaxs = "i",
xlim = c(1-dx, nrow(vaccAmeli)+dx),
frame.plot = FALSE, axes = FALSE,
type = "n",
xlab = "", ylab = "Couverture au moins une dose")
par(xpd = FALSE)
# Horiz lines
for(i in seq(0, 100, by = 10)){
abline(h = i, col = gray(0.9))
}
par(xpd = TRUE)
points(xx, vaccAmeli$couv_dose1, pch = 15, col = pal1["AmeliAssMaladie"])
axis(1, at = seq_len(nrow(vaccAmeli)), labels = vaccAmeli$classe_age, las = 3, cex.lab = 0.6, lwd = 0, lwd.ticks = 0)
axis(2)
points(5:7, vaccSPF2$couv_dose1, col = pal1["SPF"], lwd = 2, pch = 15)
points(5:7, vaccSPF2$couv_dose1_2021, col = pal1["insee2021"], lwd = 2, pch = 0)
points(seq_len(nrow(vaccDep)), 100*vaccDep$taux_cumu_1_inj, col = pal1["AmeliINSEE2020"], pch = 1, lwd = 2)
vaccDep$effectif_cumu_1_inj
vaccAmeli$n_tot_dose1
vaccAmeli
legend("bottomleft",
legend = c("Assurance maladie (denom 2021)", "Ameli, denom. INSEE 2020", "SPF (denom. 2020)", "SPF denom. 2021"),
col = pal1[c("AmeliAssMaladie", "AmeliINSEE2020", "SPF", "insee2021")],
pch = c(15, 1, 16, 0), lwd = c(2),
lty = 0, # To avoid plotting a line in spite of lwd
pt.cex = 1.2,
inset = c(0, 1), xpd = TRUE, bty = "n", ncol = 2
)
```