-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path00_1_KERNEL_DENSITY.R
164 lines (126 loc) · 5.42 KB
/
00_1_KERNEL_DENSITY.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# Script name: 01_1_KERNEL_DENSITY.R
#
# Author: M.Chen, Inserm
#
# Doing: Estimating physical activity density with 60sec epoch data
# (2020-05-15 data release)
#--------------------------------------------------------------------
# Features
Sys.setlocale("LC_ALL","English")
# Packages
# > Tools
library(tidyr)
library(plyr)
library(dplyr)
library(haven)
library(stringr)
library(purrr)
library(vroom)
library(ks)
# > Time managment
library(lubridate)
library(hms)
# > Graphics
library(ggplot2)
library(cowplot)
library(wesanderson)
# > Graphical settings
pal <- wes_palette("Zissou1", 5, type = "discrete")
#--------------------------------------------------------------------
# Information on participant
# > Person summary = Data set containing the correct set of patients (n = 4006). One line = patient
stata_data <- read_dta("E:\\PC_FIXE\\Data\\04_DAILY_ACT_SUM\\2020-05-15\\WW_L40M100V400_update052020.dta") %>%
filter(exclusion == 0)
#--------------------------------------------------------------------
# Load data from participants with complete data
# i.e. no invalid day
# > Path of 60sec epoch files
path.files <- "E:\\PC_FIXE\\Data\\01_TIME_SERIES\\2020_05_15\\ms5.outraw\\40_100_400\\"
# > Load raw data from participants with complete data
acc_data <- data.frame(files = list.files(path = path.files,
pattern = ".csv$",
recursive = TRUE)) %>%
mutate(stno = str_extract(files, "[0-9]+")) %>%
filter(stno %in% unique(stata_data$stno)) %>%
split(.$stno, drop = T) %>%
map_dfr(~ {
i_dat <- vroom(file = paste0(path.files, unique(.x$files)),
delim = ",")
}, .id = "stno") %>%
# Compute logarithme of acc. data in g (because some data are skewed)
mutate(ACC_g = ACC/1000, # convert acc in mg in acc in g
log_ACC_g = log(1 + ACC_g), # compute the logarithm of the acc in g
log_ACC = log(1 + ACC), # compute the logarithm of the acc in mg
timestamp = as.POSIXct(timenum, origin="1970-1-1", tz="Europe/London")) # compute the timestamp
# > Delete unvalid days (i.e. days where less than 33% of the epoch are invalid)
# and sleep period
acc_data_valid <- acc_data %>%
filter(invalid_wakinghours < 33,
SleepPeriodTime == 0)
#--------------------------------------------------------------------
# 1) log(acc + 1), with acc in mg
# > Coordinates where densities will be estimated
# > Min and max of the wanted coordinates
min.pts <- 0
max.pts <- 15000
# > 100 log distributed points between min and max values in log scale
pts <- ( exp( seq(min.pts, log(1 + max.pts), length.out = 150) ) - 1 ) / 100
#plot(x = pts_1, y = rep(0, 150), type = "p", pch= "l")
# > Estimate median bandwidth
h.med <- acc_data_valid %>%
split(.$stno) %>%
# > Extract data from the large dataset
map_dbl(~ {
# > Data (neperian log of the acc in g)
acc <- .x$log_ACC
# > Density function estimation for the tested participant
fit <- kde(acc,
eval.points = pts, # at given points
positive = TRUE) # only positive data
# > Get bandwidth value
fit$h
}) %>%
# > Median bandwidth
median(.)
# Estimate individual density function with a commun bandwidth
dst <- acc_data_valid %>%
# > Keep only acceleration during waking time and valid days (needs to be implemented)
split(.$stno) %>%
map_dfr(~ {
# > Data (neperian log of the acc in g)
acc <- .x$log_ACC
# > Density function estimation for the tested participant
fit <- kde(acc,
eval.points = pts, # at given coordinates
h = h.med, # with a given bandwidth
positive = TRUE) # only positive data
# > Continuous density function for positive values
data.frame(x = fit$eval.points, # coordinates at which the function was estimated
f_i = fit$estimate) # estimated function
}, .id = "stno")
# > Check if there is no negative values
dst[f_i < 0,]
# > Mean density
dst %>%
dplyr::group_by(x) %>%
summarise(mean = mean(f_i),
upper = quantile(f_i, probs = 0.75),
lower = quantile(f_i, probs = 0.25)) %>%
mutate(group = if_else(x >= 0.1, "3. MVPA [0.1;0.5[", if_else(x < 0.04, "1. SD [0;0.04[", "2. LIPA [0.04;0.1["))) %>%
ggplot(.) +
geom_ribbon(aes(x = x, ymin = lower, ymax = upper), alpha = 0.7, fill = pal[2]) +
geom_line(aes(x = x, y = mean), color = pal[1]) +
geom_point(aes(x = pts_1, y = 0), color = pal[4], size = 0.1) +
#geom_area(aes(x = x, y = q.val, group = q.type), color = pal[1], lty = 2) +
theme_bw() +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.background = element_rect(fill = "white", colour = "white"),
strip.background = element_rect(colour = "white", fill = "white")) +
facet_wrap(~group, ncol = 1, scales = "free") +
labs(x = "1) log(acc+1) (acc in g)", y = "Density")
#--------------------------------------------------------------------
# Save
save(dst, file = "\\\\172.27.137.244\\vieillissement$\\Post doc & PhD\\Mathilde\\Analysis\\01_ARTICLE_1\\02_FUNCTION_ON_SCALAR\\test_refund\\2020-05-15_dst_4006.rda")