forked from ICI3D/RTutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparticipatoryCoding_MalariaITN_2017.R
81 lines (69 loc) · 3.71 KB
/
participatoryCoding_MalariaITN_2017.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
## MMED 2017
## Steve Bellan
## Participatory Coding for Simulation of Study Design
## Research Question: How does the proportion of people sleeping under ITNs affect the incidence of clinical malaria?
sampleSize <- 10^5
incidenceRate <- .3 # per person per year
studyDuration <- 2 # years
numVillages <- 200
## myDat <- data.frame(id = 1:sampleSize,
## villageID = sample(1:200, size = sampleSize, replace=T)
## )
## head(myDat)
## incidence ~ a0*exp(-beta * bednet coverage + noise)
incidenceFXN <- function(a0, betaITN, pITN, sd=10^-6) { ## nonlinear function of incidence rate vs proportion sleeping with ITNs with noise & interecept
incidence <- a0 * exp(- betaITN * pITN + rnorm(length(pITN), 0, sd))
return(incidence)
}
## let's look at what this plot looks like with some values
pITNs <- seq(.25, .75, l = 100)
incidenceSeq <- incidenceFXN(a0 = 1, betaITN = .2, pITN = pITNs) ## default small sd so we can just see the trend
plot(pITNs, incidenceSeq, xlab = 'proportion sleeping under bednets',
ylab = 'incidence (per person per year)', type='l', lwd = 2,
ylim = c(0, max(incidenceSeq))
)
head(myDat)
myDat <- data.frame(villageID = 1:numVillages, ## create some villages
propBedNets = runif(numVillages, .25, .75)
)
myDat$incidence <- incidenceFXN(a0=1, betaITN = 3, pITN = myDat$propBedNets, sd = .5)
head(myDat)
myMod <- lm(log(incidence) ~ propBedNets, data = myDat) ## linear model of logincidence
summary(myMod) ## look at model
IDR <- exp(coefficients(myMod)['propBedNets']) # get incidence density ratio (exponent of linear coefficient)
IDRcis <- exp(confint(myMod))['propBedNets',] # CIs on IDR
## function that does the study many times
doStudy <- function(numRuns = 1, numVillages = 200,
minPropBednets = .25, maxPropBednets = .75,
a0 = 1, betaITN = 3, sd = .5, browse=F) {
if(browse) browser() ## if argument "browse" is set to true, then we step inside the function
## initialize these vectors to store output
IDRseq <- rep(NA, numRuns)
IDRlb <- rep(NA, numRuns)
IDRub <- rep(NA, numRuns)
for(run in 1:numRuns) { ## for each run
## create set of villages to sample with randomly distributed ITN coverage
myDat <- data.frame(villageID = 1:numVillages,
propBedNets = runif(numVillages, minPropBednets, maxPropBednets)
)
myDat$incidence <- incidenceFXN(a0=a0, betaITN = betaITN, pITN = myDat$propBedNets, sd = sd) ## calculate incidence with some noise as our data
myMod <- lm(log(incidence) ~ propBedNets, data = myDat) ## fit model to the data
IDRseq[run] <- exp(coefficients(myMod)['propBedNets']) ## store values
IDRcis <- exp(confint(myMod))['propBedNets',]
IDRlb[run] <- IDRcis[1]
IDRub[run] <- IDRcis[2]
}
power <- mean(IDRub<1) ## power is the proportion of times the upper bound of our CI is below 1 for the IDR
return(power)
## return(cbind(IDRseq, IDRlb, IDRub)) ## could also return the raw values as a matrix
}
villageNumSeq <- seq(10, 100, by =10) ## for different numbers of villages
powerSeq <- rep(NA, length(villageNumSeq)) ## calculate power
for(vv in 1:length(villageNumSeq)) { ## for each village number sample size
currentVillageNumber <- villageNumSeq[vv] ## set the current village number
powerSeq[vv] <- doStudy(1000, numVillages = currentVillageNumber, sd = .5, betaITN = .2) ## calculate power by doing a lot of runs
}
## Plot power vs village sample size
plot(villageNumSeq, powerSeq, xlab= 'number of villages', ylab = 'power', type = 'l', lwd = 2,
ylim = c(0,1))
lines(villageNumSeq, powerSeq, col = 'blue')