forked from burakbayramli/books
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathammonium-regional.r
241 lines (187 loc) · 7.57 KB
/
ammonium-regional.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# Ammonium (uEq/L) in snowpack in Colorado and New Mexico.
# Fit regional kendall models
# This the RK3b.txt data file from the Kendall program for USGS at
# http://pubs.usgs.gov/sir/2005/5275/pdf/sir2005-5275.pdf
# 2015-06-27 CJS First edition
# Lines starting in ##--part001b; and ##***part001e; are used
# to bracket portions of the R code for inclusion into my
# course notes and are not normally coded.
options(useFancyQuotes=FALSE) # renders summary output corrects
library(car)
library(ggplot2)
library(gridExtra)
library(Kendall)
library(lmtest)
library(lsmeans)
library(nlme)
library(plyr)
library(reshape2)
source("../../schwarz.functions.r")
sink('ammonium-regional-R-010.txt', split=TRUE)
##***part010b;
# Read in the data and restructure it into the long format
ammon <- read.table("ammonium-regional.txt", header=TRUE, as.is=TRUE,
strip.white=TRUE)
ammon$RegionF <- factor(ammon$Region) # make region a factor
head(ammon)
##***part010e;
sink()
xtabs(~Year+Region, data=ammon)
# make an initial plot of the data
##***partprelimplotb;
library(scales)
plotprelim <- ggplot(data=ammon, aes(x=Year, y=Ammonium, group=RegionF))+
ggtitle("Ammonium levels over time")+
xlab("Year")+ylab("Ammonium")+
geom_point(size=4, position=position_dodge(w=0.2))+
geom_line()+
scale_x_continuous(breaks=pretty_breaks())
plotprelim
##***partprelimplote;
ggsave(plot=plotprelim, file='ammonium-regional-R-prelimplot.png')
########################################################################
# Fit the model using an ANCOVA
# Also make sure that Region is declared as a factor (see when data read in)
sink('ammonium-regional-R-regfitsep.txt', split=TRUE)
##***partregfitsepb;
# Fit a separate line for each Region
ddply(ammon, "Region", function(x){
# fit a separate line for each Region
cat("\n\n***Separate fit for Region :", as.character(x$Region[1]),"\n")
fit <- lm( Ammonium ~ Year, data=x)
print(summary(fit))
print(confint(fit)) # confidence interval on slope
res <- data.frame(slope=coef(fit)[2], se=sqrt(diag(vcov(fit)))[2])
return(res)
})
##***partregfitsepe;
sink()
sink('ammonium-regional-R-regfitnp.txt', split=TRUE)
##***partregfitnpb;
# Fit the regression line with non-parallel slopes and look at the ANOVA table
# Because lm() produces type I (increment tests), you need to specify the
# interaction term last in the model sequence.
# Be sure that Region has been declared as a factor.
ammon.fit.np <- lm( Ammonium ~ RegionF + Year + Year:RegionF, data=ammon)
anova(ammon.fit.np)
##***partregfitnpe;
sink()
sink('ammonium-regional-R-regfitp.txt', split=TRUE)
##***partregfitpb;
# Fit the regression line with parallel slopes. Specify the Region term last
# to get the proper test for Region effects
# Be sure that Region has been declared as a factor.
# Because R fits Type I tests, specify the Year variable last in the model
ammon.fit.p <- lm( Ammonium ~ RegionF + Year, data=ammon)
summary(ammon.fit.p)
anova(ammon.fit.p)
##***partregfitpe;
sink()
sink("ammonium-regional-R-fitpieces.txt", split=TRUE)
##***partfitpiecesb;
# Extract the individual parts of the fit using the
# standard methods. Note that because Region is a factor
# DO NOT USE THE ESTIMATES from the summary() to estimate
# the Region effect because these estimates depend on the
# internal parameterization used. Use the lsmeans() function instead
summary(ammon.fit.p)
coef(ammon.fit.p)
sqrt(diag(vcov(ammon.fit.p))) # gives the SE
confint(ammon.fit.p)
names(summary(ammon.fit.p))
summary(ammon.fit.p)$r.squared
summary(ammon.fit.p)$sigma
##***partfitpiecese;
sink()
##***partdiagplotb;
# look at diagnostic plot
# There is some evidence of non-fit in the qq pot and the scale-location plot.
plotdiag <- sf.autoplot.lm(ammon.fit.p)
plotdiag
##***partdiagplote;
ggsave(plot=plotdiag, file='ammonium-regional-R-diagplot.png')
sink('ammonium-regional-R-lsmeans.txt', split=TRUE)
##***partlsmeansb;
# Estimate the size of the Region effect. Do not use
# the output from summary() directly as this depends on the
# internal parameterization used by R. We use the lsmeans() package
ammon.fit.p.lsmo <- lsmeans::lsmeans(ammon.fit.p, ~RegionF)
cld(ammon.fit.p.lsmo)
##***partlsmeanse;
sink()
# plot the fitted lines to the graphs
# Because there are two lines, it is better to make predictions and
# then plot the predictions on the plot. See later.
sink('ammonium-regional-R-predsetup.txt', split=TRUE)
##***partpredsetupb;
# make predictions
# First set up the points where you want predictions
newYears <- expand.grid(Year=seq(min(ammon$Year,na.rm=TRUE),
max(ammon$Year,na.rm=TRUE),1),
RegionF=unique(ammon$RegionF))
newYears[1:5,]
str(newYears)
##***partpredsetupe;
sink()
sink('ammonium-regional-R-predavg.txt', split=TRUE)
##***partpredavgb;
# Predict the AVERAGE duration of cuting at each Year
# You need to specify help(predict.lm) tp see the documentation
predict.avg <- predict(ammon.fit.p, newdata=newYears,
se.fit=TRUE,interval="confidence")
# This creates a list that you need to restructure to make it look nice
predict.avg.df <- cbind(newYears, predict.avg$fit, se=predict.avg$se.fit)
head(predict.avg.df)
plotfit <- ggplot(data=ammon, aes(x=Year, y=Ammonium, group=RegionF))+
ggtitle("Ammonium levels over time ")+
xlab("Year")+ylab("Ammonium levels")+
geom_point(size=4,position=position_dodge(w=0.2))+
geom_line(data=predict.avg.df, aes(x=Year, y=fit, group=RegionF))
plotfit
# Add the confidence intervals to the plot
##***partpredavge;
sink()
ggsave(plot=plotfit, file='ammonium-regional-R-plotfit.png')
sink('ammonium-regional-R-predindiv.txt', split=TRUE)
##***partpredindivb;
# Predict the INDIVIDUAL duration of cuting at each Year
# R does not product the se for individual predictions
predict.indiv <- predict(ammon.fit.p, newdata=newYears,
interval="prediction")
# This creates a list that you need to restructure to make it look nice
predict.indiv.df <- cbind(newYears, predict.indiv)
head(predict.indiv.df)
##***partpredindive;
sink()
#########################################################################
# Regional Kendall test for trend using the rkt package
library(rkt)
rkt::rkt(ammon$Year, ammon$Ammonium, block=ammon$Region )
#########################################################################
# Regional Kendall test for trend using the EnvStats package
# There are several packages: wq, Kendall, pheno and rkt, envstats
# See: http://www.inside-r.org/node/218892
library(EnvStats)
EnvStats::kendallSeasonalTrendTest(Ammonium ~ Region + Year, data=ammon)
#########################################################################
# Kendall regional test for trend using the wq package
# We need to convert it first to a matrix time series object with missing values for any
# years not measured in a region
xtabs(~Year+Region, data=ammon)
# We create a full dataset and then merge it with our dataset with missing values
full <- expand.grid(Year=seq(min(ammon$Year, na.rm=TRUE),
max(ammon$Year, na.rm=TRUE),1),
Region=unique(ammon$Region))
ammon.full <- merge(ammon, full, all=TRUE )
xtabs(~Year+Region, data=ammon.full)
xtabs(Ammonium~Year+Region, data=ammon.full, sparse=TRUE)
# Convert to a matrix time series
# Convert from long to wide format
library(reshape2)
ammon.wide <- acast(ammon.full, Year ~ Region, value.var="Ammonium")
ammon.wide
ammon.mts <- ts(ammon.wide, class=c("ts","mts"))
frequency(ammon.mts)
library(wq)
wq::trendHomog(mts2ts(ammon.mts)) # test if the slopes are the same in all regions
wq::seaKen(mts2ts(ammon.mts)) # regional test