-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAllCode
231 lines (215 loc) · 8.94 KB
/
AllCode
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
## VGI User Quality Analysis
## John Max Marno
## MsGISc June 2016
## This analysis is intended to provide a User Quality metric for VGI point data
## User Experience, and Local Knowledge are incorporated as part of the User Quality Proxy
## Data is taken from the GBIF (test data), and Colorado Parks and Wildlife (reference data)
####################################################################################################
## Load Necessary Packages and set work space
##set work space
setwd("~/Rindstud")
## load libraries
## multiple at once
x<- c("raster", "rgdal", "maptools",
"sp", "dismo", "plyr", "rgeos",
"biomod2", "ggmap", "ggplot2")
lapply(x, require, character.only=TRUE)
####################################################################################################
## Read and Load Datasets
##GBIF VGI Test Data:
BEobs<- readOGR(dsn=".", layer = "BEcoloexact")
## now read the CPW shapefile
cpwBE<- readOGR(dsn=".", layer = "BaldEhabitatMerge")
## using >summary(cpwBE) shows that the shapefile is projected
## in NAD1983 UTM zone 13N
## load Study Area Bounding Box (state of Colorado)
StColo<- readOGR(dsn=".", layer = "ColoradoGCSNAD1983")
## to make data easier to read, remove unncessary columns(can be returned later)
bald<- BEobs
d<-c("recordedby", "catalognum", "collection", "month", "day", "year",
"decimallon", "decimallat", "locality", "species", "gbifid")
BEobspoints<- bald[d]
####################################################################################################
## Remove Incomplete Results
## as.data.frame
bald.df<- data.frame(BEobspoints)
## must remove rows with missing values
## str(bald.df) ## note number of records (observations)
##complete.cases(bald.df) ## this produces a logical vector for complete rows 'TRUE'
BEcomplete<- bald.df[complete.cases(bald.df), ]
## now back to spatial points data frame
coordinates(BEcomplete) = c("decimallon", "decimallat")
crs.geo <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84") # geographical, datum WGS84
proj4string(BEcomplete) <- crs.geo # define projection system of our data
####################################################################################################
## Project cpw polygon shapefile
crs.geo <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84") # geographical, datum WGS84
cpwBE<- spTransform(cpwBE, crs.geo)
proj4string(cpwBE) <- crs.geo
####################################################################################################
## Calculate Gcentroid Coordinates for Each unique observer ID ('recordedby')
## include gCentroid for entire population in table
## try using complete entries only
popcent = gCentroid(BEcomplete)
obsname = "PopulationCentroid"
obsname=as.data.frame(obsname)
OID ="Row NA"
OID=as.data.frame(OID)
Gcent.pop.df = cbind(OID, obsname)
GCS.SPDF = SpatialPointsDataFrame(coords = popcent, data = Gcent.pop.df)
####################################################################################################
## Get vector of unique Observer ID's
oname<- as.data.frame(BEcomplete$recordedby)
urs<- unique(oname)
## Calculate Gcentroid coordinates of all Observations by Observer ID
i=0
for (i in 1:(nrow(urs)) ){
## get unique values
obsname = as.character(urs[i, ])
ptsub = subset(BEcomplete, BEcomplete$recordedby == obsname)
gcent = gCentroid(ptsub)
obsname = as.data.frame(obsname)
OID = as.character(i)
obsdata= cbind(OID, obsname)
new.spdf = SpatialPointsDataFrame(coords=gcent, data= obsdata)
GCS.SPDF = rbind(GCS.SPDF, new.spdf)
i=i+1
}
dr<- getwd()
writeOGR(GCS.SPDF, dr, "UserG_Centroid.new", driver = "ESRI Shapefile", overwrite_layer = TRUE)
####################################################################################################
## Now Measure distance between each observation and its respective user centroid (Local Knowledge)
i=0
for (i in 1: length(BEcomplete)){
thepoint <- BEcomplete[i, ]
onam <- as.character(BEcomplete@data$recordedby[i])
obid<- as.character(BEcomplete@data$catalognum[i])
##gcentroid point
gct<- subset(GCS.SPDF, obsname==onam)
dval<- spDistsN1(thepoint, gct, longlat = TRUE)
##dt= data.frame(obid, dval)
##oidgcent.df = rbind(oidgcent.df, dt)
## Append distance value to VGI test data data frame
BEcomplete@data$GcentDist[i]<- dval
## Let's also append a column containing the number of observations made by the User (including that observation)
## User Experience
count.table <- count(BEcomplete@data, 'recordedby')
count.freq <- count.table$freq[count.table$recordedby==onam]
BEcomplete@data$obs.count[i]<- count.freq
}
## Add column that contains GcentDist/ standard deviation of population Gcentdist
sd.pop.gcent <- sd(BEcomplete@data$GcentDist)
BEcomplete@data$Gcent.sd = BEcomplete@data$GcentDist/sd.pop.gcent
## save the updated spatial points data frame
dr<- getwd()
writeOGR(BEcomplete, dr, "BEcomplete.new", driver = "ESRI Shapefile", overwrite_layer = TRUE)
####################################################################################################
## Compare agains CPW reference Polygons (Species Activity Map)
## Will Run 2 Separate times for each indicator (Local Knowledge, and User Experience)
## User Experience
maxfreq <- max(BEcomplete@data$obs.count)
##empty df
##df=data.frame(i=numeric(), num_pts=numeric(),propo=numeric())
df.rows <- maxfreq
UE.df<- data.frame(matrix(0, ncol=3, nrow=df.rows))
colnames(df)<-c("Obs.Count", "NumberofPoints", "Proportion_w/in")
i=0
for (i in 1:maxfreq){
UE.sub<- subset(BEcomplete, obs.count>=i)
num.pts<- nrow(UE.sub)
vs.cpw<- !is.na(over(UE.sub, as(cpwBE, "SpatialPolygons")))
propo<-mean(vs.cpw)
## append data
UE.df[i,]= c(i, num.pts, propo)
##df<-rbind(df, c(i, num_pts, propo))
print(i)
i=i+1
colnames(UE.df)<-c("Observations per User", "NumberofPoints", "Proportion w/in Reference Data")
}
jpeg('UserExperienceScatter.jpeg')
plot(UE.df)
dev.off()
##############################
## Local Knowledge
## Now write comparison script
maxdist <- max(BEcomplete@data$GcentDist)+1
#empty df
df.rows <- maxdist
LK.df <- data.frame(matrix(0, ncol=3, nrow=df.rows))
colnames(LK.df)<-c("G.Centroid.Dist", "NumberofPoints", "Proportionw/in")
i=0
for (i in 1:maxdist){
GD.subset<- subset(BEcomplete, GcentDist<=i)
num_pts<- nrow(GD.subset)
win.cpw<- !is.na(over(GD.subset, as(cpwBE, "SpatialPolygons")))
proportion<-mean(win.cpw)
## try
LK.df[i,]= c(i, num_pts, proportion)
##df<-rbind(df, c(i, num_pts, propo))
print(i)
i=i+1
}
colnames(LK.df)<-c("CentroidDistance", "NumberofPoints", "Proportionw/in")
jpeg('LocalKnowledgeScatter.jpeg')
plot(LK.df)
dev.off()
##########################################################################################
## Create data frame based on User ID
## record standard deviation of point-centroid distance for each user
## Count number of unique users
obs.count<- count(BEcomplete@data, 'recordedby')
rown<- nrow(obs.count)
i=0
for (i in 1:rown){
u.oid= obs.count$recordedby[i]
char.oid= as.character(u.oid)
gcent.sub= subset(BEcomplete, BEcomplete@data$recordedby==char.oid)
gcent.mean <- mean(as.numeric(gcent.sub@data$GcentDist))
gcent.sd <- sd(as.numeric(gcent.sub@data$GcentDist), na.rm = TRUE)
obs.count$gcentmean[i] = gcent.mean
obs.count$gcent.sd[i] = gcent.sd
i=i+1
}
##########################################################################################
## Run Analysis with SD of each unique user's point-centroid distance
maxgcent<- max(obs.count$gcent.sd, na.rm = TRUE)+1
## create data frame to store values
USR.GC.df=data.frame(thresh=numeric(), numb_obsrvr=numeric(),proportion=numeric(), num_pts=numeric() )
i=0
for (i in 1:maxgcent){
## get observers with observation Standard Deviation of at least 'i'
sub.gcent<- subset(obs.count, gcent.sd<=i)
# Count number of observers with at least 'i' observations
obsnum<- nrow(sub.gcent)
## append to dataframe
## create list of observer ID's
obslist<- sub.gcent$recordedby
## convert to character format
obslist=as.character(obslist)
mysub<- BEcomplete[BEcomplete@data$recordedby %in% obslist, ]
## number of unique points
uqpts<- nrow(mysub)
in.cpw<- !is.na(over(mysub, as(cpwBE, "SpatialPolygons")))
prop<-mean(in.cpw)
USR.GC.df<-rbind(USR.GC.df, c(i, obsnum, prop, uqpts))
i=i+1
colnames(USR.GC.df)<-c("Sdev of Point-Centroid Dist", "Number of Users",
"Proportion w/in Reference Data", "Number of Observations")
}
save(USR.GC.df, file="USR.GC.df.Rda")
write.csv(USR.GC.df, file = "USR.GC.df.csv")
jpeg('UserID_Centroid_SdevScatter.jpeg')
plot(USR.GC.df)
dev.off()
##################################################################################
## Run example subset
UE.thresh <- 10
LK.thresh <-50
test.subset<- subset(BEcomplete, GcentDist<=LK.thresh & obs.count >=UE.thresh )
in.cpw<- !is.na(over(test.subset, as(cpwBE, "SpatialPolygons")))
test.prop<-mean(in.cpw)
nrow(test.subset)
print(test.prop)
## save subset of test data points
dr<- getwd()
writeOGR(test.subset, dr, "TestData.subset", driver = "ESRI Shapefile", overwrite_layer = TRUE)