-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis.Rnw
executable file
·248 lines (218 loc) · 12.3 KB
/
analysis.Rnw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
%See MacArthur_Paper_Analysis.R for instructions on how to configure the
%Analysis supporting this script.
%tools>global options>sweave>weave rnw files using: knitr
%sudo apt-get install texlive
%sudo apt-get install texlive-latex-extra
%sudo apt-get install texlive-bibtex-extra
%sudo apt-get install texlive-fonts-recommended
\documentclass{article}
\usepackage{float}
\usepackage{wrapfig}
\usepackage{hyperref}
\usepackage{booktabs}
\usepackage{longtable}
\usepackage[backend=bibtex, style=nature, citestyle=authoryear]{biblatex}
\usepackage[table]{xcolor}
\bibliography{WBVFM_IntroPar}
\newenvironment{knitrout}{}{} %just a dummy environment
\makeatletter
\newcommand\gobblepars{%
\@ifnextchar\par%
{\expandafter\gobblepars\@gobble}%
{}}
\makeatother
\title{Descriptives: SEA and MacArthur}
\begin{document}
%\SweaveOpts{concordance=TRUE}
\begin{knitrout}
<<setup, echo=FALSE, include=FALSE>>=
Sys.setenv(TEXTINPUTS=getwd(),
BIBINPUTS=getwd(),
BSTINPUTS=getwd())
render_sweave()
#Make sure this is set correctly for builds.
rebuild_images = TRUE
source("MacArthur_Paper_Analysis.R")
@
\maketitle
<<Pre_warning, echo=FALSE>>=
if(restrict_analysis != FALSE)
{
wartxt = paste("Currently, only ",restrict_analysis," cells are being used in this analysis, due to computational limitations.", sep="")
}else
{
wartxt = "Draft Text."
}
@
\textbf{WARNING:} \Sexpr{wartxt}
\tableofcontents
\newpage
\section{Data}
<<Variable_table, results='asis', echo=FALSE, cache=FALSE>>=
varMatrix <- matrix(data=NA, nrow=length(names(vars)), ncol=2)
for (i in 1:length(names(vars)))
{
cur_var <- names(vars)[i]
short_name <-eval(parse(text= paste("vars$",cur_var,"$data_mini", sep="")))
varMatrix[i,] <- c(short_name, cur_var)
}
var_caption <- paste0("Variables used in this analysis.")
colnames(varMatrix) <- c("Short Name", "Long Name")
print(xtable(varMatrix, caption=var_caption, label="Variable_Table"),
size="footnotesize",
include.rownames=FALSE,
include.colnames=TRUE,
capiton.placement="bottom"
)
@
Variable names are defined in Table \ref{Variable_Table}.
\newpage
\section{Descriptives}
%Cell-level statistics
<<Cell_Stats, results='asis', echo=FALSE>>=
#selecting just variables of interest
dta3 <- dta2[23:297]
#Select only the mean temperature and precip, for now.
colnames(dta3)[2] <- "per_loss_e"
dta3 <- dta3[,grep('e$',names(dta3), invert=FALSE)]
#Process dataframe to summarize temporal variables into single summaries
collapse_temporal <- function(x, prefix)
{
eval(parse(text=paste("t_dta <- x[,grep('^",prefix,"',names(x), invert=TRUE)]",sep="")))
eval(parse(text=paste("stat_dta <- x[,grep('^",prefix,"',names(x), invert=FALSE)]",sep="")))
eval(parse(text=paste("t_dta$",prefix,"_mean <- rowMeans(stat_dta)", sep="")))
return(t_dta)
}
stargaze_dta <- collapse_temporal(dta3, "lnyx")
stargaze_dta <- collapse_temporal(stargaze_dta, "ncc4")
stargaze_dta <- collapse_temporal(stargaze_dta, "at41")
stargaze_dta <- collapse_temporal(stargaze_dta, "pc41")
ttl = paste("Cell level descriptive statistics (N=",length(dta2[[1]]),")",sep="")
stargazer(stargaze_dta, title=ttl, notes="Variables with 'mean' appendix are summaries of the yearly mean values.")
@
%ADM2-level statistics
<<ADM_Stats, results='asis', echo=FALSE>>=
#selecting just variables of interest
dta3 <- dta2[c(6,23:297)]
colnames(dta3)[1] <- "OBJECTIDe"
colnames(dta3)[3] <- "per_loss_e"
dta3 <- dta3[,grep('e$',names(dta3), invert=FALSE)]
dta3_adm <- aggregate(dta3, by=list(dta3$OBJECTID), FUN=mean, na.rm=TRUE)
#Process dataframe to summarize temporal variables into single summaries
collapse_temporal <- function(x, prefix)
{
eval(parse(text=paste("t_dta <- x[,grep('^",prefix,"',names(x), invert=TRUE)]",sep="")))
eval(parse(text=paste("stat_dta <- x[,grep('^",prefix,"',names(x), invert=FALSE)]",sep="")))
eval(parse(text=paste("t_dta$",prefix,"_mean <- rowMeans(stat_dta)", sep="")))
return(t_dta)
}
stargaze_dta <- collapse_temporal(dta3_adm, "lnyx")
stargaze_dta <- collapse_temporal(stargaze_dta, "ncc4")
stargaze_dta <- collapse_temporal(stargaze_dta, "at41")
stargaze_dta <- collapse_temporal(stargaze_dta, "pc41")
stargaze_dta2 <- stargaze_dta[-c(1,2)]
ttl = paste("ADM2 level descriptive statistics (N=",length(unique(dta2$OBJECTID)),")",sep="")
stargazer(stargaze_dta2, title=ttl, notes=c("Variables with 'mean' appendix are summaries of the yearly mean values.", "All variables represent the mean cell value within a ADM 2."))
@
\begin{figure}[H]
\caption{Study area (ADM2) overlayed with Chinese development finance project locations.}
\label{LTDR_Hansen}
\centering
<<StudyAreaMap, echo=FALSE, fig="TRUE", out.width="0.8\\linewidth">>=
plot(spdf_adm)
points(Mac_spdf, col="red")
@
\end{figure}
\begin{figure}[H]
\caption{Relationship between LTDR 2000 NDVI and Hansen \% Forest Cover 2000, of LTDR cells with at least \Sexpr{forest_thresh}\% forest cover according to Hansen 2000.}
\label{LTDR_Hansen}
\centering
<<LTDR_NDVI, echo=FALSE, fig="TRUE", out.width="0.8\\linewidth">>=
plot(ndviDTA_for)
@
\end{figure}
The distribution of NDVI values for the remaining LTDR cells can be seen in figure \ref{LTDR_Hansen}.
As expected, we see a strong correlation - areas with higher levels of NDVI also tend to have higher levels of forest cover as estimated by Hansen.
Because LTDR and Hansen are not independent products (Hansen leverages the same input as LTDR in the product production), this analysis does not suggest that one product can be used to independently verify the other.
Rather, here we illustrate that historic LTDR trends can be relevant for establishing baselines when Hansen outcome variables are considered.
\begin{figure}[H]
\caption{Histogram of LTDR NDVI Values in (a) locations with >\Sexpr{forest_thresh}\% forest cover in Hansen 2000, and (b) locations with <\Sexpr{forest_thresh}\% forest cover in Hansen 2000.}
\label{NDVI_Hansen_hist}
\centering
<<LTDR_NDVI_hist, echo=FALSE, fig="TRUE", out.width="0.8\\linewidth">>=
par(mfrow=c(2,1))
ttl = paste("Histogram of NDVI values in LTDR 5km w/ >",forest_thresh,"% Forest Cover", sep="")
hist(ndviDTA_for$lnyx_2000e, main=ttl, col="green", xlim=c(0,10000))
ttl2 = paste("Histogram of NDVI values in LTDR 5km w/ <",forest_thresh,"% Forest Cover", sep="")
hist(ndviDTA_notfor$lnyx_2000e, main=ttl2, col="red", xlim=c(0,10000))
@
\end{figure}
\section{Methods}
From each cell defined as forest (cells with > \Sexpr{forest_thresh}\% forest cover according to Hansen 2000), the euclidean distance (Haversine) to each chinese investment site (N = \Sexpr{length(locations2[[1]])}) is calculated and recorded.
On average, the minimum distance between a unit of observation (the cell) and each chinese investment site is \Sexpr{round(minDistKm,0)}km, with an average distance of \Sexpr{round(avgDistKm,0)}km.
\par
To establish the degree of impact Chinese investment had on each grid cell, a weighted distance-decay function is approximated.
The spatial autocorrelation of forest tree cover is approximated using the 1999 LTDR dataset to avoid any potential confounds with treatments that start circa 2000.
By examining the spatial autocorrelation of forest tree cover, we hope to establish the maximum distance at which spillover effects might feasibly be observed (as the spatial pattern of tree cover in 1999 is the produce of all preceeding impacts).
Spatial autocorrelation is examined over 50 kilometer steps - for example, all cells within 50km are contrasted to one another, and the correlation of forest cover is recorded.
This is then repeated in bands, i.e., 50-100km; 100-150km and so forth until the 1000km limit is reached.
At each distance band, a summary measure of spatial autocorrelation - Morans I - is calculated, following:
\begin{equation}
I_h = (\frac{N}{\sum_{i}^{N}\sum_{j}^{N}w_{ij}}) * ((\sum_{i}^{N}\sum_{j}^{N}w_{ij} * (X_{i}-\bar{x}) * \frac{X_{j} - \bar{x}}{\sum_{i}^{N}(X_{i}-\bar{x})})^{2})
\end{equation}
where \textit{h} represents each spatial bin, \textit{N} the number of spatial units, \textit{i} and \textit{j} are indexes for each unit, \textit{X} is the variable of itnerest, and \begin{math}W_{ij}\end{math} represents the weights matrix.
In this application, the weights matrix is specified according to the bin (\textit{h}) being analyzed.
For example, if a chinese project is 100 kilometers from a cell of interest, the relevant spatial correlation estimated in this function is used to weight that project.
\\
\begin{figure}[H]
\caption{Distance decay of spatial autocorrelation observed in 1999 Forest Cover, as measured using the LTDR dataset.}
\label{LTDR_1999_Decay}
\centering
<<LTDR_1999_Decay, echo=FALSE, fig="TRUE", out.width="0.8\\linewidth">>=
plot(correlogram_data)
@
\end{figure}
As this correlogram illustrates, a lack of positive spatial autocorrelation among LTDR measurements in 1999 is first observed at approximately \Sexpr{round(as.numeric(correlogram_data$x.intercept))} km.
We use this distance as a threshold to screen nearby Chinese aid projects - i.e., the average and sum distance of all Chinese aid projects within \Sexpr{round(as.numeric(correlogram_data$x.intercept))} km is taken for each cell, and projects beyond that distance are not included.
These two values - the average and summed distance of Chinese aid projects - are used as our approximation of the strength of Chinese interventions within any given cell on the landscape.
\par
\begin{figure}[H]
\caption{Potential measurements of treatment indicator.}
\label{Proj_Avg_Dist}
\centering
<<Treatment_Map, echo=FALSE, cache=FALSE>>=
mapAvg = spplot(AOI_cells, "thresh_avgDist", col="transparent", main=list(label=paste("Average Distance \n (within ",round(correlogram_data$x.intercept)," km) \n to Chinese projects", sep=""),cex=0.75), sub="Threshold Approach", sp.layout=list(Mac_spdf, pch=16, col="red", cex=0.5))
mapCnt = spplot(AOI_cells, "thresh_tot_proj", col="transparent", main=list(label=paste("Total Nearby \n (within ",round(correlogram_data$x.intercept)," km) \n Chinese projects", sep=""),cex=0.75), sub="Threshold Approach", sp.layout=list(Mac_spdf, pch=16, col="red", cex=0.5))
mapTotDist = spplot(AOI_cells, "thresh_totDist", col="transparent", main=list(label=paste("Total Distance of Nearby \n (within ",round(correlogram_data$x.intercept)," km) \n Chinese projects", sep=""),cex=0.75), sub="Threshold Approach", sp.layout=list(Mac_spdf, pch=16, col="red", cex=0.5))
mapWeightedDist = spplot(AOI_cells, "thresh_weightedDist", col="transparent", main=list(label="Weighted Distance \n of \n Chinese projects",cex=0.75), sub="Distance Decay Approach", sp.layout=list(Mac_spdf, pch=16, col="red", cex=0.5))
if(rebuild_images == TRUE)
{
png(filename="mapFigs.png", width=7, height=5, units="in", res=300)
grid.arrange(mapAvg, mapCnt, mapTotDist,mapWeightedDist, nrow=2, ncol=2, heights=c(unit(2.5, "inches"),unit(2.5, "inches")),widths=c(unit(3.5, "inches"),unit(3.5, "inches")))
}
@
\includegraphics{mapFigs}
\end{figure}
\begin{figure}[H]
\caption{Potential measurements of treatment indicator.}
\label{Temporal_Measurements}
\centering
<<OverTime_Treatment_Fig, echo=FALSE, fig="TRUE", out.width="0.8\\linewidth">>=
par(mfrow=c(2,2))
plot(TempDF$all_years, TempDF$Dist_Decay_Yrs, type="b", lty=2, xlab="Year", ylab="Avg. Dist. Weighted Est. Impact of Chinese Aid")
plot(TempDF$all_years, TempDF$AvgYears, type="b", lty=2, xlab="Year", ylab="Avg. Dist. to New Chinese Aid Proj.")
plot(TempDF$all_years, TempDF$CountProj_Years, type="b", lty=2, xlab="Year", ylab="Count of New Chinese Aid Proj.")
plot(TempDF$all_years, TempDF$Avg_MinYears, type="b", lty=2, xlab="Year", ylab="Avg. Dist. to Closest New Chinese Aid Proj.")
@
\end{figure}
Figure \ref{Proj_Avg_Dist} shows an example of the treatment variables examined in this analysis, averaged across the full time period, while \ref{Temporal_Measurements} shows the overall fluctuations of these measurements over time, across all grid cells being examined.
Functional Form of Analysis:
\begin{equation}
Hansen_{it} = \alpha + \theta * WeightedDistanceActiveChineseProject_{it} + \sum(\beta_{j}*X_{j}) + D_{region} + D_{t} + D_{region} * t + \epsilon_{it}
%\frac{1}{(2\pi)^{1/2}*\sigma_{i}} * exp(-\frac{1}{2}*\frac{(x-\mu_{i})^{2}}{\sigma_{i}^{2}})
\end{equation}
<<Model_Results, results='asis', echo=FALSE>>=
stargazer(CMREG, title="Initial Results", keep=c("DecayYr","Forest_Loss", "NighttimeLights","MinTemp","MaxTemp","MeanTemp", "MinPrecip", "MaxPrecip", "MeanPrecip"))
@
\end{knitrout}
\end{document}