forked from seb369/landuse_comm_assembly
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbNTI_soil_properties.Rmd
412 lines (318 loc) · 15 KB
/
bNTI_soil_properties.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
---
title: "Comparing βNTI and soil properties across the whole region"
author: "Samuel Barnett"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
github_document:
toc: true
toc_depth: 2
html_preview: false
---
## Introduction
In this notebook, we will use the βNTI values calculated in bNTI_calculations.Rmd and see if they correlate with difference is various soil properties across all 30 soil samples. A correlation would indicate that the property is a driver of deterministic community assembly across the region.
This analysis uses the same (-2, 2) range for significance testing as in Stegen et al. 2012. This means that the following conclusison can be drawn from this data:
βNTI > 2: Community assembly driven by variable selection
βNTI < -2: Community assembly driven by homgenizing selection
|βNTI| < 2: Community assembly is stochastic
### Initiate libraries
```{r, message=FALSE, warning=FALSE}
# Packages needed for analysis
library(dplyr)
library(tidyr)
library(tibble)
library(phyloseq)
library(geosphere)
# Packages needed for plotting
library(ggplot2)
```
### Import data
```{r, message=FALSE, warning=FALSE}
# Import bulk soil phyloseq data
bulk.physeq = readRDS("/home/sam/data/fullCyc2_data/bulk_soil_physeq.RDS")
## Check how many reads you have in each of the samples. This will tell you if you need to re-do anything
# Get read counts and make a new dataframe with this data
read_count = data.frame("count" = colSums(otu_table(bulk.physeq))) %>%
rownames_to_column(var="X.Sample") %>%
inner_join(data.frame(sample_data(bulk.physeq)), by="X.Sample") %>%
arrange(-count) %>%
mutate(X.Sample=factor(X.Sample, levels=X.Sample))
# Now plot read count for each sample. The horizontal line represents a 2000 read threshold
ggplot(data=read_count, aes(x=X.Sample, y=log10(count), fill=ecosystem)) +
geom_bar(stat="identity") +
labs(x="Sample", y="Log10(Read count)") +
geom_hline(yintercept=log10(10000)) +
theme(text = element_text(size=16),
axis.text.x = element_blank())
# Everything seems to be at or above 10000 total reads
bulk.physeq
```
Now we need to rarefy the data to normalize the sequencing depth. We should also get a normalized dataset which gives relative abundance rather than readcounts.
```{r, message=FALSE, warning=FALSE}
# Rarefy to an even depth
set.seed(72) # setting seed for reproducibility
bulk.physeq.rare = rarefy_even_depth(bulk.physeq)
# Normalize read counts (this gives relative abundance)
bulk.physeq.norm = transform_sample_counts(bulk.physeq.rare, function(x) x/sum(x))
```
Now import the βNTI data generated in bNTI_calculation.Rmd
```{r, message=FALSE, warning=FALSE}
# Import data
full.bNTI.df = read.table("/home/sam/data/fullCyc2_data/Final_data/community_assembly/full_bNTI.txt")
```
## βNTI across soil parameters.
Lets see if there is a correlation between βNTI and some of the soil parameters measured.
First we need a function for running the mantel test. This just makes things easier later with less code.
```{r, message=FALSE, warning=FALSE}
Sams.mantel.test = function(df, seed=NULL) {
# Run mantel test to see if there is a correlation
delta.mat = df %>%
select(Sample_1, Sample_2, delta) %>%
spread(Sample_2, delta)
rownames(delta.mat) = delta.mat$Sample_1
delta.mat$Sample_1 = NULL
delta.mat = delta.mat[names(sort(rowSums(!is.na(delta.mat)), decreasing = F)), names(sort(colSums(!is.na(delta.mat)), decreasing = T))]
delta.mat = as.dist(delta.mat)
bNTI.mat = df %>%
select(Sample_1, Sample_2, bNTI) %>%
spread(Sample_2, bNTI)
rownames(bNTI.mat) = bNTI.mat$Sample_1
bNTI.mat$Sample_1 = NULL
bNTI.mat = bNTI.mat[names(sort(rowSums(!is.na(bNTI.mat)), decreasing = F)), names(sort(colSums(!is.na(bNTI.mat)), decreasing = T))]
bNTI.mat = as.dist(bNTI.mat)
if (!(is.null(seed))){
set.seed(seed)
}
mantel.res = vegan::mantel(delta.mat, bNTI.mat)
return(mantel.res)
}
```
I also want to add the ecosystem type to the βNTI dataframe so that I can see whether we are comparing between the same or different land use strategies.
```{r, message=FALSE, warning=FALSE}
# Land use shapes
LandUse.shapes = c("agriculture" = 15, "meadow" = 16, "forest" = 17, "across" = 5)
# get habitat metadata and add it to the βNTI data
eco.meta1=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, ecosystem) %>%
rename(Sample_1 = X.Sample, ecosystem_1 = ecosystem)
eco.meta2=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, ecosystem) %>%
rename(Sample_2 = X.Sample, ecosystem_2 = ecosystem)
full.eco.bNTI.df = inner_join(full.bNTI.df, eco.meta1) %>%
inner_join(eco.meta2)
```
### pH
First lets see if βNTI is correlated with the difference in pH between sites.
```{r, message=FALSE, warning=FALSE, fig.height=5, fig.width=5}
# Add in pH metadata
pH.meta1=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, pH) %>%
rename(Sample_1 = X.Sample, pH_1 = pH)
pH.meta2=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, pH) %>%
rename(Sample_2 = X.Sample, pH_2 = pH)
full.bNTI.pH.df = inner_join(full.eco.bNTI.df, pH.meta1) %>%
inner_join(pH.meta2) %>%
mutate(delta = abs(pH_1-pH_2),
crosstype = ifelse(ecosystem_1 == ecosystem_2, as.character(ecosystem_1), "across"))
# Run mantel test to see if there is a correlation
pH.mantel = Sams.mantel.test(full.bNTI.pH.df, seed=72)
# Plot
full.bNTI.pH.plot = ggplot(full.bNTI.pH.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype)) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=3.25, y=12.5, label=paste("r= ", round(pH.mantel$statistic, 3), "\n", "p= ", round(pH.mantel$signif, 3), sep="")) +
labs(x="∆ pH", y="βNTI") +
theme(legend.position = "none")
full.bNTI.pH.plot
```
There is a significant positive relationship between βNTI and difference in pH between sites. For the most part, sites with very similar pHs have a βNTI < -2 suggesting homogeneous selection pressure, while sites with very dissimilar pHs have a βNTI > 2 suggesting varaible selection.
### Soil organic matter
Now lets see if βNTI is correlated with the difference in soil organic matter (SOM) between sites.
```{r, message=FALSE, warning=FALSE, fig.height=5, fig.width=5}
# Add in SOM metadata
SOC.meta1=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, organic_content_perc) %>%
rename(Sample_1 = X.Sample, SOC_1 = organic_content_perc)
SOC.meta2=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, organic_content_perc) %>%
rename(Sample_2 = X.Sample, SOC_2 = organic_content_perc)
full.bNTI.SOC.df = inner_join(full.eco.bNTI.df, SOC.meta1) %>%
inner_join(SOC.meta2) %>%
mutate(delta = abs(SOC_1-SOC_2),
crosstype = ifelse(ecosystem_1 == ecosystem_2, as.character(ecosystem_1), "across"))
# Run mantel test to see if there is a correlation
SOC.mantel = Sams.mantel.test(full.bNTI.SOC.df, seed=72)
# Plot
full.bNTI.SOC.plot = ggplot(full.bNTI.SOC.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype)) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=0.1375, y=12.5, label=paste("r= ", round(SOC.mantel$statistic, 3), "\n", "p= ", round(SOC.mantel$signif, 3), sep="")) +
labs(x="∆ %SOC", y="βNTI") +
theme(legend.position = "none")
full.bNTI.SOC.plot
```
### C:N ratio
Now lets see if βNTI is correlated with the difference in C:N ratio between sites.
```{r, message=FALSE, warning=FALSE, fig.height=5, fig.width=5}
# Add in C:N metadata
CN.meta1=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, percent_N, percent_C) %>%
rename(Sample_1 = X.Sample) %>%
mutate(CN_1 = percent_C/percent_N) %>%
select(-percent_N, -percent_C)
CN.meta2=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, percent_N, percent_C) %>%
rename(Sample_2 = X.Sample) %>%
mutate(CN_2 = percent_C/percent_N) %>%
select(-percent_N, -percent_C)
full.bNTI.CN.df = inner_join(full.eco.bNTI.df, CN.meta1) %>%
inner_join(CN.meta2) %>%
mutate(delta = abs(CN_1-CN_2),
crosstype = ifelse(ecosystem_1 == ecosystem_2, as.character(ecosystem_1), "across"))
# Run mantel test to see if there is a correlation
CN.mantel = Sams.mantel.test(full.bNTI.CN.df, seed=72)
# Plot
full.bNTI.CN.plot = ggplot(full.bNTI.CN.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype)) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=9.3, y=12.5, label=paste("r= ", round(CN.mantel$statistic, 3), "\n", "p= ", round(CN.mantel$signif, 3), sep="")) +
labs(x="∆ C:N", y="βNTI") +
theme(legend.position = "none")
full.bNTI.CN.plot
```
There is a significant relationship between βNTI and delta C:N ratio.
### Percent sand
Now lets see if βNTI is correlated with the difference in percent sand between sites.
```{r, message=FALSE, warning=FALSE, fig.height=5, fig.width=5}
# Add in % sand metadata
sand.meta1=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, sand__perc) %>%
rename(Sample_1 = X.Sample, Sand1 = sand__perc)
sand.meta2=data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, sand__perc) %>%
rename(Sample_2 = X.Sample, Sand2 = sand__perc)
full.bNTI.sand.df = inner_join(full.eco.bNTI.df, sand.meta1) %>%
inner_join(sand.meta2) %>%
mutate(delta = abs(Sand1-Sand2),
crosstype = ifelse(ecosystem_1 == ecosystem_2, as.character(ecosystem_1), "across"))
# Run mantel test to see if there is a correlation
sand.mantel = Sams.mantel.test(full.bNTI.sand.df, seed=72)
# Plot
full.bNTI.sand.plot = ggplot(full.bNTI.sand.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype)) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=65, y=12.5, label=paste("r= ", round(sand.mantel$statistic, 3), "\n", "p= ", round(sand.mantel$signif, 3), sep="")) +
labs(x="∆ sand", y="βNTI") +
theme(legend.position = "none")
full.bNTI.sand.plot
```
There is no signficiant correlaton between βNTI and delta percent sand
### Geographic distance
Now lets see if βNTI is correlated with the geographic distance between sites.
```{r, message=FALSE, warning=FALSE, fig.height=5, fig.width=5}
# Get geographic distances between sites
geodist.meta1 = data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, longitude, latitude) %>%
rename(Sample_1 = X.Sample, longitude1 = longitude, latitude1 = latitude)
geodist.meta2 = data.frame(sample_data(bulk.physeq.rare)) %>%
select(X.Sample, longitude, latitude) %>%
rename(Sample_2 = X.Sample, longitude2 = longitude, latitude2 = latitude)
full.bNTI.geodist.df = inner_join(full.eco.bNTI.df, geodist.meta1) %>%
inner_join(geodist.meta2) %>%
mutate(crosstype = ifelse(ecosystem_1 == ecosystem_2, as.character(ecosystem_1), "across"))
Coord1.mat = as.matrix(full.bNTI.geodist.df %>% select(longitude1, latitude1))
Coord2.mat = as.matrix(full.bNTI.geodist.df %>% select(longitude2, latitude2))
full.bNTI.geodist.df$delta = distHaversine(Coord1.mat, Coord2.mat)/1000
# Run mantel test to see if there is a correlation
geodist.mantel = Sams.mantel.test(full.bNTI.geodist.df, seed=72)
# Plot
full.bNTI.geodist.plot = ggplot(full.bNTI.geodist.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype)) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=42.5, y=12.5, label=paste("r= ", round(geodist.mantel$statistic, 3), "\n", "p= ", round(geodist.mantel$signif, 3), sep="")) +
labs(x="Geographic distance (km)", y="βNTI") +
theme(legend.position = "none")
full.bNTI.geodist.plot
```
There is a slight but significant correlation between βNTI and geographic distance when including all pairwise site combinations.
### Plot all together for publication
```{r, message=FALSE, warning=FALSE, fig.height=6.61417, fig.width=6.61417}
# pH
full.bNTI.pH.plot = ggplot(full.bNTI.pH.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype), size=3) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=3.5*.9, y=12.5, size=4,
label=paste("r = ", round(pH.mantel$statistic, 3), "\n", "p = ", round(pH.mantel$signif, 3), sep="")) +
labs(x="∆ pH", y="βNTI") +
lims(x=c(0, 3.5), y=c(-10, 14)) +
theme_bw() +
theme(legend.position = "none",
axis.text = element_text(size=12),
axis.title= element_text(size=14),
plot.margin = unit(c(7,1,1,5), "mm"))
# SOM
full.bNTI.SOC.plot = ggplot(full.bNTI.SOC.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype), size=3) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=0.165*.9, y=12.5, size=4,
label=paste("r = ", round(SOC.mantel$statistic, 3), "\n", "p = ", round(SOC.mantel$signif, 3), sep="")) +
labs(x="∆ SOM", y="βNTI") +
lims(x=c(0, 0.165), y=c(-10, 14)) +
theme_bw() +
theme(legend.position = "none",
axis.text = element_text(size=12),
axis.title= element_text(size=14),
plot.margin = unit(c(7,1,1,5), "mm"))
# C:N ratio
full.bNTI.CN.plot = ggplot(full.bNTI.CN.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype), size=3) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=10.5*.9, y=12.5, size=4,
label=paste("r = ", round(CN.mantel$statistic, 3), "\n", "p = ", round(CN.mantel$signif, 3), sep="")) +
labs(x="∆ C:N", y="βNTI") +
lims(x=c(0, 10.5), y=c(-10, 14)) +
theme_bw() +
theme(legend.position = "none",
axis.text = element_text(size=12),
axis.title= element_text(size=14),
plot.margin = unit(c(7,1,1,5), "mm"))
# Geographic distance
full.bNTI.geodist.plot = ggplot(full.bNTI.geodist.df, aes(x=delta, y=bNTI)) +
geom_point(aes(shape=crosstype), size=3) +
scale_shape_manual(values=LandUse.shapes) +
geom_hline(yintercept = 2, linetype=2) +
geom_hline(yintercept = -2, linetype=2) +
annotate("text", x=49*.9, y=12.5, size=4,
label=paste("r = ", round(geodist.mantel$statistic, 3), "\n", "p = ", round(geodist.mantel$signif, 3), sep="")) +
labs(x="Geographic distance (km)", y="βNTI") +
lims(x=c(0, 49), y=c(-10, 14)) +
theme_bw() +
theme(legend.position = "none",
axis.text = element_text(size=12),
axis.title= element_text(size=14),
plot.margin = unit(c(7,1,1,5), "mm"))
bNTI.chem.plot = cowplot::plot_grid(full.bNTI.pH.plot, full.bNTI.SOC.plot, full.bNTI.CN.plot, full.bNTI.geodist.plot, labels=c("A", "B", "C", "D"))
#ggsave("bNTI_chem.tiff", plot=bNTI.chem.plot, device="tiff",
# path="/home/sam/notebooks/fullCyc2/figures/community_assembly_MS/",
# width=168, height=168, units="mm", dpi=600)
bNTI.chem.plot
```
```{r}
sessionInfo()
```