-
Notifications
You must be signed in to change notification settings - Fork 46
main_pipeline
wentao edited this page Mar 10, 2022
·
1 revision
title: "main_pipeline" author: "wentao" date: "2020/3/22" output: html_document editor_options: chunk_output_type: console
pre code,pre,code {
white-space:pre!important;
overflow-x: scroll!important;
}
knitr::opts_chunk$set(echo = TRUE,
fig.width = 7,
fig.height = 5,
fig.align = "center",
warning = FALSE,
message = FALSE
)
2021年12月25日,更新ggClusterNet包文档,这是第一篇,主要包括网络分析及其相关分析。
#--导入所需R包#-------
library(phyloseq)
library(igraph)
library(network)
library(sna)
library(tidyverse)
library(ggClusterNet)
#-----导入数据#-------
data(ps)
按照丰度过滤微生物表格,并却计算相关矩阵,按照指定的阈值挑选矩阵中展示的数值。
这里我们在早期调用了psych包中的corr.test函数,使用三种相关方法,但是这三种是远远不够的,所以后续我们准备将x剁相关都加入其中。
#-提取丰度最高的指定数量的otu进行构建网络
#----------计算相关#----
result = corMicro (ps = ps,
N = 150,
method.scale = "TMM",
r.threshold=0.8,
p.threshold=0.05,
method = "spearman"
)
#--提取相关矩阵
cor = result[[1]]
head(cor)
#-网络中包含的OTU的phyloseq文件提取
ps_net = result[[3]]
#-导出otu表格
otu_table = ps_net %>%
vegan_otu() %>%
t() %>%
as.data.frame()
这是网络布局的基础,无论是什么聚类布局,都需要制作一个分组文件,这个文件有两列,一列是节点,一列是分组信息,这个分组信息名称为:group。 这个文件信息就是用于对节点进行分组,然后按照分组对节点归类,使用包中可视化函数计算节点位置。
注意分组文件的格式,分为两列,第一列是网络中包含的OTU的名字,第二列是分组信息,同样的分组标记同样的字符。
#--人工构造分组信息:将网络中全部OTU分为五个部分,等分
netClu = data.frame(ID = row.names(otu_table),group =rep(1:5,length(row.names(otu_table)))[1:length(row.names(otu_table))] )
netClu$group = as.factor(netClu$group)
head(netClu)
不同的模块按照分组聚集成不同的圆,并且圆形的大小一样。如果一个分组只有一个点,则这个点坐落在圆心位置。
#--------计算布局#---------
result2 = PolygonClusterG (cor = cor,nodeGroup =netClu )
node = result2[[1]]
head(node)
nodeadd函数只是提供了简单的用注释函数,用户可以自己在node的表格后面添加各种注释信息。
tax_table = ps_net %>%
vegan_tax() %>%
as.data.frame()
#---node节点注释#-----------
nodes = nodeadd(plotcord =node,otu_table = otu_table,tax_table = tax_table)
head(nodes)
#-----计算边#--------
edge = edgeBuild(cor = cor,node = node)
head(edge)
p <- ggplot() + geom_segment(aes(x = X1, y = Y1, xend = X2, yend = Y2,color = as.factor(wei_label)),
data = edge, size = 0.5) +
geom_point(aes(X1, X2,fill = Phylum,size = mean),pch = 21, data = nodes) +
scale_colour_brewer(palette = "Set1") +
scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) +
theme(panel.background = element_blank()) +
theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
theme(legend.background = element_rect(colour = NA)) +
theme(panel.background = element_rect(fill = "white", colour = NA)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
p
这是网络布局的基础,无论是什么聚类布局,都需要制作一个分组文件,这个文件有两列,一列是节点,一列是分组信息,这个分组信息名称必须设定为:group。
netClu = data.frame(ID = row.names(tax_table),group =rep(1:3,length(row.names(tax_table)))[1:length(row.names(tax_table))] )
netClu$group = as.factor(netClu$group)
result2 = PolygonClusterG (cor = cor,nodeGroup =netClu )
node = result2[[1]]
# ---node节点注释#-----------
nodes = nodeadd(plotcord =node,otu_table = otu_table,tax_table = tax_table)
#-----计算边#--------
edge = edgeBuild(cor = cor,node = node)
### 出图
pnet <- ggplot() + geom_segment(aes(x = X1, y = Y1, xend = X2, yend = Y2,color = as.factor(wei_label)),
data = edge, size = 0.5) +
geom_point(aes(X1, X2,fill = Phylum,size = mean),pch = 21, data = nodes) +
scale_colour_brewer(palette = "Set1") +
scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) +
# labs( title = paste(layout,"network",sep = "_"))+
# geom_text_repel(aes(X1, X2,label=Phylum),size=4, data = plotcord)+
# discard default grid + titles in ggplot2
theme(panel.background = element_blank()) +
# theme(legend.position = "none") +
theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
theme(legend.background = element_rect(colour = NA)) +
theme(panel.background = element_rect(fill = "white", colour = NA)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
pnet
netClu = data.frame(ID = row.names(cor),group =rep(1:8,length(row.names(cor)))[1:length(row.names(cor))] )
netClu$group = as.factor(netClu$group)
result2 = PolygonClusterG (cor = cor,nodeGroup =netClu )
node = result2[[1]]
# ---node节点注释#-----------
nodes = nodeadd(plotcord =node,otu_table = otu_table,tax_table = tax_table)
#-----计算边#--------
edge = edgeBuild(cor = cor,node = node)
### 出图
pnet <- ggplot() + geom_segment(aes(x = X1, y = Y1, xend = X2, yend = Y2,color = as.factor(wei_label)),
data = edge, size = 0.5) +
geom_point(aes(X1, X2,fill = Phylum,size = mean),pch = 21, data = nodes) +
scale_colour_brewer(palette = "Set1") +
scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) +
# labs( title = paste(layout,"network",sep = "_"))+
# geom_text_repel(aes(X1, X2,label=Phylum),size=4, data = plotcord)+
# discard default grid + titles in ggplot2
theme(panel.background = element_blank()) +
# theme(legend.position = "none") +
theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
theme(legend.background = element_rect(colour = NA)) +
theme(panel.background = element_rect(fill = "white", colour = NA)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
pnet
#----------计算相关#----
result = corMicro (ps = ps,
N = 200,
method.scale = "TMM",
r.threshold=0.8,
p.threshold=0.05,
method = "spearman"
)
#--提取相关矩阵
cor = result[[1]]
head(cor)
#-网络中包含的OTU的phyloseq文件提取
ps_net = result[[3]]
#-导出otu表格
otu_table = ps_net %>%
vegan_otu() %>%
t() %>%
as.data.frame()
tax = ps_net %>% vegan_tax() %>%
as.data.frame()
tax$filed = tax$Phylum
group2 <- data.frame(ID = row.names(tax),group = tax$Phylum)
group2$group =as.factor(group2$group)
result2 = PolygonClusterG (cor = cor,nodeGroup =group2 )
node = result2[[1]]
# ---node节点注释#-----------
nodes = nodeadd(plotcord =node,otu_table = otu_table,tax_table = tax_table)
#-----计算边#--------
edge = edgeBuild(cor = cor,node = node)
### 出图
pnet <- ggplot() + geom_segment(aes(x = X1, y = Y1, xend = X2, yend = Y2,color = as.factor(wei_label)),
data = edge, size = 0.5) +
geom_point(aes(X1, X2,fill = Phylum,size = mean),pch = 21, data = nodes) +
scale_colour_brewer(palette = "Set1") +
scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) +
# labs( title = paste(layout,"network",sep = "_"))+
# geom_text_repel(aes(X1, X2,label=Phylum),size=4, data = plotcord)+
# discard default grid + titles in ggplot2
theme(panel.background = element_blank()) +
# theme(legend.position = "none") +
theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
theme(legend.background = element_rect(colour = NA)) +
theme(panel.background = element_rect(fill = "white", colour = NA)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
pnet
结果发现这些高丰度OTU大部分属于放线菌门和变形菌门,其他比较少。所以下面我们按照OTU数量的多少,对每个模块的大小进行重新调整。
result2 = PolygonRrClusterG (cor = cor,nodeGroup =group2 )
node = result2[[1]]
# ---node节点注释#-----------
nodes = nodeadd(plotcord =node,otu_table = otu_table,tax_table = tax_table)
#-----计算边#--------
edge = edgeBuild(cor = cor,node = node)
### 出图
pnet <- ggplot() + geom_segment(aes(x = X1, y = Y1, xend = X2, yend = Y2,color = as.factor(wei_label)),
data = edge, size = 0.5) +
geom_point(aes(X1, X2,fill = Phylum,size = mean),pch = 21, data = nodes) +
scale_colour_brewer(palette = "Set1") +
scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) +
# labs( title = paste(layout,"network",sep = "_"))+
# geom_text_repel(aes(X1, X2,label=Phylum),size=4, data = plotcord)+
# discard default grid + titles in ggplot2
theme(panel.background = element_blank()) +
# theme(legend.position = "none") +
theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
theme(legend.background = element_rect(colour = NA)) +
theme(panel.background = element_rect(fill = "white", colour = NA)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
pnet
netClu = modulGroup( cor = cor,cut = NULL,method = "cluster_fast_greedy" )
?PolygonClusterG
result2 = PolygonClusterG (cor = cor,nodeGroup = netClu,zoom = 0.6)
node = result2[[1]]
# ---node节点注释#-----------
nodes = nodeadd(plotcord =node,otu_table = otu_table,tax_table = tax_table)
head(nodes)
nodes2 = nodes %>% inner_join(netClu,by = c("elements" = "ID"))
nodes2$group = paste("Model_",nodes2$group,sep = "")
#-----计算边#--------
edge = edgeBuild(cor = cor,node = node)
### 出图
pnet <- ggplot() + geom_segment(aes(x = X1, y = Y1, xend = X2, yend = Y2,color = as.factor(wei_label)),
data = edge, size = 0.5) +
geom_point(aes(X1, X2,fill = group,size = mean),pch = 21, data = nodes2) +
scale_colour_brewer(palette = "Set1") +
scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) +
# labs( title = paste(layout,"network",sep = "_"))+
# geom_text_repel(aes(X1, X2,label=Phylum),size=4, data = plotcord)+
# discard default grid + titles in ggplot2
theme(panel.background = element_blank()) +
# theme(legend.position = "none") +
theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
theme(legend.background = element_rect(colour = NA)) +
theme(panel.background = element_rect(fill = "white", colour = NA)) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
pnet
result4 = nodeEdge(cor = cor)
#提取变文件
edge = result4[[1]]
#--提取节点文件
node = result4[[2]]
igraph = igraph::graph_from_data_frame(edge, directed = FALSE, vertices = node)
res = ZiPiPlot(igraph = igraph,method = "cluster_fast_greedy")
p <- res[[1]]
p
dat = net_properties(igraph)
head(dat)
nodepro = node_properties(igraph)
head(nodepro)
Hub节点是在网络中与其他节点连接较多的节点,Hub微生物就是与其他微生物联系较为紧密的微生物,可以称之为关键微生物(keystone)
hub = hub_score(igraph)$vector %>%
sort(decreasing = TRUE) %>%
head(5) %>%
as.data.frame()
colnames(hub) = "hub_sca"
ggplot(hub) +
geom_bar(aes(x = hub_sca,y = reorder(row.names(hub),hub_sca)),stat = "identity",fill = "#4DAF4A")
result = random_Net_compate(igraph = igraph, type = "gnm", step = 100, netName = layout)
p1 = result[[1]]
sum_net = result[[4]]
p1
head(sum_net)
# plotname = paste(path,"/Power_law_distribution_",layout,".pdf",sep = "")
# ggsave(plotname, p1, width = 8, height =6)
# write.csv(sum_net,paste(path,"/",layout,"_net_VS_erdos_properties.csv",sep = ""),row.names = TRUE)
path = "./result/"
dir.create(path)
result = network(ps = ps,
N = 250,
layout_net = "model_Gephi.2",
r.threshold=0.6,
p.threshold=0.05,
label = FALSE,
path = path,
zipi = TRUE)
# 全部样本的网络比对
p = result[[1]]
# 全部样本网络参数比对
data = result[[2]]
plotname1 = paste(path,"/network_all.jpg",sep = "")
ggsave(plotname1, p,width = 48,height = 16,dpi = 72)
plotname1 = paste(path,"/network_all.pdf",sep = "")
ggsave(plotname1, p,width = 48,height = 16)
tablename <- paste(path,"/co-occurrence_Grobel_net",".csv",sep = "")
write.csv(data,tablename)
大网络运算时间会比较长,这里我没有计算zipi,用时5min完成全部运行。 N=0,代表用全部的OTU进行计算。
path = "./result_big/"
dir.create(path)
result = network(ps = ps,
N = 0,
big = TRUE,
select_layout = TRUE,
layout_net = "model_maptree",
r.threshold=0.6,
p.threshold=0.05,
label = FALSE,
path = path,
zipi = FALSE)
# 全部样本的网络比对
p = result[[1]]
# 全部样本网络参数比对
data = result[[2]]
num= 3
plotname1 = paste(path,"/network_all.jpg",sep = "")
ggsave(plotname1, p,width = 16*num,height = 16,dpi = 72)
plotname1 = paste(path,"/network_all.pdf",sep = "")
ggsave(plotname1, p,width = 16*num,height = 16)
tablename <- paste(path,"/co-occurrence_Grobel_net",".csv",sep = "")
write.csv(data,tablename)