-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathmclusterSim.R
56 lines (54 loc) · 1.96 KB
/
mclusterSim.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
##' Pairwise semantic similarity for a list of gene clusters
##'
##' Every gene of every cluster is mapped to ontology terms with
##' \code{gene2DO}. For each pair of clusters (including a cluster with
##' itself) the term-level similarities returned by \code{doseSim} are
##' collapsed into one score with \code{combineScores}. Clusters for which
##' no score could be computed, i.e. clusters without any non-NA term, are
##' removed from the result.
##'
##' @title mclusterSim
##' @param clusters A list of gene clusters, each a vector of gene IDs. The names of the list, if any, are used as row and column names of the result.
##' @param ont one of "HDO", "HPO" and "MPO" ("DO" is accepted and treated as "HDO")
##' @param organism organism, passed on to \code{gene2DO}
##' @param measure one of "Wang", "Resnik", "Rel", "Jiang", and "Lin".
##' @param combine One of "max", "avg", "rcmax", "BMA" methods, for combining semantic similarity scores of multiple DO terms associated with gene/protein.
##' @return A symmetric similarity matrix with one row and one column per
##'   cluster that could be scored. The result is always a matrix, even when
##'   only a single cluster remains.
##' @importFrom GOSemSim combineScores
##' @export
##' @author Guangchuang Yu
##' @examples
##' \dontrun{
##' cluster1 <- c("835", "5261","241")
##' cluster2 <- c("578","582")
##' cluster3 <- c("307", "308", "317")
##' clusters <- list(a=cluster1, b=cluster2, c=cluster3)
##' mclusterSim(clusters, measure="Wang")
##' }
mclusterSim <- function(clusters,
                        ont = "HDO",
                        organism = "hsa",
                        measure = "Wang",
                        combine = "BMA") {
    if (ont == "DO") ont <- "HDO"

    n <- length(clusters)

    ## Ontology terms of each cluster with NA terms removed. lapply() always
    ## yields exactly one slot per cluster, so positions stay aligned with
    ## `clusters` even when a cluster maps to no term at all.
    cluster_dos <- lapply(clusters, function(genes) {
        ## NOTE(review): `ont` is not forwarded to gene2DO() here; confirm
        ## that the gene-to-term mapping is meant to be independent of `ont`.
        dos <- unlist(lapply(genes, gene2DO, organism = organism))
        dos[!is.na(dos)]
    })

    scores <- matrix(NA, nrow = n, ncol = n)
    rownames(scores) <- names(clusters)
    colnames(scores) <- names(clusters)

    ## Only the lower triangle (and the diagonal) is computed; each score is
    ## mirrored into the upper triangle.
    for (i in seq_len(n)) {
        do1 <- cluster_dos[[i]]
        if (length(do1) == 0) next
        for (j in seq_len(i)) {
            do2 <- cluster_dos[[j]]
            if (length(do2) == 0) next
            s <- doseSim(do1, do2, measure = measure, ont = ont)
            scores[i, j] <- combineScores(s, combine)
            scores[j, i] <- scores[i, j]
        }
    }

    ## Drop rows/columns that contain no score at all. drop = FALSE keeps the
    ## matrix shape (and the cluster names) when a single cluster is left.
    keep_rows <- rowSums(!is.na(scores)) > 0
    keep_cols <- colSums(!is.na(scores)) > 0
    scores[keep_rows, keep_cols, drop = FALSE]
}