Skip to content

Commit

Permalink
denoise
Browse files Browse the repository at this point in the history
  • Loading branch information
VascoElbrecht committed Aug 2, 2017
1 parent 85bc2f1 commit 3487038
Showing 1 changed file with 31 additions and 2 deletions.
33 changes: 31 additions & 2 deletions JAMP/R/Denoise.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Haplotyping v0.1

Denoise <- function(files="latest", strategy="unoise", unoise_alpha=5, minsize=10, minrelsize=0.001){
Denoise <- function(files="latest", strategy="unoise", unoise_alpha=5, minsize=10, minhaplosize=0.003){



Expand Down Expand Up @@ -216,11 +216,40 @@ dir.create("_data/4_denoised")
write.csv(file="_data/4_denoised/Raw_haplotable.csv", data, row.names=F)

write.fasta(as.list(data$sequences[-nrow(data)]), paste(data$OTU[-nrow(data)], data$haplotype[-nrow(data)], sep="__"), "_data/4_denoised/Raw_haplo_sequ_byOTU.txt")
write.fasta(as.list(data$sequences[-nrow(data)]), data$haplotype[-nrow(data)], "_data/4_denoised/Raw_haplo_sequ.txt")

info <- paste("\nHaplotype table generated!\nRaw data and fasta files are available in _data/4_denoised (no subsetting)\n\nNow appling subsetting to the dataset!\nHaplotypes below ", minhaplosize, "% abundance in at least one sample are beeing discarded. The relative abundance is based on the number of sequences available before denoising (imput files).\nWaringing: All abundances in the table below ", minhaplosize, "% are set to 0. See Raw_haplotable.csv tble for orignial data without subsetting!\n\n", sep="")
message(info)
cat(file="../log.txt", info, append=T, sep="\n")


# Apply abundance-based subsetting to the raw haplotype table.
# Per-column values are converted to percent of the column total and compared
# against `minhaplosize`, so `minhaplosize` is a percentage (0.003 means 0.003%).

# Re-read the raw table from disk so the subsetting starts from the unfiltered counts.
data <- read.csv("_data/4_denoised/Raw_haplotable.csv", stringsAsFactors=F)

# Iterate over columns 4 .. ncol(data)-1 (index i+3). Presumably these are the
# per-sample count columns, with the last column holding the sequences - TODO confirm.
# NOTE(review): if ncol(data) is 4, `1:(ncol(data)-4)` is `1:0` and the loop
# runs with i = 1 and i = 0 instead of being skipped.
for (i in 1:(ncol(data)-4)){
# Each value of this column expressed as percent of the column total.
temp <- data[i+3]/sum(data[i+3])*100
# Add the counts that fall below the threshold to the last row of this column,
# so they are moved rather than dropped.
# NOTE(review): the last row is itself part of `temp`; if its own percentage is
# below the threshold, its count is added to itself here and then set to 0 on
# the next line - confirm this is intended.
data[nrow(data), i+3] <- data[nrow(data), i+3] + sum(data[i+3][temp < minhaplosize])
# Zero every entry of this column that is below the threshold.
data[i+3][temp < minhaplosize] <- 0
}

tail(data)


# Drop rows whose counts are 0 in every thresholded column.
# `5:ncol(data)-1` parses as `(5:ncol(data))-1`, i.e. columns 4 .. ncol(data)-1,
# which are the same columns the loop above processed.
data <- data[rowSums(data[5:ncol(data)-1])!=0,]

# Relabel column 3 of the last remaining row to record the applied threshold.
# NOTE(review): this assumes the summary row was not removed by the filter above;
# otherwise the label of a regular row is overwritten - confirm.
data[nrow(data), 3] <- paste("denoised+below", minhaplosize, sep="")

# Write the subsetted table twice: into _data/4_denoised and into the current
# working directory, with the threshold embedded in the file name.
write.csv(file=paste("_data/4_denoised/haplotable_", minhaplosize, ".csv", sep=""), data, row.names=F)
write.csv(file=paste("haplotable_", minhaplosize, ".csv", sep=""), data, row.names=F)

# Export the sequences of all rows except the last one as fasta: first named
# "<OTU>__<haplotype>", then named by haplotype only.
# NOTE(review): the first file name has no separator between "byOTU" and the
# threshold (e.g. "haplo_sequ_byOTU0.003.txt"), unlike "haplo_sequ_<threshold>.txt".
write.fasta(as.list(data$sequences[-nrow(data)]), paste(data$OTU[-nrow(data)], data$haplotype[-nrow(data)], sep="__"), paste("_data/4_denoised/haplo_sequ_byOTU", minhaplosize, ".txt", sep=""))
write.fasta(as.list(data$sequences[-nrow(data)]), data$haplotype[-nrow(data)], paste("_data/4_denoised/haplo_sequ_", minhaplosize, ".txt", sep=""))




head(data)


temp <- "\nModule completed!"
Expand Down

0 comments on commit 3487038

Please sign in to comment.