Skip to content

Commit

Permalink
bug fix - ncbi downloader
Browse files Browse the repository at this point in the history
Now using web history; due to changes in the API, the old method was
broken.
  • Loading branch information
VascoElbrecht committed Mar 13, 2018
1 parent 5e0fb21 commit 452a555
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 12 deletions.
2 changes: 1 addition & 1 deletion PrimerMiner/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: PrimerMiner
Type: Package
Title: PrimerMiner: an R package for development and in silico validation of DNA metabarcoding primers
Version: 0.16
Date: 2017-10-05
Date: 2018-02-13
Author: Vasco Elbrecht
Maintainer: Vasco Elbrecht <[email protected]>
Description: PrimerMiner is an R-based batch sequence downloader to design and verify metabarcoding primers. Sequences for a specified marker (e.g. COI) are obtained from NCBI and BOLD and clustered into operational taxonomic units (OTUs) to reduce bias introduced by overrepresented sequences in the databases.
Expand Down
28 changes: 18 additions & 10 deletions PrimerMiner/R/Download_GB.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,39 @@ if (is.null(custom_query)){
searchQ <- paste(taxon[k],"[Organism] AND (", paste(c(marker), collapse=" OR "), ") AND 1:",maxlength ,"[Sequence Length]", sep="")
} else {searchQ <- paste(taxon, custom_query, sep="")}

search_results <- entrez_search(db="nuccore", term=searchQ, retmax=9999999)
search_results <- entrez_search(db="nuccore", term=searchQ, retmax=9999999, use_history=T)


if(length(search_results$ids)!=0){

cat("", file=paste(folder_path, taxon[k], "_GB.fasta", sep=""), sep="", append=F) # overwrite old file!


i <- 1
while (!is.na(search_results$ids[i])){
temp <- search_results$ids[i:(i+499)]
temp <- temp[!is.na(temp)]
downloaded_sequ <- entrez_fetch(db="nuccore", id=temp, rettype="fasta")
if (downloaded_sequ[1]!="resource temporarily unavailable (4)."){
start <- 0

chunks <- length(search_results$ids)/10000
if (!is.integer(chunks)){chunks <- as.integer(length(search_results$ids)/10000)+1}
for(i in 1:chunks){

downloaded_sequ <- entrez_fetch(db="nuccore", web_history= search_results$web_history, rettype="fasta", retmax=10000, retstart= start)

cat(downloaded_sequ, file=paste(folder_path, taxon[k], "_GB.fasta", sep=""), sep="", append=T)
i <- i + 500} # only write in file if data downloaded!
Sys.sleep(0.5)

start <- start + 10000
Sys.sleep(2.5)

}

}


meep <- read.fasta(paste(folder_path, taxon[k], "_GB.fasta", sep=""))
if (length(meep)!=length(search_results$ids)){
warning("WARNING: Something went wrong with the download. Numer of files in GB does not match number of downloaded files!")
cat(paste("\nWARNING: Something went wrong with the download. Numer of files in GB does not match number of downloaded files!\n\n"), file=logfile, sep="", append=T)}

}

time <- Sys.time() - time
message(paste("Downloaded ", length(search_results$ids)," sequences for ", taxon[k], " in ", format(time, digits=2), " from NCBI.", sep=""))
cat(paste(taxon[k],"\t", length(search_results$ids), "\t", format(time, digits=2), "\n", sep=""), file=logfile, sep="", append=T)
Expand Down
2 changes: 1 addition & 1 deletion Sample_Data/package_tutorial.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Feel free to contact Vasco Elbrecht if you run into issues (twitter: @luckylionde). Enjoy!
# Feel free to contact Vasco by email if you run into issues ([email protected]). Enjoy!

# set the path to the PrimerMiner folder you just downloaded
setwd("~/Documents/UNI_und_VORLESUNGEN/GitHub/PrimerMiner")
Expand Down

0 comments on commit 452a555

Please sign in to comment.