Skip to content

Commit

Permalink
added subset option for BOLD v5
Browse files Browse the repository at this point in the history
  • Loading branch information
VascoElbrecht committed Oct 30, 2024
1 parent d55b338 commit adfd3a9
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
12 changes: 10 additions & 2 deletions PrimerMiner/R/Download_bold.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,19 @@ for (k in 1:length(taxon)){
time <- Sys.time() # get time
bold.data <- bold.public.search(taxonomy = taxon[k]) # fetch process IDs for specific taxons

# Optionally down-sample the BOLD search hits before fetching the full records.
# `subset_bold` is either NULL (no subsetting) or the maximum number of records
# to keep; sampling only happens when the search returned more rows than that.
if (!is.null(subset_bold)) {
  n_hits <- nrow(bold.data)
  if (subset_bold < n_hits) {
    message("Downloading subset for \"", taxon[k], "\": ", subset_bold, " / ", n_hits)
    # Draw `subset_bold` distinct row indices at random (without replacement).
    rowsToKeep <- sample(seq_len(n_hits), subset_bold)
    # drop = FALSE guarantees a data frame is kept, so the subsequent
    # `bold.data$processid` lookup keeps working whatever the column count.
    bold.data <- bold.data[rowsToKeep, , drop = FALSE]
  }
}

data <- bold.fetch(get_by = "processid", identifiers = bold.data$processid)

# save tsv (without any processing)
if(save_bold_tsv){
write.table(data, file=paste(folder_path, taxon[k], "_BOLD.tsv"), quote=F, sep='\t', row.names=F)
write.table(data, file=paste0(folder_path, taxon[k], "_BOLD.tsv"), quote=F, sep='\t', row.names=F)
}

# filter for only needed marker codes
Expand All @@ -48,7 +56,7 @@ exp <- paste(">", data$processid, "___", data$order, "_", data$species, "\n", da
cat(exp, file=paste(folder_path, taxon[k], "_BOLD.fasta", sep=""), append=T, sep="")
}
time <- Sys.time() - time
message(paste("Downloaded ", nrow(data)," sequences for ", taxon[k], " in ", format(time, digits=2), " from BOLD.", sep=""))
message(paste("\nDownloaded ", nrow(data)," sequences for ", taxon[k], " in ", format(time, digits=2), " from BOLD.", sep=""))
cat(paste(taxon[k],"\t", nrow(data), "\t", format(time, digits=2), "\n", sep=""), file= logfile, sep="", append=T)
}
cat("#Bold_data_end\n\n", file= logfile, sep="", append=T)
Expand Down
1 change: 1 addition & 0 deletions PrimerMiner/R/batch_config.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ paste("Version =", packageVersion("PrimerMiner"), "# you might need to regenerat
"merge_bold = T",
"clipping_left_bold = 0",
"clipping_rigth_bold = 0",
"subset_bold=NULL # Enter the maximum number of sequences to download to reduce download time. Obtained sequences might be less than specified if e.g. having a different marker code",
"",
"# Clustering sequences, see ?Clustering for details",
paste("operating_system= \"", sys, "\" # autodetected, can be \"MacOSX\" or \"Linux\"", sep=""),
Expand Down
10 changes: 10 additions & 0 deletions PrimerMiner/R/batch_down.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
batch_download <- function(table, config){

# store API key if already loaded...
if(exists("apikey")){
tempApiKey <- apikey
}

source(config)

if(apikey_bold == "00000000-0000-0000-0000-000000000000"){
bold.apikey(tempApiKey)
}


if(is.data.frame(table)){} else {table <- read.csv(table, sep= Taxon_sep, stringsAsFactors=F)}

table[2][is.na(table[2])] <- "" # replace NAs if only orders are given
Expand Down

0 comments on commit adfd3a9

Please sign in to comment.