You will need some familiarity with R, and the tidyverse and Biostrings packages must be installed.
library(tidyverse)
library(Biostrings)
# Load the tab-delimited table of NCBI classified sequences from the working
# directory. Doubled quote characters are not treated as escapes, and
# whitespace surrounding each field is trimmed.
NCBI_ClassifiedSeqs <- read_delim(
  file = "NCBI_ClassifiedSeqs.tsv",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
# Taxon-level subsets of the classified sequences: keep the rows whose Genus,
# Family or Order column equals the named taxon.
Microcystis <- filter(NCBI_ClassifiedSeqs, Genus == "Microcystis")
Microcystaceae <- filter(NCBI_ClassifiedSeqs, Family == "Microcystaceae")
Chroococcales <- filter(NCBI_ClassifiedSeqs, Order == "Chroococcales")
This saves the Microcystis sequences as a FASTA file, with each record named by its GenBank accession number.
# Export the Microcystis records to "Microcystis.fasta", one entry per row of
# the filtered table, using the Genbank_accession column as the record names.
#
# The accessions are captured BEFORE `Microcystis` is rebound: once the name
# points at the DNAStringSet, the table (and its Genbank_accession column) is
# gone, so reading `Microcystis$Genbank_accession` afterwards cannot supply
# the names.
microcystis_accessions <- as.character(Microcystis$Genbank_accession)
Microcystis <- Biostrings::DNAStringSet(Microcystis$Sequence)
names(Microcystis) <- microcystis_accessions
Biostrings::writeXStringSet(Microcystis, "Microcystis.fasta")