Skip to content

Commit

Permalink
Added invertebrate mitochondrial code
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Apr 1, 2019
1 parent 5e9c8aa commit e4f80c6
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 6 deletions.
43 changes: 43 additions & 0 deletions align/align_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1045,6 +1045,49 @@ func TestTranslateMito(t *testing.T) {
}
}

func TestTranslateMitoI(t *testing.T) {

in := NewSeqBag(UNKNOWN)
in.AddSequence("Seq0000", "ATGGATGACTTTTTCTGTTGCCCTCCCCCACCGCAACAGTCTTCCTCATCGAGTAGCGAAGAGACTACCACAACGGGTGGCGGAGGGTGGCATCACTATTACATTATCATAGTTGTCGTAGTGTAATGATAGC", "")
in.AddSequence("Seq0001", "ATGGATGACTTTTTCTGTTGCCCTCCCCCACCGCAACAGTCTTCCTCATCGAGTAGCGAAGAGACTACCACAACGGGTGGCGGAGGGTGGCATCACTATTACATTATCATAGTTGTCGTATAATGA", "")
in.AutoAlphabet()

in2, err2 := in.CloneSeqBag()
if err2 != nil {
t.Error(err2)
}

expaa := NewSeqBag(UNKNOWN)
expaa.AddSequence("Seq0000", "MDDFFCCPPPPQQSSSSSSEETTTTGGGGWHHYYIIMVVVV*W*", "")
expaa.AddSequence("Seq0001", "MDDFFCCPPPPQQSSSSSSEETTTTGGGGWHHYYIIMVVV*W", "")
expaa.AutoAlphabet()

exp3phases := NewSeqBag(UNKNOWN)
exp3phases.AddSequence("Seq0000_0", "MDDFFCCPPPPQQSSSSSSEETTTTGGGGWHHYYIIMVVVV*W*", "")
exp3phases.AddSequence("Seq0000_1", "WMTFSVALPHRNSLPHRVAKSLPQRVAEGGITITLS*LS*CNDS", "")
exp3phases.AddSequence("Seq0000_2", "GWLFLLPSPTATVFLIE*RSDYHNGWRSVASLLHYHSCRSVMM", "")
exp3phases.AddSequence("Seq0001_0", "MDDFFCCPPPPQQSSSSSSEETTTTGGGGWHHYYIIMVVV*W", "")
exp3phases.AddSequence("Seq0001_1", "WMTFSVALPHRNSLPHRVAKSLPQRVAEGGITITLS*LSYN", "")
exp3phases.AddSequence("Seq0001_2", "GWLFLLPSPTATVFLIE*RSDYHNGWRSVASLLHYHSCRMM", "")
exp3phases.AutoAlphabet()

if err := in.Translate(0, 2); err != nil {
t.Error(err)
} else {
if !expaa.Identical(in) {
t.Error(fmt.Errorf("Expected sequences are different from phased sequences"))
}
}

if err := in2.Translate(-1, 2); err != nil {
t.Error(err)
} else {
if !exp3phases.Identical(in2) {
t.Error(fmt.Errorf("Expected sequences are different from 3 phase translated sequences"))
}
}
}

func TestMaskNt(t *testing.T) {
in := NewAlign(UNKNOWN)
in.AddSequence("Seq0000", "GATTAATTTGCCGTAGGCCA", "")
Expand Down
32 changes: 30 additions & 2 deletions align/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ const (
POSITION_SEMI_CONSERVED = 2 // Same weak group
POSITION_NOT_CONSERVED = 3 // None of the above values

GENETIC_CODE_STANDARD = 0 // Standard genetic code
GENETIC_CODE_VETEBRATE_MITO = 1 // Vertebrate mitochondrial genetic code
GENETIC_CODE_STANDARD = 0 // Standard genetic code
GENETIC_CODE_VETEBRATE_MITO = 1 // Vertebrate mitochondrial genetic code
GENETIC_CODE_INVETEBRATE_MITO = 2 // Invertebrate mitochondrial genetic code
)

var stdaminoacid = []rune{'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'}
Expand Down Expand Up @@ -88,12 +89,39 @@ var vertebratemitocode = map[string]rune{
"AGA": '*', "AGG": '*', "TAA": '*', "TAG": '*',
}

var invertebratemitocode = map[string]rune{
"---": '-',
"GCT": 'A', "GCC": 'A', "GCA": 'A', "GCG": 'A',
"TTA": 'L', "TTG": 'L', "CTT": 'L', "CTC": 'L', "CTA": 'L', "CTG": 'L',
"CGT": 'R', "CGC": 'R', "CGA": 'R', "CGG": 'R',
"AAA": 'K', "AAG": 'K',
"AAT": 'N', "AAC": 'N',
"ATG": 'M', "ATA": 'M',
"GAT": 'D', "GAC": 'D',
"TTT": 'F', "TTC": 'F',
"TGT": 'C', "TGC": 'C',
"CCT": 'P', "CCC": 'P', "CCA": 'P', "CCG": 'P',
"CAA": 'Q', "CAG": 'Q',
"AGA": 'S', "AGG": 'S', "TCT": 'S', "TCC": 'S', "TCA": 'S', "TCG": 'S', "AGT": 'S', "AGC": 'S',
"GAA": 'E', "GAG": 'E',
"ACT": 'T', "ACC": 'T', "ACA": 'T', "ACG": 'T',
"GGT": 'G', "GGC": 'G', "GGA": 'G', "GGG": 'G',
"TGG": 'W', "TGA": 'W',
"CAT": 'H', "CAC": 'H',
"TAT": 'Y', "TAC": 'Y',
"ATT": 'I', "ATC": 'I',
"GTT": 'V', "GTC": 'V', "GTA": 'V', "GTG": 'V',
"TAA": '*', "TAG": '*',
}

func geneticCode(code int) (gencode map[string]rune, err error) {
switch code {
case GENETIC_CODE_STANDARD:
gencode = standardcode
case GENETIC_CODE_VETEBRATE_MITO:
gencode = vertebratemitocode
case GENETIC_CODE_INVETEBRATE_MITO:
gencode = invertebratemitocode
default:
err = fmt.Errorf("This genetic code does not exis")
}
Expand Down
4 changes: 3 additions & 1 deletion cmd/phase.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ Output file is an unaligned set of sequences in fasta.
geneticcode = align.GENETIC_CODE_STANDARD
case "mitov":
geneticcode = align.GENETIC_CODE_VETEBRATE_MITO
case "mitoi":
geneticcode = align.GENETIC_CODE_INVETEBRATE_MITO
default:
err = fmt.Errorf("Unknown genetic code : %s", phaseGeneticCode)
return
Expand Down Expand Up @@ -172,7 +174,7 @@ Output file is an unaligned set of sequences in fasta.
func init() {
RootCmd.AddCommand(phaseCmd)
phaseCmd.PersistentFlags().StringVarP(&phaseOutput, "output", "o", "stdout", "Output \"phased\" FASTA file")
phaseCmd.PersistentFlags().StringVar(&phaseGeneticCode, "genetic-code", "standard", "Genetic Code: standard, or mitov (vertebrate mitochondrial)")
phaseCmd.PersistentFlags().StringVar(&phaseGeneticCode, "genetic-code", "standard", "Genetic Code: standard, mitoi (invertebrate mitochondrial) or mitov (vertebrate mitochondrial)")
phaseCmd.PersistentFlags().StringVar(&phaseAAOutput, "aa-output", "none", "Output Met \"phased\" aa FASTA file")
phaseCmd.PersistentFlags().StringVarP(&phaseLogOutput, "log", "l", "none", "Output log: positions of the considered Start for each sequence")
phaseCmd.PersistentFlags().Float64Var(&lencutoff, "len-cutoff", -1.0, "Length cutoff, over orf length, to consider sequence hits (-1==No cutoff)")
Expand Down
4 changes: 3 additions & 1 deletion cmd/phasent.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ Output file is an unaligned set of sequences in fasta.
geneticcode = align.GENETIC_CODE_STANDARD
case "mitov":
geneticcode = align.GENETIC_CODE_VETEBRATE_MITO
case "mitoi":
geneticcode = align.GENETIC_CODE_INVETEBRATE_MITO
default:
err = fmt.Errorf("Unknown genetic code : %s", phaseGeneticCode)
return
Expand Down Expand Up @@ -196,7 +198,7 @@ func init() {
RootCmd.AddCommand(phasentCmd)
phasentCmd.PersistentFlags().StringVarP(&phaseOutput, "output", "o", "stdout", "Output ATG \"phased\" FASTA file")
phasentCmd.PersistentFlags().StringVar(&phaseCodonOutput, "nt-output", "none", "Output ATG \"phased\" FASTA file + first nts not in ref phase removed (nt corresponding to aa-output sequence)")
phasentCmd.PersistentFlags().StringVar(&phaseGeneticCode, "genetic-code", "standard", "Genetic Code: standard, or mitov (vertebrate mitochondrial)")
phasentCmd.PersistentFlags().StringVar(&phaseGeneticCode, "genetic-code", "standard", "Genetic Code: standard, mitoi (invertebrate mitochondrial) or mitov (vertebrate mitochondrial)")
phasentCmd.PersistentFlags().StringVar(&phaseAAOutput, "aa-output", "none", "Output translated sequences FASTA file")
phasentCmd.PersistentFlags().StringVarP(&phaseLogOutput, "log", "l", "none", "Output log: positions of the considered ATG for each sequence")
phasentCmd.PersistentFlags().Float64Var(&lencutoff, "len-cutoff", -1.0, "Length cutoff, over orf length, to consider sequence hits (-1==No cutoff)")
Expand Down
5 changes: 4 additions & 1 deletion cmd/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ It only translates using the standard genetic code so far.
geneticcode = align.GENETIC_CODE_STANDARD
case "mitov":
geneticcode = align.GENETIC_CODE_VETEBRATE_MITO
case "mitoi":
geneticcode = align.GENETIC_CODE_INVETEBRATE_MITO

default:
err = fmt.Errorf("Unknown genetic code : %s", translateGeneticCode)
return
Expand Down Expand Up @@ -92,7 +95,7 @@ It only translates using the standard genetic code so far.

func init() {
RootCmd.AddCommand(translateCmd)
translateCmd.PersistentFlags().StringVar(&translateGeneticCode, "genetic-code", "standard", "Genetic Code: standard, or mitov (vertebrate mitochondrial)")
translateCmd.PersistentFlags().StringVar(&translateGeneticCode, "genetic-code", "standard", "Genetic Code: standard, mitoi (invertebrate mitochondrial) or mitov (vertebrate mitochondrial)")
translateCmd.PersistentFlags().StringVarP(&translateOutput, "output", "o", "stdout", "Output translated alignment file")
translateCmd.PersistentFlags().IntVar(&translatePhase, "phase", 0, "Number of characters to drop from the start of the alignment (if -1: Translate in the 3 phases, from positions 0, 1, and 2)")
translateCmd.PersistentFlags().BoolVar(&unaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)")
Expand Down
27 changes: 26 additions & 1 deletion test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1926,7 +1926,7 @@ ${GOALIGN} translate -i input --phase 0 --unaligned -o result
diff -q -b expected result
rm -f input expected result

echo "->goalign translate 3 phases"
echo "->goalign translate 3 phases (standard)"
cat > input <<EOF
>test1
CATGAGTCTCTCTGATAAGGACAAGGCTGCTGTGAAAGCCCTATGG
Expand Down Expand Up @@ -1968,6 +1968,31 @@ ${GOALIGN} translate -i input --phase -1 --unaligned --genetic-code mitov -o res
diff -q -b expected result
rm -f input expected result

echo "->goalign translate 3 phases (mito invertebrate)"
cat > input <<EOF
>s1
ATGGATGACTTTTTCTGTTGCCCTCCCCCACCGCAACAGTCTTCCTCATCGAGTAGCGAAGAGACTACCACAACGGGTGGCGGAGGGTGGCATCACTATTACATTATCATAGTTGTCGTAGTGTAATGATAGC
>s2
ATGGATGACTTTTTCTGTTGCCCTCCCCCACCGCAACAGTCTTCCTCATCGAGTAGCGAAGAGACTACCACAACGGGTGGCGGAGGGTGGCATCACTATTACATTATCATAGTTGTCGTATAATGA
EOF
cat > expected <<EOF
>s1_0
MDDFFCCPPPPQQSSSSSSEETTTTGGGGWHHYYIIMVVVV*W*
>s1_1
WMTFSVALPHRNSLPHRVAK*LPQRVAEGGITITLS*LS*CNDS
>s1_2
GWLFLLPSPTATVFLIE*R*DYHNGWR*VASLLHYHSCRSVMM
>s2_0
MDDFFCCPPPPQQSSSSSSEETTTTGGGGWHHYYIIMVVV*W
>s2_1
WMTFSVALPHRNSLPHRVAK*LPQRVAEGGITITLS*LSYN
>s2_2
GWLFLLPSPTATVFLIE*R*DYHNGWR*VASLLHYHSCRMM
EOF
${GOALIGN} translate -i input --phase -1 --unaligned --genetic-code mitov -o result
diff -q -b expected result
rm -f input expected result

echo "->goalign dedup"
cat > input <<EOF
5 6
Expand Down

0 comments on commit e4f80c6

Please sign in to comment.