Skip to content

Commit

Permalink
Added option --ref-seq to goalign mask #6
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Jun 1, 2020
1 parent 708f86a commit d3a009c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 1 deletion.
15 changes: 14 additions & 1 deletion cmd/mask.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ var maskout string = "stdout"
var maskstart int
var masklength int
var maskunique bool
var maskrefseq string

// maskCmd represents the mask command
var maskCmd = &cobra.Command{
Expand All @@ -32,6 +33,10 @@ goalign mask -p -i al.phy -s 9 -l 10
This will replace 10 positions with N|X from the 10th position.
If --ref-seq is specified, then coordinates are considered on the given reference sequence
without considering gaps. So far, all insertions compared to the reference sequence are also masked).
If --unique is specified, 'goalign mask --unique' will replace characters that
are unique in their column (except GAPS) with N or X.
In this case, --length and --start are ignored.
Expand All @@ -51,6 +56,8 @@ The output format is the same than input format.
return
}

refseq := cmd.Flags().Changed("ref-seq")

for al := range aligns.Achan {
if aligns.Err != nil {
err = aligns.Err
Expand All @@ -62,7 +69,12 @@ The output format is the same than input format.
return
}
} else {
if err = al.Mask(maskstart, masklength); err != nil {
start := maskstart
length := masklength
if refseq {
start, length, err = al.RefCoordinates(maskrefseq, start, length)
}
if err = al.Mask(start, length); err != nil {
io.LogError(err)
return
}
Expand All @@ -80,5 +92,6 @@ func init() {
maskCmd.PersistentFlags().StringVarP(&maskout, "output", "o", "stdout", "Alignment output file")
maskCmd.PersistentFlags().IntVarP(&maskstart, "start", "s", 0, "Start position (0-based inclusive)")
maskCmd.PersistentFlags().IntVarP(&masklength, "length", "l", 10, "Length of the sub alignment")
maskCmd.PersistentFlags().StringVar(&maskrefseq, "ref-seq", "none", "Coordinates are considered wrt. to the given reference sequence (no effect with --unique)")
maskCmd.PersistentFlags().BoolVar(&maskunique, "unique", false, "If given, then masks characters that are unique in their columns (start and length are ignored)")
}
3 changes: 3 additions & 0 deletions docs/commands/mask.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ This will replace 10 positions with N|X from the 10th position.
If --unique is specified, 'goalign mask --unique' will replace characters that
are unique in their column with N or X. In this case, --length and --start are ignored.

If --ref-seq is specified, then --start and --length are interpreted as coordinates on the given
reference sequence, without counting its gaps. So far, all insertions compared to the reference sequence are also masked.

The output format is the same as the input format.

#### Usage
Expand Down
33 changes: 33 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3388,6 +3388,39 @@ ${GOALIGN} mask -i input -o result -s 0 -l 2 -p
diff -q -b expected result
rm -f input expected result

# Test: `goalign mask --ref-seq` on a nucleotide alignment.
# -s 6 -l 8 are expressed in coordinates of the reference sequence Seq0002,
# i.e. counting only its non-gap characters (0-based start).
# Seq0002 has gaps at alignment columns 6-7 and 15-16 (0-based), so reference
# positions 6..13 correspond to alignment columns 8..17.
echo "->goalign mask / nucl --ref-seq"
# Input alignment: 10 sequences, 20 columns.
cat > input <<EOF
10 20
Seq0000 GATTAATTTG CCGTAGGCCA
Seq0001 GAATCTGAAG ATCGAACACT
Seq0002 TTAAGT--TC ACTTC--ATG
Seq0003 GAGAGGACTA GTTCATACTT
Seq0004 TTTAAACACT TTTACATCGA
Seq0005 TGTCGGACCT AAGTATTGAG
Seq0006 TACAACGGTG TATTCCAGCG
Seq0007 GTGGAGAGGT CTATTTTTCC
Seq0008 GGTTGAAGGA CTCTAGAGCT
Seq0009 GTAAAGGGTA TGGCCATGTG
EOF

# Expected output: alignment columns 8..17 are replaced with N in every
# sequence, including the columns where the reference itself has gaps (15-16);
# the reference gaps at columns 6-7 lie before the masked range and are kept.
cat > expected <<EOF
10 20
Seq0000 GATTAATTNN NNNNNNNNCA
Seq0001 GAATCTGANN NNNNNNNNCT
Seq0002 TTAAGT--NN NNNNNNNNTG
Seq0003 GAGAGGACNN NNNNNNNNTT
Seq0004 TTTAAACANN NNNNNNNNGA
Seq0005 TGTCGGACNN NNNNNNNNAG
Seq0006 TACAACGGNN NNNNNNNNCG
Seq0007 GTGGAGAGNN NNNNNNNNCC
Seq0008 GGTTGAAGNN NNNNNNNNCT
Seq0009 GTAAAGGGNN NNNNNNNNTG
EOF

# NOTE(review): -p presumably selects Phylip input/output, matching the fixtures above — confirm.
${GOALIGN} mask -i input -o result -s 6 -l 8 --ref-seq Seq0002 -p
# Compare ignoring differences in the amount of whitespace; -q only reports whether the files differ.
diff -q -b expected result
# Clean up the temporary fixture files.
rm -f input expected result

echo "->goalign mask --unique"
cat > input <<EOF
>A
Expand Down

0 comments on commit d3a009c

Please sign in to comment.