From 29e80769196393107eb0d141222aa91d6a57068b Mon Sep 17 00:00:00 2001 From: Frederic Lemoine Date: Fri, 13 Nov 2020 22:00:54 +0100 Subject: [PATCH] Modified Parsimony / number of steps --- acr/acr_test.go | 28 ++++++++++++++++++---- acr/parsimony.go | 61 +++++++++++++++++++++++++++++++----------------- asr/parsimony.go | 56 ++++++++++++++++++++++++++------------------ cmd/acr.go | 12 +++++++++- cmd/asr.go | 16 ++++++++++++- 5 files changed, 121 insertions(+), 52 deletions(-) diff --git a/acr/acr_test.go b/acr/acr_test.go index 02c50cd..5ba3faa 100644 --- a/acr/acr_test.go +++ b/acr/acr_test.go @@ -54,10 +54,14 @@ func TestACRDELTRANParsimony(t *testing.T) { } // Test UPPASS - err = parsimonyUPPASS(tr.Root(), nil, tipstates, states, stateIndices) + nsteps := 0 + nsteps, err = parsimonyUPPASS(tr.Root(), nil, tipstates, states, stateIndices) if err != nil { t.Error(err) } + if nsteps != 4 { + t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4)) + } testCheckStates(t, 2, ni, "t1", states, stateIndices, "A") testCheckStates(t, 2, ni, "t2", states, stateIndices, "A") testCheckStates(t, 2, ni, "t3", states, stateIndices, "B") @@ -172,10 +176,15 @@ func TestACRACCTRANParsimony(t *testing.T) { } // Test UPPASS - err = parsimonyUPPASS(tr.Root(), nil, tipstates, states, stateIndices) + nsteps := 0 + nsteps, err = parsimonyUPPASS(tr.Root(), nil, tipstates, states, stateIndices) if err != nil { t.Error(err) } + if nsteps != 4 { + t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4)) + } + testCheckStates(t, 2, ni, "t1", states, stateIndices, "A") testCheckStates(t, 2, ni, "t2", states, stateIndices, "A") testCheckStates(t, 2, ni, "t3", states, stateIndices, "B") @@ -255,11 +264,14 @@ func TestALLDELTRAN(t *testing.T) { t.Error(err) } - statemap, err := ParsimonyAcr(tr, tipstates, ALGO_DELTRAN, false) + statemap, nsteps, err := ParsimonyAcr(tr, tipstates, ALGO_DELTRAN, false) if err != nil { t.Error(err) } + if nsteps != 4 { + t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4)) + } testCheckMap(t, "t6", statemap, "A") testCheckMap(t, "t7", statemap, "A") testCheckMap(t, "t11", statemap, "A") @@ -303,10 +315,13 @@ func TestALLDOWNPASS(t *testing.T) { t.Error(err) } - statemap, err := ParsimonyAcr(tr, tipstates, ALGO_DOWNPASS, false) + statemap, nsteps, err := ParsimonyAcr(tr, tipstates, ALGO_DOWNPASS, false) if err != nil { t.Error(err) } + if nsteps != 4 { + t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4)) + } testCheckMap(t, "t6", statemap, "A", "B") testCheckMap(t, "t7", statemap, "A", "B") testCheckMap(t, "t11", statemap, "A", "B") @@ -351,10 +366,13 @@ func TestALLACCTRAN(t *testing.T) { t.Error(err) } - statemap, err := ParsimonyAcr(tr, tipstates, ALGO_ACCTRAN, false) + statemap, nsteps, err := ParsimonyAcr(tr, tipstates, ALGO_ACCTRAN, false) if err != nil { t.Error(err) } + if nsteps != 4 { + t.Error(fmt.Errorf("Number of pars steps id %d and should be %d", nsteps, 4)) + } testCheckMap(t, "t6", statemap, "B") testCheckMap(t, "t7", statemap, "B") testCheckMap(t, "t11", statemap, "A") diff --git a/acr/parsimony.go b/acr/parsimony.go index a5d27c8..fac341b 100644 --- a/acr/parsimony.go +++ b/acr/parsimony.go @@ -2,7 +2,6 @@ package acr import ( "bytes" - "errors" "fmt" "math/rand" "sort" @@ -29,9 +28,9 @@ const ( // If ALGO_ACCTRAN, then executes UPPASS and ACCTRAN // Returns a map with the states of all nodes. If a node has a name, key is its name, if a node has no name, // the key will be its id in the deep first traversal of the tree. +// Also returns the number of parsimony steps // if randomResolve is true, then in the second pass, each ambiguities will be resolved randomly -func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, randomResolve bool) (map[string]string, error) { - var err error +func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, randomResolve bool) (nametostates map[string]string, nsteps int, err error) { var nodes []*tree.Node = t.Nodes() var states []AncestralState = make([]AncestralState, len(nodes)) // Downside states of each node var upstates []AncestralState = make([]AncestralState, len(nodes)) // Upside states of each node @@ -54,9 +53,9 @@ func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, rando upstates[i] = make(AncestralState, len(alphabet)) } - err = parsimonyUPPASS(t.Root(), nil, tipCharacters, states, stateIndices) + nsteps, err = parsimonyUPPASS(t.Root(), nil, tipCharacters, states, stateIndices) if err != nil { - return nil, err + return } switch algo { @@ -70,36 +69,40 @@ func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, rando case ALGO_NONE: // No pass after uppass default: - return nil, fmt.Errorf("Parsimony algorithm %d unkown", algo) + err = fmt.Errorf("Parsimony algorithm %d unkown", algo) + return } - nametostates := buildInternalNamesToStatesMap(t, states, alphabet) + nametostates = buildInternalNamesToStatesMap(t, states, alphabet) assignStatesToTree(t, states, alphabet) - return nametostates, nil + return } // First step of the parsimony computatation: From tips to root -func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, states []AncestralState, stateIndices map[string]int) error { +func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, states []AncestralState, stateIndices map[string]int) (nsteps int, err error) { + nsteps = 0 + tempsteps := 0 // If it is a tip, we initialize the ancestral state using the current // state in the alignment. If no such tip name exists in the mapping file, // then returns an error if cur.Tip() { state, ok := tipCharacters[cur.Name()] if !ok { - return errors.New(fmt.Sprintf("Tip %s does not exist in the tip/state mapping file", cur.Name())) + return 0, fmt.Errorf("Tip %s does not exist in the tip/state mapping file", cur.Name()) } stateindex, ok := stateIndices[state] if ok { states[cur.Id()][stateindex] = 1 } else { - return errors.New(fmt.Sprintf("State %s does not exist in the alphabet, ignoring the state", state)) + return 0, fmt.Errorf("State %s does not exist in the alphabet, ignoring the state", state) } } else { for _, child := range cur.Neigh() { if child != prev { - if err := parsimonyUPPASS(child, cur, tipCharacters, states, stateIndices); err != nil { - return err + if tempsteps, err = parsimonyUPPASS(child, cur, tipCharacters, states, stateIndices); err != nil { + return 0, err } + nsteps += tempsteps } } @@ -117,9 +120,27 @@ func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, stat nchild++ } } + maxState := 0 + max := 0.0 + for k, c := range states[cur.Id()] { + if c > max { + maxState = k + max = c + } + } computeParsimony(states[cur.Id()], states[cur.Id()], nchild) + // We keep one of the states having the max occurence + // We sum the number of children that does not have this state + // it will give the additionnal number of parsimony steps + for _, child := range cur.Neigh() { + if child != prev { + if states[child.Id()][maxState] == 0 { + nsteps++ + } + } + } } - return nil + return nsteps, nil } // Second step of the parsimony computation: From root to tips @@ -194,7 +215,8 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, // Will set the most parsimonious states in the "currentStates" slice // using the neighbor states "neighborStates", and the number of neighbors -func computeParsimony(neighborStates AncestralState, currentStates AncestralState, nchild int) { +func computeParsimony(neighborStates AncestralState, currentStates AncestralState, nchild int) (nsteps int) { + // If intersection of states of children and parent is emtpy: // then State of cur node == Union of intersection of nodes 2 by 2 // If state is still empty, then state of cur node is union of all states @@ -205,20 +227,15 @@ func computeParsimony(neighborStates AncestralState, currentStates AncestralStat } } for k, c := range neighborStates { - if int(max) == nchild && c == max { + if c == max { // We have a characters shared by all neighbors and parent: Intersection ok currentStates[k] = 1 - } else if int(max) == 1 && c > 0 { - // Else we have no intersection between any children: take union - currentStates[k] = 1 - } else if int(max) < nchild && c > 1 { - // Else we have a character shared by at least 2 children: OK - currentStates[k] = 1 } else { // Else we do not take it currentStates[k] = 0 } } + return } // Third step of the parsimony computation for resolving ambiguities diff --git a/asr/parsimony.go b/asr/parsimony.go index 8829621..0e176a0 100644 --- a/asr/parsimony.go +++ b/asr/parsimony.go @@ -2,7 +2,6 @@ package asr import ( "bytes" - "errors" "fmt" "math/rand" @@ -22,8 +21,7 @@ const ( // Computed using parsimony // Sequences will be located in the comment field of each node // at the first index -func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) error { - var err error +func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) (nsteps []int, err error) { var nodes []*tree.Node = t.Nodes() var seqs []*AncestralSequence = make([]*AncestralSequence, len(nodes)) var upseqs []*AncestralSequence = make([]*AncestralSequence, len(nodes)) // Upside seqs of each node @@ -34,21 +32,23 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) charToIndex[c] = i } + nsteps = make([]int, a.Length()) + // We initialize all ancestral sequences // And sequences at tips for i, n := range nodes { n.SetId(i) if seqs[i], err = NewAncestralSequence(a.Length(), len(a.AlphabetCharacters())); err != nil { - return err + return nil, err } if upseqs[i], err = NewAncestralSequence(a.Length(), len(a.AlphabetCharacters())); err != nil { - return err + return nil, err } } - err = parsimonyUPPASS(t.Root(), nil, a, seqs, charToIndex) + err = parsimonyUPPASS(t.Root(), nil, a, seqs, nsteps, charToIndex) if err != nil { - return err + return } switch algo { @@ -60,36 +60,38 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) case ALGO_ACCTRAN: parsimonyACCTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve) default: - return fmt.Errorf("Parsimony algorithm %d unkown", algo) + err = fmt.Errorf("Parsimony algorithm %d unkown", algo) + return } assignSequencesToTree(t, seqs, a.AlphabetCharacters()) - return nil + return } // First step of the parsimony computatation: From tips to root -func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int) error { +func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, nsteps []int, charToIndex map[rune]int) (err error) { // If it is a tip, we initialize the ancestral sequences using the current // Sequence in the alignment. If no such sequence exists in the alignment, // then returns an error if cur.Tip() { seq, ok := a.GetSequenceChar(cur.Name()) if !ok { - return errors.New(fmt.Sprintf("Sequence %s does not exist in the alignment", cur.Name())) + err = fmt.Errorf("Sequence %s does not exist in the alignment", cur.Name()) + return } for j, c := range seq { charindex, ok := charToIndex[c] if ok { seqs[cur.Id()].seq[j].counts[charindex] = 1 } else { - io.LogWarning(errors.New(fmt.Sprintf("Character %c does not exist in the alphabet, ignoring the state", c))) + io.LogWarning(fmt.Errorf("Character %c does not exist in the alphabet, ignoring the state", c)) } } } else { for _, child := range cur.Neigh() { if child != prev { - if err := parsimonyUPPASS(child, cur, a, seqs, charToIndex); err != nil { - return err + if err = parsimonyUPPASS(child, cur, a, seqs, nsteps, charToIndex); err != nil { + return } } } @@ -108,11 +110,25 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS nchild++ } } - + maxState := 0 + max := 0.0 + for k, c := range seqs[cur.Id()].seq[j].counts { + if c > max { + maxState = k + max = c + } + } computeParsimony(ances, ances, nchild) + for _, child := range cur.Neigh() { + if child != prev { + if seqs[child.Id()].seq[j].counts[maxState] == 0 { + nsteps[j]++ + } + } + } } } - return nil + return } // Second step of the parsimony computatation: From root to tips @@ -201,15 +217,9 @@ func computeParsimony(neighborStates AncestralState, currentStates AncestralStat } } for k, c := range neighborStates.counts { - if int(max) == nchild && c == max { + if c == max { // We have a characters shared by all neighbors and parent: Intersection ok currentStates.counts[k] = 1 - } else if int(max) == 1 && c > 0 { - // Else we have no intersection between any children: take union - currentStates.counts[k] = 1 - } else if int(max) < nchild && c > 1 { - // Else we have a character shared by at least 2 children: OK - currentStates.counts[k] = 1 } else { // Else we do not take it currentStates.counts[k] = 0 diff --git a/cmd/acr.go b/cmd/acr.go index 438aac5..2f48d1b 100644 --- a/cmd/acr.go +++ b/cmd/acr.go @@ -18,6 +18,7 @@ import ( var acrstates string var acrrandomresolve bool // Resolve ambiguities randomly in the downpass/deltran/acctran algo +var outstepfile string // acrCmd represents the acr command var acrCmd = &cobra.Command{ @@ -46,7 +47,9 @@ randomly before going deeper in the tree. var resfile *os.File var treefile goio.Closer var treechan <-chan tree.Trees + var nsteps int var f *os.File + var outstepsf *os.File switch strings.ToLower(parsimonyAlgo) { case "acctran": @@ -80,6 +83,11 @@ randomly before going deeper in the tree. return } defer closeWriteFile(f, outtreefile) + if outstepsf, err = openWriteFile(outstepfile); err != nil { + io.LogError(err) + return + } + defer closeWriteFile(outstepsf, outstepfile) if outresfile != "none" { if resfile, err = openWriteFile(outresfile); err != nil { @@ -89,12 +97,13 @@ randomly before going deeper in the tree. defer closeWriteFile(resfile, outresfile) } for t := range treechan { - statemap, err = acr.ParsimonyAcr(t.Tree, tipstates, algo, acrrandomresolve) + statemap, nsteps, err = acr.ParsimonyAcr(t.Tree, tipstates, algo, acrrandomresolve) if err != nil { io.LogError(err) return } f.WriteString(t.Tree.Newick() + "\n") + fmt.Fprintf(outstepsf, "steps %d\n", nsteps) if outresfile != "none" { for k, v := range statemap { resfile.WriteString(fmt.Sprintf("%s,%s\n", k, v)) @@ -111,6 +120,7 @@ func init() { acrCmd.PersistentFlags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree") acrCmd.PersistentFlags().StringVarP(&outtreefile, "output", "o", "stdout", "Output file") acrCmd.PersistentFlags().StringVar(&outresfile, "out-states", "none", "Output mapping file between node names and states") + acrCmd.PersistentFlags().StringVar(&outstepfile, "out-steps", "stdout", "Output file with number of parsimony steps") acrCmd.PersistentFlags().StringVar(&parsimonyAlgo, "algo", "acctran", "Parsimony algorithm for resolving ambiguities: acctran, deltran, or downpass") acrCmd.PersistentFlags().BoolVar(&acrrandomresolve, "random-resolve", false, "Random resolve states when several possibilities in: acctran, deltran, or downpass") } diff --git a/cmd/asr.go b/cmd/asr.go index a6a037c..c3293aa 100644 --- a/cmd/asr.go +++ b/cmd/asr.go @@ -21,6 +21,7 @@ var asralign string var asrphylip bool var asrinputstrict bool var asrrandomresolve bool // Resolve ambiguities randomly in the downpass/deltran/acctran algo +var outlogfile string // asrCmd represents the asr command var asrCmd = &cobra.Command{ @@ -50,6 +51,8 @@ randomly before going deeper in the tree. var treefile goio.Closer var treechan <-chan tree.Trees var f *os.File + var logf *os.File + var nsteps []int switch strings.ToLower(parsimonyAlgo) { case "acctran": @@ -100,13 +103,23 @@ randomly before going deeper in the tree. return } defer closeWriteFile(f, outtreefile) + if logf, err = openWriteFile(outlogfile); err != nil { + io.LogError(err) + return + } + defer closeWriteFile(logf, outlogfile) for t := range treechan { - err = asr.ParsimonyAsr(t.Tree, align, algo, asrrandomresolve) + nsteps, err = asr.ParsimonyAsr(t.Tree, align, algo, asrrandomresolve) if err != nil { io.LogError(err) return } + fmt.Fprintf(logf, "steps") + for _, s := range nsteps { + fmt.Fprintf(logf, " %d", s) + } + fmt.Fprintf(logf, "\n") f.WriteString(t.Tree.Newick() + "\n") } return @@ -120,6 +133,7 @@ func init() { asrCmd.PersistentFlags().BoolVar(&asrinputstrict, "input-strict", false, "Strict phylip input format (only used with -p)") asrCmd.PersistentFlags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree") asrCmd.PersistentFlags().StringVarP(&outtreefile, "output", "o", "stdout", "Output file") + asrCmd.PersistentFlags().StringVar(&outlogfile, "log", "stdout", "Output log file") asrCmd.PersistentFlags().StringVar(&parsimonyAlgo, "algo", "acctran", "Parsimony algorithm for resolving ambiguities: acctran, deltran, or downpass") asrCmd.PersistentFlags().BoolVar(&asrrandomresolve, "random-resolve", false, "Random resolve states when several possibilities in: acctran, deltran, or downpass") }