diff --git a/cmd/stats.go b/cmd/stats.go index 0ac6617..d941fb2 100644 --- a/cmd/stats.go +++ b/cmd/stats.go @@ -35,19 +35,29 @@ If --per-sequences is given, then it will print the following stats, for each se 2. Number of consecutive gaps at the beginning of the sequence; 3. Number of consecutive gaps at the end of the sequence; 4. Number of gaps unique to the sequence (present in no other sequence); - If --profile is given along, then the output will be : unique\tnew\tboth, with: + If --count-profile is given, then the output will be : unique\tnew\tboth, with: - 4a unique: # gaps that are unique in each sequence in the alignment - 4b new: # gaps that are new in each sequence compared to the profile - 4c both: # gaps that are unique in each sequence in the alignment and that are new compared the profile 5. Number of gap opennings (streches of gaps are counted once); 6. Number of Unique mutations; - If --profile is given along, then the output will be : unique\tnew\tboth, with: + If --count-profile is given, then the output will be : unique\tnew\tboth, with: - 6a unique: # mutations that are unique in each sequence in the alignment - 6b new: # mutations that are new in each sequence compared to the profile - 6c both: # mutations that are unique in each sequence in the alignment and that are new compared the profile 7. Number of mutations compared to a reference sequence (given with --ref-sequence, otherwise, no column); 8. Length of the sequence without gaps; 9..n Number of occurence of each character (A,C,G, etc.). + +Note that --count-profile takes a tab separated file such as the one given by the command +goalign stats char --per-sites + +site A C G T +0 nA nC nG nT +1... +... +n... 
+ `, RunE: func(cmd *cobra.Command, args []string) (err error) { var aligns *align.AlignChannel diff --git a/cmd/stats_gaps.go b/cmd/stats_gaps.go index 3f732a6..713f791 100644 --- a/cmd/stats_gaps.go +++ b/cmd/stats_gaps.go @@ -27,7 +27,7 @@ var statGapsCmd = &cobra.Command{ - If --from-start is specified, then counts only gaps at sequence starts; - If --from-end is specified, then counts only gaps at sequence ends; - If --unique is specified, then counts only gaps that are unique in their column. - If --profile is given along --unique, then the output will be : unique\tnew\tboth, with: + If --count-profile is given along with --unique, then the output will be : unique\tnew\tboth, with: - unique: # gaps that are unique in each sequence in the alignment - new: # gaps that are new in each sequence compared to the profile - both: # gaps that are unique in each sequence in the alignment and that are new compared the profile @@ -35,6 +35,14 @@ var statGapsCmd = &cobra.Command{ - Otherwise, counts total number of gaps for the given sequence. + Note that --count-profile takes a tab separated file such as the one given by the command + goalign stats char --per-sites + + site A C G T + 0 nA nC nG nT + 1... + ... + n... `, RunE: func(cmd *cobra.Command, args []string) (err error) { var aligns *align.AlignChannel diff --git a/cmd/stats_mutations.go b/cmd/stats_mutations.go index e85be54..61f971f 100644 --- a/cmd/stats_mutations.go +++ b/cmd/stats_mutations.go @@ -21,7 +21,7 @@ var statMutationsCmd = &cobra.Command{ - If --unique is specified, then counts only mutations (characters) that are unique in their column for the given sequence. 
- If --profile is given along --unique, then the output will be : unique\tnew\tboth, with: + If --count-profile is given along with --unique, then the output will be : unique\tnew\tboth, with: - unique: # mutations that are unique in each sequence in the alignment - new: # mutations that are new in each sequence compared to the profile - both: # mutations that are unique in each sequence in the alignment and that are new compared the profile @@ -32,6 +32,16 @@ var statMutationsCmd = &cobra.Command{ It does not take into account '-' and 'N' as unique mutations, and does not take into account '-' and 'N' as mutations compared to a reference sequence. + + Note that --count-profile takes a tab separated file such as the one given by the command + goalign stats char --per-sites + + site A C G T + 0 nA nC nG nT + 1... + ... + n... + `, RunE: func(cmd *cobra.Command, args []string) (err error) { var aligns *align.AlignChannel diff --git a/docs/commands/stats.md b/docs/commands/stats.md index 0d51792..399d3be 100644 --- a/docs/commands/stats.md +++ b/docs/commands/stats.md @@ -19,7 +19,7 @@ If `--per-sequences` option is given, then prints statistics for all sequences i 4. gapsend: Number of consecutive gaps at the end; 5. gapsuniques: Number of gaps unique to that sequence; - If --profile is given along, then the output will be : unique\tnew\tboth, with: + If --count-profile is given, then the output will be : unique\tnew\tboth, with: - 5a unique: # gaps that are unique in each sequence in the alignment - 5b new: # gaps that are new in each sequence compared to the profile - 5c both: # gaps that are unique in each sequence in the alignment and that are new compared the profile @@ -27,7 +27,7 @@ If `--per-sequences` option is given, then prints statistics for all sequences i 6. gapsopenning: Number of streches of gaps; 7. 
mutuniques: Number of unique mutations; - If --profile is given along, then the output will be : unique\tnew\tboth, with: + If --count-profile is given, then the output will be : unique\tnew\tboth, with: - 7a unique: # mutations that are unique in each sequence in the alignment - 7b new: # mutations that are new in each sequence compared to the profile - 7c both: # mutations that are unique in each sequence in the alignment and that are new compared the profile @@ -36,6 +36,15 @@ If `--per-sequences` option is given, then prints statistics for all sequences i 8. length: Lenght of the unaligned sequence; 9. A C G T...: Number of occurence of each character. +Note that `--count-profile` takes a tab separated file such as the one given by the command `goalign stats char --per-sites`: + +``` +site A C G T +0 nA nC nG nT +1... +... +n... +``` Different sub-commands: * `goalign stats alleles`: Prints the average number of alleles per site of the alignment;