From f1db2d32b1a1d7c0405bf68def3446d637024184 Mon Sep 17 00:00:00 2001 From: Lee Katz - Aspen Date: Tue, 17 Apr 2018 12:14:23 -0400 Subject: [PATCH 1/2] support for undefined taxa and percentages of seq lengths instead of counts --- scripts/kraken-report | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/scripts/kraken-report b/scripts/kraken-report index 582a819..4854b9a 100755 --- a/scripts/kraken-report +++ b/scripts/kraken-report @@ -1,4 +1,4 @@ -#!/usr/bin/env perl +#!/usr/bin/perl # Copyright 2013-2015, Derrick Wood # @@ -75,16 +75,22 @@ my (%child_lists, %name_map, %rank_map); load_taxonomy($db_prefix); my %taxo_counts; +my %taxo_bp; my $seq_count = 0; +my $bp_count = 0; $taxo_counts{0} = 0; +$taxo_bp{0} = 0; while (<>) { my @fields = split; $taxo_counts{$fields[2]}++; + $taxo_bp{$fields[2]}+=$fields[3]; $seq_count++; + $bp_count+=$fields[3]; } my $classified_count = $seq_count - $taxo_counts{0}; -my %clade_counts = %taxo_counts; +#my %clade_counts = %taxo_counts; +my %clade_counts = %taxo_bp; dfs_summation(1); for (keys %name_map) { @@ -93,9 +99,8 @@ for (keys %name_map) { my $unclassified_percent = 100; if ($seq_count) { - $unclassified_percent = $clade_counts{0} * 100 / $seq_count; + $unclassified_percent = $clade_counts{0} * 100 / $bp_count; } - printf "%6.2f\t%d\t%d\t%s\t%d\t%s%s\n", $unclassified_percent, $clade_counts{0}, $taxo_counts{0}, "U", @@ -108,8 +113,15 @@ sub dfs_report { if (! $clade_counts{$node} && ! $show_zeros) { return; } + + if(!defined($name_map{$node})){ + warn "Warning: name_map of $node is not defined. Setting to UNKNOWN. Be sure to set scientific name for node ID in $db_prefix/taxonomy/names.dmp"; + $name_map{$node}="UNKNOWN"; + } + printf "%6.2f\t%d\t%d\t%s\t%d\t%s%s\n", - ($clade_counts{$node} || 0) * 100 / $seq_count, + #($clade_counts{$node} || 0) * 100 / $seq_count, + ($clade_counts{$node} || 0) * 100 / $bp_count, ($clade_counts{$node} || 0), ($taxo_counts{$node} || 0), rank_code($rank_map{$node}), @@ -117,6 +129,12 @@ sub dfs_report { " " x $depth, $name_map{$node}; my $children = $child_lists{$node}; + + # set default for undefined + for(@$children){ + $clade_counts{$_}//=0; + } + if ($children) { my @sorted_children = sort { $clade_counts{$b} <=> $clade_counts{$a} } @$children; for my $child (@sorted_children) { From f18baf41afdbe9eb6edbdbf9b5103d51433d4e49 Mon Sep 17 00:00:00 2001 From: Lee Katz Date: Tue, 17 Apr 2018 12:15:42 -0400 Subject: [PATCH 2/2] /usr/bin/env --- scripts/kraken-report | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kraken-report b/scripts/kraken-report index 4854b9a..0226a4e 100755 --- a/scripts/kraken-report +++ b/scripts/kraken-report @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # Copyright 2013-2015, Derrick Wood #