Skip to content

Commit

Permalink
sort bowtie2 outputs for reproducibility
Browse files Browse the repository at this point in the history
  • Loading branch information
rzlim08 committed May 23, 2024
1 parent 29224db commit 1bb6a26
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 1 deletion.
17 changes: 17 additions & 0 deletions lib/idseq-dag/idseq_dag/steps/run_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,23 @@ def generate_read_to_contig_mapping(assembled_contig,
}
)
)

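# Sort the bowtie2 SAM output by read name (-n), overwriting the file in place,
# so that the records are read back in a reproducible order.
# Equivalent to: samtools sort -n -O sam -o bowtie2.sam bowtie2.sam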
samtools_sort_params = [
"sort",
"-n",
"-O", "sam",
"-o", output_bowtie_sam,
output_bowtie_sam
]
command.execute(
command_patterns.SingleCommand(
cmd="samtools",
args=samtools_sort_params
)
)

contig_stats, _ = generate_info_from_sam(output_bowtie_sam, read2contig, duplicate_cluster_sizes_path=duplicate_cluster_sizes_path)
with open(output_contig_stats, 'w') as ocf:
json.dump(contig_stats, ocf)
Expand Down
2 changes: 1 addition & 1 deletion lib/idseq-dag/idseq_dag/util/m8.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def generate_taxon_count_json_from_m8(
agg_bucket["base_count"],
}
if agg_bucket.get('source_count_type'):
taxon_counts_row['source_count_type'] = list(agg_bucket['source_count_type'])
taxon_counts_row['source_count_type'] = sorted(list(agg_bucket['source_count_type']))

taxon_counts_attributes.append(taxon_counts_row)
output_dict = {
Expand Down
2 changes: 2 additions & 0 deletions workflows/short-read-mngs/host_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ task ercc_bowtie2_filter {

~{bowtie2_invocation}

samtools sort -n -O sam -o /tmp/bowtie2_ercc.sam /tmp/bowtie2_ercc.sam

# Extract reads [pairs] that did NOT map to the index
if [[ '~{paired}' == 'true' ]]; then
# 1 (read paired)
Expand Down

0 comments on commit 1bb6a26

Please sign in to comment.