Skip to content

Commit

Permalink
Drop duplicates before writing to disk
Browse files Browse the repository at this point in the history
  • Loading branch information
sambenfredj committed Jan 24, 2023
1 parent 8849023 commit 9859bbb
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions mokapot/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ def assign_confidence(

Parallel(n_jobs=-1, require="sharedmem")(
delayed(save_sorted_metadata_chunks)(
- chunk_metadata, score_chunk, i, sep
+ chunk_metadata, score_chunk, psms_info, i, sep
)
for chunk_metadata, score_chunk, i in zip(
reader, scores_slices, range(len(scores_slices))
Expand Down Expand Up @@ -936,9 +936,14 @@ def assign_confidence(
)


- def save_sorted_metadata_chunks(chunk_metadata, score_chunk, i, sep):
+ def save_sorted_metadata_chunks(
+ chunk_metadata, score_chunk, psms_info, i, sep
+ ):
chunk_metadata["score"] = score_chunk
chunk_metadata.sort_values(by="score", ascending=False, inplace=True)
+ chunk_metadata = chunk_metadata.drop_duplicates(
+ psms_info["spectrum_columns"]
+ )
chunk_metadata.to_csv(
f"scores_metadata_{i}.csv",
sep=sep,
Expand Down

0 comments on commit 9859bbb

Please sign in to comment.