From 0b959059a6d2c35146bbb862f65b1413c786e77c Mon Sep 17 00:00:00 2001
From: guipenedo
Date: Thu, 9 Nov 2023 12:51:40 +0000
Subject: [PATCH] small bugfix

---
 src/datatrove/pipeline/dedup/exact_substrings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/datatrove/pipeline/dedup/exact_substrings.py b/src/datatrove/pipeline/dedup/exact_substrings.py
index 9d5898b5..505e902d 100644
--- a/src/datatrove/pipeline/dedup/exact_substrings.py
+++ b/src/datatrove/pipeline/dedup/exact_substrings.py
@@ -68,7 +68,7 @@ def set_up_dl_locks(self, dl_lock, up_lock):
 
     def save_sizes(self, doc_lens: list[int], rank: int):
         f_lens = self.output_folder.open(f"{rank:05d}{EH.stage_1_sequence_size}", mode="wb")
-        f_lens._file_handler.write(struct.pack("Q" * len(doc_lens), *doc_lens))
+        f_lens.write(struct.pack("Q" * len(doc_lens), *doc_lens))
 
     def __call__(self, data: DocumentsPipeline, rank: int = 0, world_size: int = 1):
         doc_lens = []