diff --git a/src/datatrove/pipeline/dedup/exact_substrings.py b/src/datatrove/pipeline/dedup/exact_substrings.py
index 9d5898b5..505e902d 100644
--- a/src/datatrove/pipeline/dedup/exact_substrings.py
+++ b/src/datatrove/pipeline/dedup/exact_substrings.py
@@ -68,7 +68,7 @@ def set_up_dl_locks(self, dl_lock, up_lock):
 
     def save_sizes(self, doc_lens: list[int], rank: int):
         f_lens = self.output_folder.open(f"{rank:05d}{EH.stage_1_sequence_size}", mode="wb")
-        f_lens._file_handler.write(struct.pack("Q" * len(doc_lens), *doc_lens))
+        f_lens.write(struct.pack("Q" * len(doc_lens), *doc_lens))
 
     def __call__(self, data: DocumentsPipeline, rank: int = 0, world_size: int = 1):
         doc_lens = []