Skip to content

Commit

Permalink
small bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
guipenedo committed Nov 9, 2023
1 parent 7a681ae commit 0b95905
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion src/datatrove/pipeline/dedup/exact_substrings.py
Original file line number Diff line number Diff line change
@@ -68,7 +68,7 @@ def set_up_dl_locks(self, dl_lock, up_lock):

def save_sizes(self, doc_lens: list[int], rank: int):
f_lens = self.output_folder.open(f"{rank:05d}{EH.stage_1_sequence_size}", mode="wb")
- f_lens._file_handler.write(struct.pack("Q" * len(doc_lens), *doc_lens))
+ f_lens.write(struct.pack("Q" * len(doc_lens), *doc_lens))

def __call__(self, data: DocumentsPipeline, rank: int = 0, world_size: int = 1):
doc_lens = []
Expand Down

0 comments on commit 0b95905

Please sign in to comment.