Skip to content

Commit

Permalink
fixed minor issues, Signed-off by [email protected]
Browse files Browse the repository at this point in the history
Signed-off-by: Vinay Raman <[email protected]>
  • Loading branch information
vinay-raman committed Feb 11, 2025
1 parent 794ef20 commit 531b5ac
Showing 1 changed file with 4 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
import pdb
import shutil
import time
from pathlib import Path
from typing import Any, List

from retriever_hardnegative_miner import HardNegativeMiner
@@ -72,6 +73,7 @@ def main():

if args.input_dir:
input_dataset = DocumentDataset.read_json(args.input_dir)
# input_dataset = DocumentDataset.read_json(os.path.join(args.input_dir,"clustered_dataset"))
else:
raise ValueError("provide input file path")

@@ -90,12 +92,13 @@ def main():
st_time = time.time()
mined_dataset = mine_hard_negatives(input_dataset)

- print("Time taken = {:.2f}".format(time.time() - st_time))
+ print("Time taken = {:.2f} s".format(time.time() - st_time))
print("Saving data in jsonl format ...")
mined_dataset.df.to_json(
os.path.join(args.output_dir, "mined_dataset"), lines=True, orient="records"
)


if __name__ == "__main__":
    # Script entry point: only runs when the file is executed directly,
    # not when it is imported as a module.
    #
    # The client is requested with cluster_type="cpu" and bound to a
    # module-level name before the pipeline starts.
    # NOTE(review): presumably this keeps a Dask client alive for the whole
    # run of main() -- confirm against get_client's implementation.
    dask_client = get_client(cluster_type="cpu")
    main()

0 comments on commit 531b5ac

Please sign in to comment.