Skip to content

Commit

Permalink
Update wiki dump date and few minor updates in single gpu tutorial
Browse files Browse the repository at this point in the history
Signed-off-by: Ayush Dattagupta <[email protected]>
  • Loading branch information
ayushdg committed Feb 21, 2025
1 parent c4cb682 commit 9866f54
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions tutorials/single_node_tutorial/single_gpu_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@
 "source": [
 "res = download_wikipedia(download_output_directory,\n",
 " language=language, \n",
-" url_limit=url_limit).df.compute()"
+" url_limit=url_limit, dump_date=\"20250201\").df.compute()"
 ]
 },
 {
Expand Down Expand Up @@ -1094,7 +1094,8 @@
 "\n",
 "!mkdir -p {fuzzy_dedup_base_output_path}\n",
 "!mkdir -p {fuzzy_dedup_log_dir}\n",
-"!mkdir -p {fuzzy_dedup_cache_dir}\n",
+"!mkdir -p {fuzzy_dedup_no_false_positive_cache_dir}\n",
+"!mkdir -p {fuzzy_dedup_false_positive_cache_dir}\n",
 "!mkdir -p {fuzzy_dedup_output_dir}"
 ]
 },
Expand Down Expand Up @@ -1570,7 +1571,7 @@
 "input_id_field = 'id'\n",
 "\n",
 "\n",
-"!mkdir -p {edgelist_output_path}\n",
+"!mkdir -p {edgelist_output_dir}\n",
 "!mkdir -p {buckets_to_edges_log_path}"
 ]
 },
Expand Down Expand Up @@ -1675,11 +1676,13 @@
 "connected_component_base_output_path = os.path.join(data_dir,\"fuzzy/cc\")\n",
 "connected_component_output_path = os.path.join(connected_component_base_output_path, \"connected_components.parquet\")\n",
 "connected_component_cache_dir = os.path.join(connected_component_base_output_path, \"cache\")\n",
+"connected_component_log_path = os.path.join(connected_component_base_output_path,\"log\")\n",
 "\n",
 "#Relevant parameters\n",
 "input_id_field = 'id'\n",
 "\n",
-"!mkdir -p {connected_component_base_output_path}"
+"!mkdir -p {connected_component_base_output_path}\n",
+"!mkdir -p {connected_component_log_path}"
 ]
 },
 {
Expand All @@ -1705,6 +1708,7 @@
 " cache_dir=connected_component_cache_dir,\n",
 " jaccard_pairs_path=jaccard_pairs_path,\n",
 " id_column=input_id_field,\n",
+" logger=connected_component_log_path,\n",
 ")\n",
 "\n",
 "#Load and run connected component\n",
Expand Down

0 comments on commit 9866f54

Please sign in to comment.