-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrepeat_experiments.py
93 lines (74 loc) · 3.97 KB
/
repeat_experiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""Docstring for the iterate_causal_inference.py module.
This module accepts a YAML configuration file that follows
the layout of the configuration file in
docs/config.yaml
The user needs to modify the configuration file
to allow different discretisation (or inference) methods
as well as the output_directory_path,
so that all the iterative model training results are saved in a single folder, where each
sub-folder corresponds to a single method and parameter.
"""
import glob
import re
import subprocess

import yaml
from loguru import logger
def _natural_sort_key(path):
    """Return a sort key that compares embedded integers numerically.

    The path is split into alternating non-digit / digit runs, so that for
    example ``cluster_2`` sorts before ``cluster_10``.
    """
    # re.split with a capturing group yields text at even indices and the
    # captured digit runs at odd indices, so keys always compare like-for-like.
    parts = re.split(r"(\d+)", path)
    return [int(part) if idx % 2 else part for idx, part in enumerate(parts)]


def iterate_causal_inference():
    """Run Bayesian-network inference for every segment/discretiser combination.

    Loads the base configuration from ``docs/config.yaml`` and, for each
    segmented dataset matching the hard-coded ``segments_path`` prefix, sweeps
    over the discretisation methods and arguments defined below. For every
    combination the configuration is updated (structure path, input data path,
    discretiser settings, output path), written to
    ``docs/config_mle_seg.yaml`` and handed to ``infer_bayesian_network.py``,
    which is run as a blocking subprocess.

    A non-zero exit code from the inference script is logged and the sweep
    continues with the next combination. Returns ``None``.
    """
    # Load the base configuration from the YAML file
    with open("docs/config.yaml", "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # Discretisation method -> list of arguments to try for that method
    discretiser = {"simple": [2, 3, 4], "tree": [2, 3, 4, 5], "mdlp": [5, 6, 10]}

    # Common prefix of the segmented datasets
    segments_path = (
        "/mnt/VAST_SHARED/lausanne_ds/player360/clustering"
        "/bb/player_360_bb_20230125/data/player_360_bb_20230125_"
    )

    # glob returns matches in a filesystem-dependent order, but the loop index
    # below selects the per-segment structure file and output folder, so the
    # order must be deterministic.
    # NOTE(review): assumes segment_{i} corresponds to the i-th cluster file in
    # natural (numeric-aware) order - confirm against how the segment folders
    # were produced.
    all_cluster_paths = sorted(
        glob.glob(segments_path + "cluster*"), key=_natural_sort_key
    )
    # Alternative single-file inputs (kept for reference):
    # all_cluster_paths = ["/mnt/VAST_SHARED/lausanne_ds/player360/data/player_360_bb_20230125.parquet"]
    # all_cluster_paths = ["/mnt/VAST_SHARED/lausanne_ds/player360/data/player_360_20230125_non_zero_flags.parquet"]
    # all_cluster_paths = ["/mnt/VAST_SHARED/lausanne_ds/player360/data/player_360_20230125_non_zero_flags_only_vips.parquet"]
    if not all_cluster_paths:
        logger.warning(
            f"No segmented datasets found matching {segments_path}cluster*"
        )
        return

    output_path = config["output_path"]
    structures_path = "/mnt/VAST_SHARED/lausanne_ds/player360/experiment_results/bb/bb_bayesian_network/churn/14_features/"
    # structures_path = "/mnt/VAST_SHARED/lausanne_ds/player360/experiment_results/bb/bb_bayesian_network/sink/vips/discovered_graphs/20230824/causal_discovery/DAG_for_inference.dot"

    # The same temporary config file is overwritten for every run
    config_path = "docs/config_mle_seg.yaml"

    # Iterate through each cluster segment
    for i, cluster_path in enumerate(all_cluster_paths):
        config["structure_path"] = (
            structures_path
            + f"segments/segment_{i}/causal_discovery/DAG_for_inference.dot"
        )
        # config["structure_path"] = structures_path #+ f"all/causal_discovery/DAG_for_inference.dot"
        config["data_input_path"] = cluster_path
        output_path_seg = output_path + f"segments/segment_{i}/inference/mle/"
        # output_path_seg = output_path + f"BDeu_5K/"

        # Iterate through discretisation methods and arguments
        for k, v in discretiser.items():
            for value in v:
                config["discretiser"].update(
                    {"method": k, "argument": value, "proportion_threshold": 0.08}
                )
                # Dots are replaced so the argument is safe as a folder name
                output_sub_path = f"{k}_{str(value).replace('.', '_')}/"
                logger.info(
                    f"Preparing causal inference with discretiser method {k} and argument {value}"
                )
                config["output_path"] = output_path_seg + output_sub_path
                logger.info(config["output_path"])

                with open(config_path, "w", encoding="utf-8") as outfile:
                    yaml.dump(config, outfile, default_flow_style=False)

                # Run the Bayesian network inference script; an argument list
                # avoids going through the shell.
                return_code = subprocess.call(
                    [
                        "python3",
                        "infer_bayesian_network.py",
                        "-config_file",
                        config_path,
                    ]
                )
                if return_code != 0:
                    # Best-effort sweep: report the failure and carry on
                    logger.error(
                        f"infer_bayesian_network.py exited with code {return_code} "
                        f"for {config['output_path']}"
                    )
if __name__ == "__main__":
    # Script entry point: run the full segment/discretiser sweep.
    iterate_causal_inference()