-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path07_run_reconstruction.py
109 lines (93 loc) · 3.4 KB
/
07_run_reconstruction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from pathlib import Path
import pandas as pd
from src.utils_reconstruction import launch_reconstruction_error_denom_dataset
from src.utils import concatenate_df
def main(
    denom,
    n_symbols,
    abba_scl,
    date_exp,
):
    """Launch the signal reconstruction task on every data set still to compute.

    The data sets in scope are the names listed in
    ``data/DataSummary_prep_equalsize_min100samples.csv`` (relative to the
    current working directory), minus a hard-coded list of data sets with
    computing issues. Data sets already present in a
    ``reconstruction_errors_*.csv`` file under
    ``results/<date_exp>/reconstruction/<denom>`` are skipped, so an
    interrupted run can be relaunched without recomputing them.

    A failure on one data set is reported (with its traceback) and the loop
    moves on to the next one. ``KeyboardInterrupt`` and ``SystemExit`` are
    NOT swallowed, so the run can still be stopped by the user.

    Args:
        denom: Inverse of the target memory usage ratio; also the name of
            the results sub-directory.
        n_symbols: Fixed alphabet size for all methods.
        abba_scl: Fixed scaling parameter for ABBA.
        date_exp: Date of the launch of the experiments (for versioning);
            also the name of the results sub-directory.
    """
    # Local import: only needed to report failures of individual data sets.
    import traceback

    # Set the path
    cwd = Path.cwd()
    path = cwd / "results" / date_exp / "reconstruction" / str(denom)

    # Consider univariate and equal-size data sets with at least 100 samples
    df_datasets_total = pd.read_csv(
        cwd / "data/DataSummary_prep_equalsize_min100samples.csv")
    l_datasets_total = df_datasets_total["Name"].unique().tolist()

    # Remove the data sets with computing issues
    datasets_problems = {
        "DodgerLoopWeekend",
        "DodgerLoopGame",
        "DodgerLoopDay",
        "MelbournePedestrian",
        "UWaveGestureLibraryZ",
    }
    l_datasets_scope = [
        dataset for dataset in l_datasets_total
        if dataset not in datasets_problems
    ]
    print("Total number of data sets in the scope:", len(l_datasets_scope))

    # Ignore the data sets that have already been computed (to not compute again)
    l_csvfiles_computed_datasets = list(
        path.rglob("reconstruction_errors_*.csv"))
    if l_csvfiles_computed_datasets:
        # NOTE(review): `concatenate_df` is a project helper; presumably it
        # reads the CSV files and stacks them into one DataFrame with a
        # "dataset" column -- confirm against src.utils.
        df_computed_datasets = concatenate_df(
            l_csvfiles_computed_datasets).drop_duplicates()
        # A set gives O(1) membership tests in the filter below.
        datasets_computed = set(
            df_computed_datasets["dataset"].unique().tolist())
        print("Number of data sets already computed:", len(datasets_computed))
        l_datasets_to_compute = [
            dataset for dataset in l_datasets_scope
            if dataset not in datasets_computed
        ]
        print("Number of new data sets to compute:", len(l_datasets_to_compute))
    else:
        l_datasets_to_compute = l_datasets_scope
        print("Number of data sets to compute:", len(l_datasets_to_compute))

    # Launch the signal reconstruction task, for all methods
    print(f"\n\n====\n{date_exp = }\n{denom = }\n=====")
    n_to_compute = len(l_datasets_to_compute)
    for i, dataset_name_ucr in enumerate(l_datasets_to_compute, start=1):
        print(f"Dataset: {dataset_name_ucr}: {i}/{n_to_compute}.")
        try:
            launch_reconstruction_error_denom_dataset(
                denom=denom,
                dataset_name_ucr=dataset_name_ucr,
                n_symbols=n_symbols,
                abba_scl=abba_scl,
                date_exp=date_exp,
            )
        except Exception:
            # Best effort: keep going with the remaining data sets, but show
            # why this one failed instead of silently discarding the error.
            # `Exception` (not a bare `except`) lets Ctrl-C / SystemExit
            # propagate and actually stop the run.
            print(f"--ERROR: {dataset_name_ucr} did not pass!")
            traceback.print_exc()
if __name__ == "__main__":
    import argparse

    # Command-line options, declared as (flag, add_argument keyword arguments)
    # pairs and registered in this order. Options without `required=True` are
    # optional and fall back to their `default`.
    cli_options = (
        (
            "--denom",
            {
                "type": int,
                "help": "denom is the inverse of the target memory usage ratio, in {3, 4, 5, 6, 10, 15, 20}.",
                "required": True,
            },
        ),
        (
            "--date_exp",
            {
                "type": str,
                "help": "Date of the launch of the experiments (for versioning).",
                "required": True,
            },
        ),
        (
            "--n_symbols",
            {
                "type": int,
                "help": "Fixed alphabet size for all methods (set to 9 by default).",
                "default": 9,
            },
        ),
        (
            "--abba_scl",
            {
                "type": float,
                "help": "Fixed scaling parameter for ABBA (set to 1 by default).",
                "default": 1,
            },
        ),
    )

    cli = argparse.ArgumentParser()
    for flag, spec in cli_options:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()

    # Hand the parsed values over to the experiment driver.
    main(
        denom=options.denom,
        n_symbols=options.n_symbols,
        abba_scl=options.abba_scl,
        date_exp=options.date_exp,
    )