Skip to content

Commit

Permalink
Merge pull request verilog-to-routing#2675 from verilog-to-routing/sc…
Browse files Browse the repository at this point in the history
…ript_for_tuning_runs

Automate tuning runs
  • Loading branch information
vaughnbetz authored Aug 13, 2024
2 parents 49de5fb + e6adc49 commit 21d0150
Show file tree
Hide file tree
Showing 3 changed files with 291 additions and 2 deletions.
6 changes: 4 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
prettytable
lxml
psutil

pandas
numpy
scipy
# Python linter and formatter
click==8.0.2 # Our version of black needs an older version of click (https://stackoverflow.com/questions/71673404/importerror-cannot-import-name-unicodefun-from-click)
black==21.4b0
pylint==2.7.4

# Surelog
orderedmultidict
orderedmultidict
24 changes: 24 additions & 0 deletions vtr_flow/scripts/tuning_runs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
A script used to run tuning experiments with multiple parameters.

Steps to use:
=============
1) Edit the first section of the script: set the `PARAMS_DICT` dictionary to the parameters that you want to sweep and the corresponding values that you want to try. If you want the resulting spreadsheet to include only specific metrics, set the `KEEP_METRICS_ONLY` variable to `True` and list the metrics that you care about in `PARSED_METRICS`. If you want the full parsed result sheet, set `KEEP_METRICS_ONLY` to `False`.

2) run the script as follows:
```
python control_runs.py --generate <path_to_task_to_run>
```

This will edit the `config.txt` file of this task, appending one `script_params_list_add` line for each combination of the input parameter values.

3) Launch the task using `run_vtr_task.py` script
4) When the run is done, run the script to parse the results as follows:
```
python control_runs.py --parse <path_to_task_to_parse>
```

The script will generate three CSV files and one spreadsheet in the latest runXXX directory of the task, as follows:
- `full_res.csv`, which mirrors parse_results.txt in CSV format, with the `script_params` column split into one column per swept parameter
- `avg_seed.csv`, which averages the results of each circuit with one set of parameters over the different seed values
- `geomean_res.csv`, which geometrically averages the results of all the circuits over the same set of parameters
- `summary.xlsx`, which merges all the previously mentioned sheets into a single spreadsheet
263 changes: 263 additions & 0 deletions vtr_flow/scripts/tuning_runs/control_runs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
#!/usr/bin/env python3

""" This module controls and parses the large runs that includes
sweeping multiple parameters. """
import itertools
import os
import sys
import csv
import pandas as pd
import numpy as np
from scipy import stats

# Define the global dictionary
# Sweep definition: maps each command-line option to the list of values to try.
# generate_combinations() emits one `script_params_list_add` line per element
# of the Cartesian product of these lists, and the parsing functions use the
# same keys as column names in the generated CSV files. The "--seed" key is
# treated specially: results are averaged over it in generate_avg_seed_csv().
PARAMS_DICT = {
    "--seed": [1, 2],
    "--place_algorithm": ["criticality_timing"],
    "--place_agent_epsilon": [0.3],
}

# Set to True if you only care about specific metrics
KEEP_METRICS_ONLY = True
# Metric columns retained (together with "circuit", "arch" and the swept
# parameters) when KEEP_METRICS_ONLY is True.
PARSED_METRICS = ["num_io", "num_LAB"]


def safe_gmean(series):
    """Return the geometric mean of a series, ignoring zero and missing entries.

    Zeros are first turned into NaN and every NaN is then discarded, so only
    the remaining values are handed to ``scipy.stats.gmean``.
    """
    without_zeros = series.replace({0: np.nan})
    usable_values = without_zeros.dropna()
    return stats.gmean(usable_values)


def generate_combinations(params_dict=None):
    """Build the ``script_params_list_add`` lines for every parameter combination.

    Args:
        params_dict: Optional mapping from option name to the list of values
            to sweep. Defaults to the module-level ``PARAMS_DICT``.

    Returns:
        A list of newline-terminated strings, one per element of the Cartesian
        product of the value lists, each formatted as
        ``script_params_list_add=<key> <value> <key> <value> ...``.
    """
    if params_dict is None:
        params_dict = PARAMS_DICT

    keys = list(params_dict)
    lines = []
    # itertools.product is consumed lazily; the full list of combinations does
    # not need to be materialized first.
    for combination in itertools.product(*params_dict.values()):
        params_str = " ".join(f"{key} {value}" for key, value in zip(keys, combination))
        lines.append(f"script_params_list_add={params_str}\n")
    return lines


def parse_results(input_path):
    """
    Convert the newest run's parse_results.txt into the CSV/XLSX summaries.

    The runXXX directory with the highest number under ``input_path`` is
    selected. Its tab-separated parse_results.txt is rewritten as full_res.csv
    with the ``script_params`` column replaced by one column per key of
    PARAMS_DICT; avg_seed.csv, geomean_res.csv and summary.xlsx are then
    produced from it. The process exits with status 1 when no run directory or
    no parse_results.txt is found.
    """
    # Keep only the entries named "run" followed by digits
    candidates = []
    for entry in os.listdir(input_path):
        if entry.startswith("run") and entry[3:].isdigit():
            candidates.append(entry)

    if len(candidates) == 0:
        print("No runXXX directories found in the specified input path.")
        sys.exit(1)

    def run_number(dir_name):
        return int(dir_name[3:])

    newest_run_dir = os.path.join(input_path, max(candidates, key=run_number))
    txt_path = os.path.join(newest_run_dir, "parse_results.txt")
    csv_path = os.path.join(newest_run_dir, "full_res.csv")

    if not os.path.exists(txt_path):
        print("File parse_results.txt not found.")
        sys.exit(1)

    param_names = list(PARAMS_DICT.keys())

    # Copy the tab-separated table to CSV, expanding script_params on the way
    with open(txt_path, "r") as source, open(csv_path, "w", newline="") as target:
        rows = csv.reader(source, delimiter="\t")
        out = csv.writer(target)

        header = next(rows)
        col = header.index("script_params")
        out.writerow(header[:col] + param_names + header[col + 1 :])

        for record in rows:
            parsed = parse_script_params(record[col])
            expanded = [parsed.get(name, "") for name in param_names]
            out.writerow(record[:col] + expanded + record[col + 1 :])

    print(f"Converted parse_results.txt to {csv_path}")

    # Average over the seeds (when a --seed column exists)
    generate_avg_seed_csv(csv_path, newest_run_dir)
    print("Generated average seed results")

    # Geometric mean over the circuits, computed from the seed-averaged table
    avg_seed_path = os.path.join(newest_run_dir, "avg_seed.csv")
    generate_geomean_res_csv(avg_seed_path, newest_run_dir)
    print("Generated geometric average results over all the circuits")

    generate_xlsx(newest_run_dir)
    print("Generated xlsx that merges all the result csv files")


def generate_xlsx(largest_run_path):
    """Merge the three result CSV files of a run into summary.xlsx.

    full_res.csv, avg_seed.csv and geomean_res.csv are read from
    ``largest_run_path`` and written, in that order, to the sheets
    "Full res", "Avg. seeds" and "Summary" of summary.xlsx in the same
    directory.
    """
    sheets = (
        ("Full res", "full_res.csv"),
        ("Avg. seeds", "avg_seed.csv"),
        ("Summary", "geomean_res.csv"),
    )
    workbook_path = os.path.join(largest_run_path, "summary.xlsx")

    # pylint: disable=abstract-class-instantiated
    with pd.ExcelWriter(workbook_path, engine="xlsxwriter") as workbook:
        for title, csv_name in sheets:
            # One CSV file per sheet, written without the index column
            frame = pd.read_csv(os.path.join(largest_run_path, csv_name))
            frame.to_excel(workbook, sheet_name=title, index=False)


def parse_script_params(script_params, params_dict=None):
    """Extract the swept parameter values from one ``script_params`` cell.

    The cell is an underscore-joined string such as
    ``common_--seed_1_--place_algorithm_criticality_timing``. Because both
    option names and values may contain underscores, the string is split on
    ``_`` and scanned for the token sequences of the known option names; the
    tokens between one option name and the next form that option's value.

    Args:
        script_params: The raw ``script_params`` string of a result row.
        params_dict: Optional mapping whose keys are the option names to look
            for. Defaults to the module-level ``PARAMS_DICT``.

    Returns:
        A dict with one entry per key of ``params_dict`` (in the same order),
        holding the parsed value as a string, or ``""`` when the option does
        not appear. If an option appears more than once the last value wins.
    """
    if params_dict is None:
        params_dict = PARAMS_DICT

    parsed_params = {key: "" for key in params_dict}

    # Split every key once, and try keys with more tokens first so that an
    # option whose name is a prefix of another one (for example
    # "--place_delay_model" and "--place_delay_model_reducer") cannot capture
    # the longer option's occurrence.
    key_tokens = sorted(
        ((key, key.split("_")) for key in params_dict),
        key=lambda item: len(item[1]),
        reverse=True,
    )

    parts = script_params.split("_")

    def match_key(position):
        """Return (key, token_count) of the key starting at position, or (None, 0)."""
        for key, tokens in key_tokens:
            if parts[position : position + len(tokens)] == tokens:
                return key, len(tokens)
        return None, 0

    i = 0
    while i < len(parts):
        key, key_length = match_key(i)
        if key is None:
            i += 1
            continue

        # The value runs until the next option name (or the end of the string)
        j = i + key_length
        while j < len(parts) and match_key(j)[0] is None:
            j += 1

        parsed_params[key] = "_".join(parts[i + key_length : j])
        i = j

    return parsed_params


def generate_avg_seed_csv(full_res_csv_path, output_dir):
    """Average the results over the seeds and write them to avg_seed.csv.

    Args:
        full_res_csv_path: Path of the CSV file to read (full_res.csv).
        output_dir: Directory in which avg_seed.csv is written.

    When KEEP_METRICS_ONLY is set, only the "circuit" and "arch" columns, the
    PARAMS_DICT keys and the PARSED_METRICS columns are kept. If a "--seed"
    column is present, rows are grouped by circuit, arch and the remaining
    swept parameters and the numeric columns are averaged; otherwise the
    table is written unchanged.
    """
    df = pd.read_csv(full_res_csv_path)
    # Type narrowing for static analysis only
    assert isinstance(df, pd.DataFrame)

    if KEEP_METRICS_ONLY:
        col_to_keep = {"circuit", "arch", *PARAMS_DICT, *PARSED_METRICS}
        # pylint: disable=no-member
        df = df[[col for col in df.columns if col in col_to_keep]]

    if "--seed" in df.columns:
        # Group by circuit, arch and every swept parameter other than the seed
        grouping_keys = ["circuit", "arch"] + [
            key for key in PARAMS_DICT if key in df.columns and key != "--seed"
        ]

        # dropna=False: a blank parameter cell is read back as NaN, and the
        # default groupby behaviour would silently discard those rows.
        df_grouped = (
            df.groupby(grouping_keys, dropna=False).mean(numeric_only=True).reset_index()
        )

        # The averaged seed value is meaningless; drop it if it was kept
        df_grouped = df_grouped.drop(columns=["--seed"], errors="ignore")
    else:
        df_grouped = df

    # Save the resulting dataframe to a CSV file
    avg_seed_csv_path = os.path.join(output_dir, "avg_seed.csv")
    df_grouped.to_csv(avg_seed_csv_path, index=False)


def generate_geomean_res_csv(full_res_csv_path, output_dir):
    """Write the geometric mean over all circuits to geomean_res.csv.

    Args:
        full_res_csv_path: Path of the CSV file to read (the caller in this
            file passes avg_seed.csv).
        output_dir: Directory in which geomean_res.csv is written.

    Rows are grouped by every PARAMS_DICT key other than "--seed" that is
    present in the table. Within each group, numeric columns are reduced with
    safe_gmean and non-numeric columns are blanked. The "circuit" and "arch"
    columns are removed from the output.
    """
    df = pd.read_csv(full_res_csv_path)

    param_columns = [key for key in PARAMS_DICT if key != "--seed" and key in df.columns]
    non_param_columns = [col for col in df.columns if col not in param_columns]

    def summarize(column):
        """Geometric mean for numeric columns, empty string for anything else."""
        # is_numeric_dtype also copes with dedicated string dtypes, which do
        # not compare equal to "object".
        if pd.api.types.is_numeric_dtype(column):
            return safe_gmean(column)
        return ""

    if param_columns:
        # pylint: disable=no-member
        geomean_df = (
            # dropna=False keeps groups whose parameter value is blank (NaN)
            df.groupby(param_columns, dropna=False)
            .agg({col: summarize for col in non_param_columns})
            .reset_index()
        )
    else:
        # Only the seed was swept: groupby rejects an empty key list, so
        # reduce the whole table to a single summary row instead.
        geomean_df = pd.DataFrame(
            {col: [summarize(df[col])] for col in non_param_columns}
        )

    geomean_df = geomean_df.drop(columns=["circuit", "arch"], errors="ignore")

    geomean_res_csv_path = os.path.join(output_dir, "geomean_res.csv")
    geomean_df.to_csv(geomean_res_csv_path, index=False)


def main():
    """Command-line entry point.

    Expects two arguments: an option and a task directory.

    * ``--generate <dir>`` appends one ``script_params_list_add`` line per
      parameter combination to ``<dir>/config/config.txt`` (the config
      directory is created if missing).
    * ``--parse <dir>`` converts the results of the latest run in ``<dir>``.

    Exits with status 1 on missing arguments or an unknown option.
    """

    if len(sys.argv) < 3:
        print("Usage: script.py <option> <path_to_directory>")
        sys.exit(1)

    option, directory_path = sys.argv[1], sys.argv[2]

    if option == "--generate":
        # Generate the combinations
        lines = generate_combinations()

        # Define the path to the config file
        config_path = os.path.join(directory_path, "config", "config.txt")

        # Ensure the config directory exists
        os.makedirs(os.path.dirname(config_path), exist_ok=True)

        # If the existing file does not end with a newline, the first appended
        # line would be glued onto its last line and corrupt that entry.
        needs_newline = False
        if os.path.isfile(config_path) and os.path.getsize(config_path) > 0:
            with open(config_path, "rb") as existing:
                existing.seek(-1, os.SEEK_END)
                needs_newline = existing.read(1) != b"\n"

        # Append the lines to the config file
        with open(config_path, "a") as file:
            if needs_newline:
                file.write("\n")
            file.writelines(lines)

        print(f"Appended lines to {config_path}")

    elif option == "--parse":
        parse_results(directory_path)

    else:
        print("Invalid option. Use --generate or --parse")
        sys.exit(1)


if __name__ == "__main__":
    main()

0 comments on commit 21d0150

Please sign in to comment.