Commit

Add logger module and wrapper logs to run scripts; will add deeper-level logs in next commit
prateekdesai04 committed Oct 11, 2024
1 parent e38a3e7 commit cea6d48
Showing 2 changed files with 100 additions and 30 deletions.
98 changes: 68 additions & 30 deletions examples/run_scripts_v6/temp_script_v6.py
@@ -2,45 +2,58 @@

import pandas as pd

from tabrepo.scripts_v6.logging_config import setup_logger
from tabrepo import load_repository, EvaluationRepository
from tabrepo.scripts_v6.TabPFN_class import CustomTabPFN
from tabrepo.scripts_v6.TabPFNv2_class import CustomTabPFNv2
from tabrepo.scripts_v6.LGBM_class import CustomLGBM
from experiment_utils import run_experiments, convert_leaderboard_to_configs

logger = setup_logger(log_file_name='temp_script_v6')

if __name__ == '__main__':
# Load Context

logger.info("Starting execution script...")

context_name = "D244_F3_C1530_30"
repo: EvaluationRepository = load_repository(context_name, cache=True)
logger.info(f"Loading repository for context: {context_name}")
try:
repo: EvaluationRepository = load_repository(context_name, cache=True)
logger.info("Repository loaded successfully.")
except Exception as e:
logger.error(f"Failed to load repository: {e}", exc_info=True)
raise

expname = "./initial_experiment_tabpfn_v6" # folder location of all experiment artifacts
ignore_cache = True # set to True to overwrite existing caches and re-run experiments from scratch

# To run everything:
# datasets = repo.datasets
# folds = repo.folds

folds = [0]
datasets = [
"blood-transfusion-service-center", # binary
"Australian", # binary
"balance-scale", # multiclass
# "MIP-2016-regression", # regression
]
logger.info(f"Selected Datasets: {datasets}")
logger.info(f"Folds to run: {folds}")

folds = [0]

# Add a check here to verify that the datasets belong to the repo
tids = [repo.dataset_to_tid(dataset) for dataset in datasets]
try:
tids = [repo.dataset_to_tid(dataset) for dataset in datasets]
except Exception as e:
logger.warning(f"Some datasets may not belong to the repository: {e}", exc_info=True)

methods_dict = {
"LightGBM": {
"learning_rate": 0.15,
"num_leaves": 32,
"verbose": -1, # To suppress warnings
},
"TabPFN": {
"device": 'cpu',
"N_ensemble_configurations": 32,
"max_num_features": 50,
},
}
method_cls_dict = {
@@ -49,20 +62,32 @@
"TabPFNv2": CustomTabPFNv2,
}
methods = list(methods_dict.keys())
logger.info(f"Methods to run: {methods}")

results_lst = run_experiments(
expname=expname,
tids=tids,
folds=repo.folds,
methods=methods,
methods_dict=methods_dict,
method_cls=method_cls_dict,
task_metadata=repo.task_metadata,
ignore_cache=ignore_cache,
)
logger.info("Running experiments...")
try:
results_lst = run_experiments(
expname=expname,
tids=tids,
folds=folds,
methods=methods,
methods_dict=methods_dict,
method_cls=method_cls_dict,
task_metadata=repo.task_metadata,
ignore_cache=ignore_cache,
)
logger.info("Experiments Status: Successful.")
except Exception as e:
logger.error(f"An error occurred while running experiments: {e}", exc_info=True)
raise

results_df = pd.concat(results_lst, ignore_index=True)
logger.info("Concatenating results into Dataframe...")
try:
results_df = pd.concat(results_lst, ignore_index=True)
except Exception as e:
logger.error(f"An error occurred while concatenating results: {e}", exc_info=True)

logger.info("Renaming leaderboard columns... ")
results_df = convert_leaderboard_to_configs(results_df)
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 1000):
print(results_df)
@@ -77,21 +102,34 @@
"NeuralNetTorch_c1_BAG_L1",
"NeuralNetFastAI_c1_BAG_L1",
]
logger.info(f"Comparison configs: {comparison_configs}")

baselines = [
"AutoGluon_bq_4h8c_2023_11_14",
]
logger.info(f"Baseline: {baselines}")

logger.info(f"Comparing metrics...")
try:
metrics = repo.compare_metrics(
results_df,
datasets=datasets,
folds=repo.folds,
baselines=baselines,
configs=comparison_configs,
)
except Exception as e:
logger.error(f"An error occurred in compare_metrics(): {e}", exc_info=True)
raise

metrics = repo.compare_metrics(
results_df,
datasets=datasets,
folds=repo.folds,
baselines=baselines,
configs=comparison_configs,
)
with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.width", 1000):
print(f"Config Metrics Example:\n{metrics}")
evaluator_output = repo.plot_overall_rank_comparison(
results_df=metrics,
save_dir=expname,
)

logger.info("Plotting overall rank comparison...")
try:
evaluator_output = repo.plot_overall_rank_comparison(
results_df=metrics,
save_dir=expname,
)
except Exception as e:
logger.error(f"An error occurred in plot_overall_rank_comparison(): {e}", exc_info=True)
32 changes: 32 additions & 0 deletions tabrepo/scripts_v6/logging_config.py
@@ -0,0 +1,32 @@
"""
The **logging_config** module exposes a ``setup_logger`` function to quickly configure the Python logger.
"""
import logging
import os


# WIP - Prateek: TODO: pickup filenames dynamically
# TODO: Improve log_dir path
def setup_logger(log_file_name, level=logging.INFO):
"""Set up a logger with a specific log file name."""
logger = logging.getLogger(log_file_name)

if not logger.hasHandlers():
logger.setLevel(level)

log_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'logs')
os.makedirs(log_dir, exist_ok=True)

log_file = os.path.join(log_dir, log_file_name)
file_handler = logging.FileHandler(log_file)

stream_handler = logging.StreamHandler()

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
stream_handler.setFormatter(formatter)

logger.addHandler(file_handler)
logger.addHandler(stream_handler)

return logger
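
A minimal usage sketch of the new helper, assuming tabrepo is importable and a logs/ directory can be created two levels above this module (per the paths in the code above); the file name "my_script" is illustrative:

from tabrepo.scripts_v6.logging_config import setup_logger

logger = setup_logger(log_file_name="my_script")  # creates logs/my_script and attaches file + console handlers
logger.info("Logger configured; messages go to both the console and the log file.")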
