AnneBeyer · kushal-10 · Dec 9, 2024 · Jan 31, 2025
diff --git a/clemcore/__init__.py b/clemcore/__init__.py
@@ -1,15 +1,17 @@
 """ Main entry point """
 import textwrap
-from typing import List, Dict
+from typing import List, Dict, Union
 import os.path
 import logging
 import logging.config
 import yaml
 from datetime import datetime
+import json
 
 import clemcore.backends as backends
 import clemcore.clemgame as clemgame
 import clemcore.utils.file_utils as file_utils
+from clemcore.clemgame import GameSpec
 
 BANNER = \
     r"""
@@ -59,11 +61,11 @@ def list_games():
         print(game_name, wrapper.fill(game["description"]))
 
 
-def run(game_name: str, model_specs: List[backends.ModelSpec], gen_args: Dict,
+def run(game: Union[str, Dict, GameSpec], model_specs: List[backends.ModelSpec], gen_args: Dict,
         experiment_name: str = None, instances_name: str = None, results_dir: str = None):
     """Run specific model/models with a specified clemgame.
     Args:
-        game_name: Name of the game, matching the game's name in the game registry.
+        game: Name of the game, matching the game's name in the game registry, OR GameSpec-like dict, OR GameSpec.
         model_specs: A list of backends.ModelSpec instances for the player models to run the game with.
         gen_args: Text generation parameters for the backend; output length and temperature are implemented for the
             majority of model backends.
@@ -78,72 +80,77 @@ def run(game_name: str, model_specs: List[backends.ModelSpec], gen_args: Dict,
             model.set_gen_args(**gen_args)  # todo make this somehow available in generate method?
             player_models.append(model)
 
-        game_spec = clemgame.select_game(game_name)
-        game = clemgame.load_game(game_spec, instances_name=instances_name)
-        logger.info(f'Running {game_spec["game_name"]} (models={player_models if player_models is not None else "see experiment configs"})')
-        stdout_logger.info(f"Running game {game_spec['game_name']}")
-        if experiment_name:
-            logger.info("Only running experiment: %s", experiment_name)
-            game.filter_experiment.append(experiment_name)
-        time_start = datetime.now()
-        game.run(player_models=player_models, results_dir=results_dir)
-        time_end = datetime.now()
-        logger.info(f'Running {game_spec["game_name"]} took {str(time_end - time_start)}')
+        game_specs = clemgame.select_game(game)
+        print("Matched game specs in registry:", " ".join([game_spec.game_name for game_spec in game_specs]))
+        for game_spec in game_specs:
+            game_benchmark = clemgame.load_game(game_spec, instances_name=instances_name)
+            logger.info(
+                f'Running {game_spec["game_name"]} (models={player_models if player_models is not None else "see experiment configs"})')
+            stdout_logger.info(f"Running game {game_spec['game_name']}")
+            if experiment_name:  # leaving this as-is for now, needs discussion conclusions
+                logger.info("Only running experiment: %s", experiment_name)
+                game_benchmark.filter_experiment.append(experiment_name)
+            time_start = datetime.now()
+            game_benchmark.run(player_models=player_models, results_dir=results_dir)
+            time_end = datetime.now()
+            logger.info(f'Running {game_spec["game_name"]} took {str(time_end - time_start)}')
+
     except Exception as e:
         stdout_logger.exception(e)
         logger.error(e, exc_info=True)
 
 
-def score(game_name: str, experiment_name: str = None, results_dir: str = None):
+def score(game: Union[str, Dict, GameSpec], experiment_name: str = None, results_dir: str = None):
     """Calculate scores from a game benchmark run's records and store score files.
     Args:
-        game_name: Name of the game, matching the game's name in the game registry.
+        game: Name of the game, matching the game's name in the game registry, OR GameSpec-like dict, OR GameSpec.
         experiment_name: Name of the experiment to score. Corresponds to the experiment directory in each player pair
             subdirectory in the results directory.
         results_dir: Path to the results directory in which the benchmark records are stored.
     """
-    logger.info(f"Scoring game {game_name}")
-    stdout_logger.info(f"Scoring game {game_name}")
+    logger.info(f"Scoring game {game}")
+    stdout_logger.info(f"Scoring game {game}")
 
     if experiment_name:
         logger.info("Only scoring experiment: %s", experiment_name)
-    game_spec = clemgame.select_game(game_name)
-    try:
-        game = clemgame.load_game(game_spec, do_setup=False)
-        if experiment_name:
-            game.filter_experiment.append(experiment_name)
-        time_start = datetime.now()
-        game.compute_scores(results_dir)
-        time_end = datetime.now()
-        logger.info(f"Scoring {game.game_name} took {str(time_end - time_start)}")
-    except Exception as e:
-        stdout_logger.exception(e)
-        logger.error(e, exc_info=True)
-
-
-def transcripts(game_name: str, experiment_name: str = None, results_dir: str = None):
+    game_specs = clemgame.select_game(game)  # iterated below: one scoring pass per matched spec
+    for game_spec in game_specs:
+        try:
+            game_benchmark = clemgame.load_game(game_spec, do_setup=False)  # not `game`: keep the argument unshadowed
+            if experiment_name:
+                game_benchmark.filter_experiment.append(experiment_name)
+            time_start = datetime.now()
+            game_benchmark.compute_scores(results_dir)
+            time_end = datetime.now()
+            logger.info(f"Scoring {game_benchmark.game_name} took {str(time_end - time_start)}")
+        except Exception as e:  # try sits inside the loop: log this spec's failure, then go on to the next spec
+            stdout_logger.exception(e)
+            logger.error(e, exc_info=True)
+
+
+def transcripts(game: Union[str, Dict, GameSpec], experiment_name: str = None, results_dir: str = None):
     """Create episode transcripts from a game benchmark run's records and store transcript files.
     Args:
-        game_name: Name of the game, matching the game's name in the game registry.
+        game: Name of the game, matching the game's name in the game registry, OR GameSpec-like dict, OR GameSpec.
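-        experiment_name: Name of the experiment to score. Corresponds to the experiment directory in each player pair
+        experiment_name: Name of the experiment to transcribe. Corresponds to the experiment directory in each player pair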
             subdirectory in the results directory.
         results_dir: Path to the results directory in which the benchmark records are stored.
     """
-    logger.info(f"Transcribing game {game_name}")
-    stdout_logger.info(f"Transcribing game {game_name}")
+    logger.info(f"Transcribing game {game}")
+    stdout_logger.info(f"Transcribing game {game}")
     if experiment_name:
         logger.info("Only transcribing experiment: %s", experiment_name)
-    game_spec = clemgame.select_game(game_name)
-    try:
-        game = clemgame.load_game(game_spec, do_setup=False)
-        if experiment_name:
-            game.filter_experiment.append(experiment_name)
-        time_start = datetime.now()
-        game.build_transcripts(results_dir)
-        time_end = datetime.now()
-        logger.info(f"Building transcripts for {game.game_name} took {str(time_end - time_start)}")
-    except Exception as e:
-        stdout_logger.exception(e)
-        logger.error(e, exc_info=True)
-
+    game_specs = clemgame.select_game(game)  # iterated below: one transcript pass per matched spec
+    for game_spec in game_specs:
+        try:
+            game_benchmark = clemgame.load_game(game_spec, do_setup=False)  # not `game`: keep the argument unshadowed
+            if experiment_name:
+                game_benchmark.filter_experiment.append(experiment_name)
+            time_start = datetime.now()
+            game_benchmark.build_transcripts(results_dir)
+            time_end = datetime.now()
+            logger.info(f"Building transcripts for {game_benchmark.game_name} took {str(time_end - time_start)}")
+        except Exception as e:  # try sits inside the loop: log this spec's failure, then go on to the next spec
+            stdout_logger.exception(e)
+            logger.error(e, exc_info=True)