refactor: remove orchestrator abstraction from API (#289)

* refactor: remove orchestrator abstraction from API
* Remove orchestrator in GPT-J config
* Add `reward_fn` arg to NeMo constructor to match base trainer API
* Initial support for `make_experience` in NeMo ILQL
* Run pre-commit
* Remove unused sampling util
CarperAI · Feb 10, 2023 · 81e935a · 81e935a
1 parent eb62d08
commit 81e935a
Show file tree

Hide file tree

Showing 27 changed files with 466 additions and 607 deletions.
diff --git a/configs/ilql_config.yml b/configs/ilql_config.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 100
 
   pipeline: "PromptPipeline"
-  orchestrator: "OfflineOrchestrator"
   trainer: "AccelerateILQLTrainer"
   seed: 1000
 

diff --git a/configs/nemo_ilql_config.yml b/configs/nemo_ilql_config.yml
@@ -7,7 +7,6 @@ train:
   eval_interval: 20
 
   pipeline: "PromptPipeline"
-  orchestrator: "OfflineOrchestrator"
   trainer: "NeMoILQLTrainer"
   trainer_kwargs:
     pretrained_model: "/mnt/nvme/home/uwu/nemo-megatron-gpt-20B/"

diff --git a/configs/ppo_config.yml b/configs/ppo_config.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 100
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model:

diff --git a/configs/ppo_gptj.yml b/configs/ppo_gptj.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 16
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model:

diff --git a/configs/sft_config.yml b/configs/sft_config.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 100
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AccelerateSFTTrainer"
 
 model:

diff --git a/configs/test_config.yml b/configs/test_config.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 128 # eval interval
 
   pipeline: "PromptPipeline" # prompt pipeline to load
-  orchestrator: "PPOOrchestrator" # orchestrator to load
   trainer: "AcceleratePPOTrainer" # Name of model trainer to load
 
 model:
@@ -36,7 +35,7 @@ scheduler:
 method:
   name: "ppoconfig" # Name of RL method config
   num_rollouts: 128 # Number of rollouts to collect per epoch
-  chunk_size: 128 # Number of rollouts to collect in one loop of orchestrator
+  chunk_size: 128 # Number of rollouts to collect in one loop
   ppo_epochs: 4 # Number of ppo epochs
   init_kl_coef: 0.2 # init kl coefficient
   target: 6 # target kl coefficient, set None for fixed kl coef

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -14,7 +14,6 @@ currently supports training using PPO or ILQL for models up to 20B using Acceler
 
    data
    models
-   orchestrator
    configs
    pipeline
    examples

diff --git a/docs/source/orchestrator.rst b/docs/source/orchestrator.rst
diff --git a/docs/source/pipeline.rst b/docs/source/pipeline.rst
@@ -4,7 +4,7 @@ Pipelines
 ************************
 
 Pipelines are how you read from a dataset with trlX. Rollout stores are how models store experiences created
-for them by the orchestrator. It is these experiences in their rollout store that they are trained on.
+for them. It is these experiences in their rollout store that they are trained on.
 
 **General**
 

diff --git a/examples/experiments/grounded_program_synthesis/configs/trlx_ppo_config.yml b/examples/experiments/grounded_program_synthesis/configs/trlx_ppo_config.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 16
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model:

diff --git a/examples/randomwalks/configs/ilql_randomwalks.yml b/examples/randomwalks/configs/ilql_randomwalks.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 16
 
   pipeline: "PromptPipeline"
-  orchestrator: "OfflineOrchestrator"
   trainer: "AccelerateILQLTrainer"
 
   seed: 1000

diff --git a/examples/randomwalks/configs/ppo_randomwalks.yml b/examples/randomwalks/configs/ppo_randomwalks.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 20
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model:

diff --git a/examples/summarize_daily_cnn/configs/ppo_config_cnn_daily.yml b/examples/summarize_daily_cnn/configs/ppo_config_cnn_daily.yml
@@ -9,7 +9,6 @@ train:
   save_best: False
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model:

diff --git a/examples/summarize_rlhf/configs/ppo_config_summ_gptj.yml b/examples/summarize_rlhf/configs/ppo_config_summ_gptj.yml
@@ -8,7 +8,6 @@ train:
   eval_interval: 200
 
   pipeline: "PromptPipeline"
-  orchestrator: "PPOOrchestrator"
   trainer: "AcceleratePPOTrainer"
 
 model:

diff --git a/trlx/data/__init__.py b/trlx/data/__init__.py
@@ -1,31 +1,18 @@
 from dataclasses import dataclass
-from typing import Any, Iterable
+from typing import Iterable
 
 from torchtyping import TensorType
 
-from . import configs
-
 
 @dataclass
 class GeneralElement:
     """
-    General element outputted by data pipeline being read by orchestrator.
+    General element outputted by a data pipeline
     """
 
     pass
 
 
-@dataclass
-class SimElement:
-    """
-    Batch element for Gyarados or Gyarados-like similarity scoring model
-    """
-
-    content: Any = None
-    preference: Any = None
-    score: float = None
-
-
 @dataclass
 class RLElement:
     """

diff --git a/trlx/data/configs.py b/trlx/data/configs.py
@@ -152,9 +152,6 @@ class TrainConfig:
     :param pipeline: Pipeline to use for training. One of the registered pipelines present in trlx.pipeline
     :type pipeline: str
 
-    :param orchestrator: Orchestrator to use for training. One of the registered orchestrators present in trlx.orchestrator
-    :type orchestrator: str
-
     :param trainer: Trainer to use for training. One of the registered trainers present in trlx.trainer
     :type trainer: str
 
@@ -193,7 +190,6 @@ class TrainConfig:
     eval_interval: int
 
     pipeline: str  # One of the pipelines in framework.pipeline
-    orchestrator: str  # One of the orchestrators
     trainer: str  # One of the trainers
     trainer_kwargs: Dict[str, Any] = field(default_factory=dict)  # Extra keyword arguments for the trainer
 

diff --git a/trlx/orchestrator/__init__.py b/trlx/orchestrator/__init__.py
diff --git a/trlx/orchestrator/offline_orchestrator.py b/trlx/orchestrator/offline_orchestrator.py
-Original file line number
+Diff line change
@@ Expand Up @@
        data
        models
-       orchestrator
        configs
        pipeline
        examples
@@ Expand Down @@