Merge branch 'master' into split-fail-focus

Learning-and-Intelligent-Systems · Nov 21, 2023 · c89c8c8 · c89c8c8
2 parents 3bf536e + 8f16303
commit c89c8c8
Show file tree

Hide file tree

Showing 18 changed files with 106 additions and 36 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -4,6 +4,8 @@ omit =
     predicators/envs/kitchen.py
     predicators/perception/kitchen_perceiver.py
     predicators/ground_truth_models/kitchen/**
+    # Currently disabled due to flakiness in the SME dependency.
+    predicators/approaches/sme_pg3_analogy_approach.py
 
 [report]
 # Regexes for lines to exclude from consideration

diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 ## Repository Description
 
-This codebase implements a framework for *bilevel planning with learned neuro-symbolic relational abstractions*, as described in [this paper](https://arxiv.org/abs/2203.09634). Several features are concurrently under active development. **Please contact <[email protected]> and <ronuchit@mit.edu> before attempting to use it for your own research.** In particular, this codebase aims to ultimately provide an integrated system for learning the ingredients of search-then-sample bilevel planning with learned abstractions. That includes: options, predicates, operators, and samplers. 
+This codebase implements a framework for *bilevel planning with learned neuro-symbolic relational abstractions*, as described in [this paper](https://arxiv.org/abs/2203.09634). Several features are concurrently under active development. **Please contact <[email protected]> or <njk@mit.edu> before attempting to use it for your own research.** In particular, this codebase aims to ultimately provide an integrated system for learning the ingredients of search-then-sample bilevel planning with learned abstractions. That includes: options, predicates, operators, and samplers. 
 
 ### Code Structure
 

diff --git a/predicators/args.py b/predicators/args.py
@@ -28,6 +28,7 @@ def create_arg_parser(env_required: bool = True,
     parser.add_argument("--make_failure_videos", action="store_true")
     parser.add_argument("--make_interaction_videos", action="store_true")
     parser.add_argument("--make_demo_videos", action="store_true")
+    parser.add_argument("--make_cogman_videos", action="store_true")
     parser.add_argument("--load_approach", action="store_true")
     # In the case of online learning approaches, load_approach by itself
     # will try to load an approach on *every* online learning cycle.

diff --git a/predicators/cogman.py b/predicators/cogman.py
@@ -11,13 +11,14 @@
 import logging
 from typing import Callable, List, Optional, Sequence, Set
 
+from predicators import utils
 from predicators.approaches import BaseApproach
 from predicators.execution_monitoring import BaseExecutionMonitor
 from predicators.perception import BasePerceiver
 from predicators.settings import CFG
 from predicators.structs import Action, Dataset, EnvironmentTask, GroundAtom, \
     InteractionRequest, InteractionResult, LowLevelTrajectory, Metrics, \
-    Observation, State, Task
+    Observation, State, Task, Video
 
 
 class CogMan:
@@ -32,24 +33,38 @@ def __init__(self, approach: BaseApproach, perceiver: BasePerceiver,
         self._current_goal: Optional[Set[GroundAtom]] = None
         self._override_policy: Optional[Callable[[State], Action]] = None
         self._termination_fn: Optional[Callable[[State], bool]] = None
+        self._current_env_task: Optional[EnvironmentTask] = None
         self._episode_state_history: List[State] = []
         self._episode_action_history: List[Action] = []
+        self._episode_images: Video = []
+        self._episode_num = -1
 
     def reset(self, env_task: EnvironmentTask) -> None:
         """Start a new episode of environment interaction."""
         logging.info("[CogMan] Reset called.")
+        self._episode_num += 1
         task = self._perceiver.reset(env_task)
+        self._current_env_task = env_task
         self._current_goal = task.goal
         self._reset_policy(task)
         self._exec_monitor.reset(task)
         self._exec_monitor.update_approach_info(
             self._approach.get_execution_monitoring_info())
         self._episode_state_history = [task.init]
         self._episode_action_history = []
+        self._episode_images = []
+        if CFG.make_cogman_videos:
+            imgs = self._perceiver.render_mental_images(task.init, env_task)
+            self._episode_images.extend(imgs)
 
     def step(self, observation: Observation) -> Optional[Action]:
         """Receive an observation and produce an action, or None for done."""
         state = self._perceiver.step(observation)
+        if CFG.make_cogman_videos:
+            assert self._current_env_task is not None
+            imgs = self._perceiver.render_mental_images(
+                state, self._current_env_task)
+            self._episode_images.extend(imgs)
         # Replace the first step because the state was already added in reset().
         if not self._episode_action_history:
             self._episode_state_history[0] = state
@@ -86,6 +101,10 @@ def finish_episode(self, observation: Observation) -> None:
                 self._episode_action_history):
             state = self._perceiver.step(observation)
             self._episode_state_history.append(state)
+        if CFG.make_cogman_videos:
+            save_prefix = utils.get_config_path_str()
+            outfile = f"{save_prefix}__cogman__episode{self._episode_num}.mp4"
+            utils.save_video(outfile, self._episode_images)
 
     # The methods below provide an interface to the approach. In the future,
     # we may want to move some of these methods into cogman properly, e.g.,

diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py
@@ -57,7 +57,10 @@ def __init__(self, predicates: Set[Predicate],
         self._last_executed_nsrt: Optional[_GroundNSRT] = None
         self._nsrt_to_explorer_sampler = nsrt_to_explorer_sampler
         self._seen_train_task_idxs = seen_train_task_idxs
-        self._task_plan_cache: Dict[_TaskID, List[_GroundSTRIPSOperator]] = {}
+        # If the plan is None, that means none can be found, e.g., due to
+        # timeouts or dead-ends.
+        self._task_plan_cache: Dict[
+            _TaskID, Optional[List[_GroundSTRIPSOperator]]] = {}
         self._task_plan_calls_since_replan: Dict[_TaskID, int] = {}
         self._sorted_options = sorted(options, key=lambda o: o.name)
 
@@ -412,17 +415,20 @@ def _score_ground_op_planning_progress(
         replan_task_ids = [("replan", i) for i in range(len(num_replan_tasks))]
         for task_id in train_task_ids + replan_task_ids:
             plan = self._get_task_plan_for_task(task_id, ground_op_costs)
-            task_plan_costs = []
-            for op in plan:
-                op_cost = ground_op_costs.get(op, self._default_cost)
-                task_plan_costs.append(op_cost)
-            plan_costs.append(sum(task_plan_costs))
+            # If no plan can be found for a task, the task is just ignored.
+            if plan is not None:
+                task_plan_costs = []
+                for op in plan:
+                    op_cost = ground_op_costs.get(op, self._default_cost)
+                    task_plan_costs.append(op_cost)
+                plan_costs.append(sum(task_plan_costs))
         return -sum(plan_costs)  # higher scores are better
 
     def _get_task_plan_for_task(
         self, task_id: _TaskID, ground_op_costs: Dict[_GroundSTRIPSOperator,
                                                       float]
-    ) -> List[_GroundSTRIPSOperator]:
+    ) -> Optional[List[_GroundSTRIPSOperator]]:
+        """Returns None if no task plan can be found."""
         # Optimization: only re-plan at a certain frequency.
         replan_freq = CFG.active_sampler_explorer_replan_frequency
         if task_id not in self._task_plan_calls_since_replan or \
@@ -435,18 +441,22 @@ def _get_task_plan_for_task(
                 "train": self._train_tasks,
                 "replan": self._replanning_tasks,
             }[task_type][task_idx]
-            plan, _, _ = run_task_plan_once(
-                task,
-                self._nsrts,
-                self._predicates,
-                self._types,
-                timeout,
-                self._seed,
-                task_planning_heuristic=task_planning_heuristic,
-                ground_op_costs=ground_op_costs,
-                default_cost=self._default_cost,
-                max_horizon=np.inf)
-            self._task_plan_cache[task_id] = [n.op for n in plan]
+            try:
+                plan, _, _ = run_task_plan_once(
+                    task,
+                    self._nsrts,
+                    self._predicates,
+                    self._types,
+                    timeout,
+                    self._seed,
+                    task_planning_heuristic=task_planning_heuristic,
+                    ground_op_costs=ground_op_costs,
+                    default_cost=self._default_cost,
+                    max_horizon=np.inf)
+                self._task_plan_cache[task_id] = [n.op for n in plan]
+            except (PlanningFailure, PlanningTimeout):  # pragma: no cover
+                logging.info("WARNING: task planning failed in the explorer.")
+                self._task_plan_cache[task_id] = None
 
         self._task_plan_calls_since_replan[task_id] += 1
         return self._task_plan_cache[task_id]

diff --git a/predicators/perception/base_perceiver.py b/predicators/perception/base_perceiver.py
@@ -2,7 +2,8 @@
 
 import abc
 
-from predicators.structs import EnvironmentTask, Observation, State, Task
+from predicators.structs import EnvironmentTask, Observation, State, Task, \
+    Video
 
 
 class BasePerceiver(abc.ABC):
@@ -20,3 +21,8 @@ def reset(self, env_task: EnvironmentTask) -> Task:
     @abc.abstractmethod
     def step(self, observation: Observation) -> State:
         """Produce a State given the current and past observations."""
+
+    @abc.abstractmethod
+    def render_mental_images(self, observation: Observation,
+                             env_task: EnvironmentTask) -> Video:
+        """Create mental images for the given observation."""
diff --git a/predicators/perception/kitchen_perceiver.py b/predicators/perception/kitchen_perceiver.py
@@ -3,7 +3,7 @@
 from predicators.envs.kitchen import KitchenEnv
 from predicators.perception.base_perceiver import BasePerceiver
 from predicators.structs import EnvironmentTask, GroundAtom, Observation, \
-    State, Task
+    State, Task, Video
 
 
 class KitchenPerceiver(BasePerceiver):
@@ -49,3 +49,7 @@ def step(self, observation: Observation) -> State:
 
     def _observation_to_state(self, obs: Observation) -> State:
         return KitchenEnv.state_info_to_state(obs["state_info"])
+
+    def render_mental_images(self, observation: Observation,
+                             env_task: EnvironmentTask) -> Video:
+        raise NotImplementedError("Mental images not implemented for kitchen")
diff --git a/predicators/perception/sokoban_perceiver.py b/predicators/perception/sokoban_perceiver.py
@@ -8,7 +8,7 @@
 from predicators.envs.sokoban import SokobanEnv
 from predicators.perception.base_perceiver import BasePerceiver
 from predicators.structs import EnvironmentTask, GroundAtom, Object, \
-    Observation, State, Task
+    Observation, State, Task, Video
 
 # Each observation is a tuple of four 2D boolean masks (numpy arrays).
 # The order is: free, goals, boxes, player.
@@ -95,3 +95,7 @@ def _get_object_name(r: int, c: int, type_name: str) -> str:
 
         state = utils.create_state_from_dict(state_dict)
         return state
+
+    def render_mental_images(self, observation: Observation,
+                             env_task: EnvironmentTask) -> Video:
+        raise NotImplementedError("Mental images not implemented for sokoban")
diff --git a/predicators/perception/trivial_perceiver.py b/predicators/perception/trivial_perceiver.py
@@ -1,7 +1,10 @@
 """A trivial perceiver that assumes observations are already states."""
 
+from predicators.envs import get_or_create_env
 from predicators.perception.base_perceiver import BasePerceiver
-from predicators.structs import EnvironmentTask, Observation, State, Task
+from predicators.settings import CFG
+from predicators.structs import EnvironmentTask, Observation, State, Task, \
+    Video
 
 
 class TrivialPerceiver(BasePerceiver):
@@ -17,3 +20,10 @@ def reset(self, env_task: EnvironmentTask) -> Task:
     def step(self, observation: Observation) -> State:
         assert isinstance(observation, State)
         return observation
+
+    def render_mental_images(self, observation: Observation,
+                             env_task: EnvironmentTask) -> Video:
+        # Use the environment's render function by default.
+        assert isinstance(observation, State)
+        env = get_or_create_env(CFG.env)
+        return env.render_state(observation, env_task)
diff --git a/predicators/planning.py b/predicators/planning.py
@@ -1060,7 +1060,8 @@ def fd_plan_from_sas_file(
     metrics: Metrics = defaultdict(float)
     num_nodes_expanded = re.findall(r"Expanded (\d+) state", output)
     num_nodes_created = re.findall(r"Evaluated (\d+) state", output)
-    assert len(num_nodes_expanded) == 1
+    if len(num_nodes_expanded) != 1:
+        raise PlanningFailure(f"Plan not found with FD! Error: {output}")
     assert len(num_nodes_created) == 1
     metrics["num_nodes_expanded"] = float(num_nodes_expanded[0])
     metrics["num_nodes_created"] = float(num_nodes_created[0])

diff --git a/predicators/settings.py b/predicators/settings.py
@@ -663,7 +663,7 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]:
                     # the horizon to be shorter.
                     "touch_point": 15,
                     # Ditto for the simple grid row environment.
-                    "grid_row": cls.grid_row_num_cells + 5,
+                    "grid_row": cls.grid_row_num_cells + 2,
                 })[args.get("env", "")],
 
             # Maximum number of steps to roll out an option policy.

diff --git a/scripts/configs/active_sampler_learning.yaml b/scripts/configs/active_sampler_learning.yaml
@@ -49,7 +49,7 @@ ENVS:
   grid_row:
     NAME: "grid_row"
     FLAGS:
-      max_num_steps_interaction_request: 500
+      max_num_steps_interaction_request: 150
       active_sampler_learning_explore_length_base: 100000  # effectively disable
       active_sampler_learning_feature_selection: all
   ball_and_cup_sticky_table:

diff --git a/setup.py b/setup.py
@@ -25,7 +25,7 @@
         "pybullet>=3.2.0",
         "scikit-learn",
         "graphlib-backport",
-        "openai",
+        "openai==0.28.1",
         "pyyaml",
         "pylint==2.14.5",
         "types-PyYAML",

diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py
@@ -754,6 +754,7 @@ def simulate(self, state, action):
             return self._transition_stack(state, x, y, z)
 
     env = _ExternalBlocksEnv()
+    assert env.get_name() == "external_blocks"
 
     # Create external options by modifying blocks options.
     options = set()

diff --git a/tests/approaches/test_pg3_analogy_approach.py b/tests/approaches/test_pg3_analogy_approach.py
@@ -145,8 +145,14 @@ def test_pg3_analogy_approach():
 )"""
 
 
-def test_find_env_analogies():
-    """Tests for _find_env_analogies()."""
+def _disabled_test_find_env_analogies():  # pragma: no cover
+    """Tests for _find_env_analogies().
+
+    NOTE: this test is currently disabled because of sudden flakiness in the
+    SME dependency, despite no changes for months. Since we're not actively
+    using this code, we're just disabling it, but leaving it here in case we
+    do want to resurrect the code in the future.
+    """
     # Test for gripper -> ferry.
     base_env = create_new_env("pddl_gripper_procedural_tasks")
     base_nsrts = get_gt_nsrts(base_env.get_name(), base_env.predicates,

diff --git a/tests/envs/test_pybullet_cover.py b/tests/envs/test_pybullet_cover.py
@@ -166,8 +166,12 @@ def test_pybullet_cover_step(env):
     assert abs(state.get(block, "pose") - 0.75) < 0.01
 
 
-def test_pybullet_cover_pick_workspace_bounds(env):
-    """Tests for picking at workspace bounds in PyBulletCoverEnv."""
+def _disabled_test_pybullet_cover_pick_workspace_bounds(
+        env):  # pragma: no cover
+    """Tests for picking at workspace bounds in PyBulletCoverEnv.
+
+    This is currently disabled due to nondeterminism issues in IK.
+    """
     block = Object("block0", env.block_type)
     robot = env.robot
     workspace_x, workspace_z = env.workspace_dimensions

diff --git a/tests/envs/test_sokoban.py b/tests/envs/test_sokoban.py
@@ -78,6 +78,8 @@ def test_sokoban():
     imgs = env.render()
     assert len(imgs) == 1
     task = perceiver.reset(env_task)
+    with pytest.raises(NotImplementedError):
+        perceiver.render_mental_images(env_task.init_obs, env_task)
     state = task.init
     atoms = utils.abstract(state, env.predicates)
     num_boxes = len({a for a in atoms if a.predicate == IsBox})

diff --git a/tests/test_main.py b/tests/test_main.py
@@ -117,9 +117,9 @@ def test_main():
     eval_traj_dir = os.path.join(parent_dir, "_fake_trajs")
     sys.argv = [
         "dummy", "--env", "cover", "--approach", "oracle", "--seed", "123",
-        "--make_test_videos", "--num_test_tasks", "1", "--video_dir",
-        video_dir, "--results_dir", results_dir, "--eval_trajectories_dir",
-        eval_traj_dir
+        "--make_test_videos", "--make_cogman_videos", "--num_test_tasks", "1",
+        "--video_dir", video_dir, "--results_dir", results_dir,
+        "--eval_trajectories_dir", eval_traj_dir
     ]
     main()
     # Test making videos of failures and local logging.