From 48e302e8924751198e4e40431426cf67cae10010 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 14 Nov 2023 13:51:27 -0500 Subject: [PATCH 1/7] pin openai dependency (#1580) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 54e749820e..1de2d2da9e 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ "pybullet>=3.2.0", "scikit-learn", "graphlib-backport", - "openai", + "openai==0.28.1", "pyyaml", "pylint==2.14.5", "types-PyYAML", From c1d177857431e16ca7e930c2edcab153b2e480e2 Mon Sep 17 00:00:00 2001 From: Nishanth Kumar Date: Tue, 14 Nov 2023 14:03:43 -0500 Subject: [PATCH 2/7] changes to produce prettier grid row graphs (#1577) --- predicators/settings.py | 2 +- scripts/configs/active_sampler_learning.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/predicators/settings.py b/predicators/settings.py index a856581fe3..e2febdbd8c 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -662,7 +662,7 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]: # the horizon to be shorter. "touch_point": 15, # Ditto for the simple grid row environment. - "grid_row": cls.grid_row_num_cells + 5, + "grid_row": cls.grid_row_num_cells + 2, })[args.get("env", "")], # Maximum number of steps to roll out an option policy. diff --git a/scripts/configs/active_sampler_learning.yaml b/scripts/configs/active_sampler_learning.yaml index 082135e0a8..83d8679260 100644 --- a/scripts/configs/active_sampler_learning.yaml +++ b/scripts/configs/active_sampler_learning.yaml @@ -42,7 +42,7 @@ ENVS: grid_row: NAME: "grid_row" FLAGS: - max_num_steps_interaction_request: 500 + max_num_steps_interaction_request: 150 active_sampler_learning_explore_length_base: 100000 # effectively disable active_sampler_learning_feature_selection: all ball_and_cup_sticky_table: From e6be2c032c62bfa2e88559e91fbc733d602926ab Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Thu, 16 Nov 2023 11:40:48 -0500 Subject: [PATCH 3/7] add functionality for rendering videos within cogman, rather than within the environment (#1581) --- predicators/args.py | 1 + predicators/cogman.py | 21 ++++++++++++++++++++- predicators/perception/base_perceiver.py | 8 +++++++- predicators/perception/kitchen_perceiver.py | 6 +++++- predicators/perception/sokoban_perceiver.py | 6 +++++- predicators/perception/trivial_perceiver.py | 12 +++++++++++- tests/envs/test_sokoban.py | 2 ++ tests/test_main.py | 6 +++--- 8 files changed, 54 insertions(+), 8 deletions(-) diff --git a/predicators/args.py b/predicators/args.py index 2de1ec07ed..56a085a454 100644 --- a/predicators/args.py +++ b/predicators/args.py @@ -28,6 +28,7 @@ def create_arg_parser(env_required: bool = True, parser.add_argument("--make_failure_videos", action="store_true") parser.add_argument("--make_interaction_videos", action="store_true") parser.add_argument("--make_demo_videos", action="store_true") + parser.add_argument("--make_cogman_videos", action="store_true") parser.add_argument("--load_approach", action="store_true") # In the case of online learning approaches, load_approach by itself # will try to load an approach on *every* online learning cycle. 
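The `--make_cogman_videos` flag added above is consumed in the `predicators/cogman.py` hunks that follow: when the flag is set, CogMan asks the perceiver for "mental images" at reset and after every step, then writes the accumulated frames to disk when the episode finishes. Condensed into a standalone sketch for orientation (the attribute and utility names are taken from this patch; the two helper functions are illustrative glue, not code from the repository):

```python
# Condensed sketch of the video flow the cogman.py hunks below add.  The names
# CFG.make_cogman_videos, utils.get_config_path_str, utils.save_video, and
# perceiver.render_mental_images all come from this patch; these two helper
# functions are illustrative glue, not code from the repository.
from predicators import utils
from predicators.settings import CFG


def maybe_record_frames(perceiver, images, state, env_task):
    """Append the perceiver's mental images for one state, if enabled."""
    if CFG.make_cogman_videos:
        images.extend(perceiver.render_mental_images(state, env_task))


def maybe_save_episode_video(images, episode_num):
    """Write the accumulated frames to disk at the end of an episode."""
    if CFG.make_cogman_videos:
        save_prefix = utils.get_config_path_str()
        outfile = f"{save_prefix}__cogman__episode{episode_num}.mp4"
        utils.save_video(outfile, images)
```

In the diff itself this logic lives inside CogMan.reset, CogMan.step, and CogMan.finish_episode, with the frames accumulated on the instance as self._episode_images.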
diff --git a/predicators/cogman.py b/predicators/cogman.py index e73b48bfda..62a28f5193 100644 --- a/predicators/cogman.py +++ b/predicators/cogman.py @@ -11,13 +11,14 @@ import logging from typing import Callable, List, Optional, Sequence, Set +from predicators import utils from predicators.approaches import BaseApproach from predicators.execution_monitoring import BaseExecutionMonitor from predicators.perception import BasePerceiver from predicators.settings import CFG from predicators.structs import Action, Dataset, EnvironmentTask, GroundAtom, \ InteractionRequest, InteractionResult, LowLevelTrajectory, Metrics, \ - Observation, State, Task + Observation, State, Task, Video class CogMan: @@ -32,13 +33,18 @@ def __init__(self, approach: BaseApproach, perceiver: BasePerceiver, self._current_goal: Optional[Set[GroundAtom]] = None self._override_policy: Optional[Callable[[State], Action]] = None self._termination_fn: Optional[Callable[[State], bool]] = None + self._current_env_task: Optional[EnvironmentTask] = None self._episode_state_history: List[State] = [] self._episode_action_history: List[Action] = [] + self._episode_images: Video = [] + self._episode_num = -1 def reset(self, env_task: EnvironmentTask) -> None: """Start a new episode of environment interaction.""" logging.info("[CogMan] Reset called.") + self._episode_num += 1 task = self._perceiver.reset(env_task) + self._current_env_task = env_task self._current_goal = task.goal self._reset_policy(task) self._exec_monitor.reset(task) @@ -46,10 +52,19 @@ def reset(self, env_task: EnvironmentTask) -> None: self._approach.get_execution_monitoring_info()) self._episode_state_history = [task.init] self._episode_action_history = [] + self._episode_images = [] + if CFG.make_cogman_videos: + imgs = self._perceiver.render_mental_images(task.init, env_task) + self._episode_images.extend(imgs) def step(self, observation: Observation) -> Optional[Action]: """Receive an observation and produce an action, or None for done.""" state = self._perceiver.step(observation) + if CFG.make_cogman_videos: + assert self._current_env_task is not None + imgs = self._perceiver.render_mental_images( + state, self._current_env_task) + self._episode_images.extend(imgs) # Replace the first step because the state was already added in reset(). if not self._episode_action_history: self._episode_state_history[0] = state @@ -86,6 +101,10 @@ def finish_episode(self, observation: Observation) -> None: self._episode_action_history): state = self._perceiver.step(observation) self._episode_state_history.append(state) + if CFG.make_cogman_videos: + save_prefix = utils.get_config_path_str() + outfile = f"{save_prefix}__cogman__episode{self._episode_num}.mp4" + utils.save_video(outfile, self._episode_images) # The methods below provide an interface to the approach. 
In the future, # we may want to move some of these methods into cogman properly, e.g., diff --git a/predicators/perception/base_perceiver.py b/predicators/perception/base_perceiver.py index bca657e78b..3bf36e23d1 100644 --- a/predicators/perception/base_perceiver.py +++ b/predicators/perception/base_perceiver.py @@ -2,7 +2,8 @@ import abc -from predicators.structs import EnvironmentTask, Observation, State, Task +from predicators.structs import EnvironmentTask, Observation, State, Task, \ + Video class BasePerceiver(abc.ABC): @@ -20,3 +21,8 @@ def reset(self, env_task: EnvironmentTask) -> Task: @abc.abstractmethod def step(self, observation: Observation) -> State: """Produce a State given the current and past observations.""" + + @abc.abstractmethod + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + """Create mental images for the given observation.""" diff --git a/predicators/perception/kitchen_perceiver.py b/predicators/perception/kitchen_perceiver.py index e8b6dfdb48..0aa179b90b 100644 --- a/predicators/perception/kitchen_perceiver.py +++ b/predicators/perception/kitchen_perceiver.py @@ -3,7 +3,7 @@ from predicators.envs.kitchen import KitchenEnv from predicators.perception.base_perceiver import BasePerceiver from predicators.structs import EnvironmentTask, GroundAtom, Observation, \ - State, Task + State, Task, Video class KitchenPerceiver(BasePerceiver): @@ -49,3 +49,7 @@ def step(self, observation: Observation) -> State: def _observation_to_state(self, obs: Observation) -> State: return KitchenEnv.state_info_to_state(obs["state_info"]) + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + raise NotImplementedError("Mental images not implemented for kitchen") diff --git a/predicators/perception/sokoban_perceiver.py b/predicators/perception/sokoban_perceiver.py index 428c9c21b6..e633d4c802 100644 --- a/predicators/perception/sokoban_perceiver.py +++ b/predicators/perception/sokoban_perceiver.py @@ -8,7 +8,7 @@ from predicators.envs.sokoban import SokobanEnv from predicators.perception.base_perceiver import BasePerceiver from predicators.structs import EnvironmentTask, GroundAtom, Object, \ - Observation, State, Task + Observation, State, Task, Video # Each observation is a tuple of four 2D boolean masks (numpy arrays). # The order is: free, goals, boxes, player. 
@@ -95,3 +95,7 @@ def _get_object_name(r: int, c: int, type_name: str) -> str: state = utils.create_state_from_dict(state_dict) return state + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + raise NotImplementedError("Mental images not implemented for sokoban") diff --git a/predicators/perception/trivial_perceiver.py b/predicators/perception/trivial_perceiver.py index 345d44f63a..bed17f1b0c 100644 --- a/predicators/perception/trivial_perceiver.py +++ b/predicators/perception/trivial_perceiver.py @@ -1,7 +1,10 @@ """A trivial perceiver that assumes observations are already states.""" +from predicators.envs import get_or_create_env from predicators.perception.base_perceiver import BasePerceiver -from predicators.structs import EnvironmentTask, Observation, State, Task +from predicators.settings import CFG +from predicators.structs import EnvironmentTask, Observation, State, Task, \ + Video class TrivialPerceiver(BasePerceiver): @@ -17,3 +20,10 @@ def reset(self, env_task: EnvironmentTask) -> Task: def step(self, observation: Observation) -> State: assert isinstance(observation, State) return observation + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + # Use the environment's render function by default. + assert isinstance(observation, State) + env = get_or_create_env(CFG.env) + return env.render_state(observation, env_task) diff --git a/tests/envs/test_sokoban.py b/tests/envs/test_sokoban.py index 0f0e120637..c0caf0008f 100644 --- a/tests/envs/test_sokoban.py +++ b/tests/envs/test_sokoban.py @@ -78,6 +78,8 @@ def test_sokoban(): imgs = env.render() assert len(imgs) == 1 task = perceiver.reset(env_task) + with pytest.raises(NotImplementedError): + perceiver.render_mental_images(env_task.init_obs, env_task) state = task.init atoms = utils.abstract(state, env.predicates) num_boxes = len({a for a in atoms if a.predicate == IsBox}) diff --git a/tests/test_main.py b/tests/test_main.py index 7f0a7a3a0a..6b6f85d051 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -117,9 +117,9 @@ def test_main(): eval_traj_dir = os.path.join(parent_dir, "_fake_trajs") sys.argv = [ "dummy", "--env", "cover", "--approach", "oracle", "--seed", "123", - "--make_test_videos", "--num_test_tasks", "1", "--video_dir", - video_dir, "--results_dir", results_dir, "--eval_trajectories_dir", - eval_traj_dir + "--make_test_videos", "--make_cogman_videos", "--num_test_tasks", "1", + "--video_dir", video_dir, "--results_dir", results_dir, + "--eval_trajectories_dir", eval_traj_dir ] main() # Test making videos of failures and local logging. From 97b961e3f6af8b5ed8aa40f8a9ea0dd98dbb86f0 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 21 Nov 2023 09:50:39 -0500 Subject: [PATCH 4/7] add info to FD crashes (#1582) --- predicators/planning.py | 3 ++- tests/approaches/test_oracle_approach.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/predicators/planning.py b/predicators/planning.py index 4f398ecd9e..eb1337d521 100644 --- a/predicators/planning.py +++ b/predicators/planning.py @@ -1060,7 +1060,8 @@ def fd_plan_from_sas_file( metrics: Metrics = defaultdict(float) num_nodes_expanded = re.findall(r"Expanded (\d+) state", output) num_nodes_created = re.findall(r"Evaluated (\d+) state", output) - assert len(num_nodes_expanded) == 1 + if len(num_nodes_expanded) != 1: + raise PlanningFailure(f"Plan not found with FD! 
Error: {output}") assert len(num_nodes_created) == 1 metrics["num_nodes_expanded"] = float(num_nodes_expanded[0]) metrics["num_nodes_created"] = float(num_nodes_created[0]) diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py index 13c8ec3454..a5ca837a22 100644 --- a/tests/approaches/test_oracle_approach.py +++ b/tests/approaches/test_oracle_approach.py @@ -754,6 +754,7 @@ def simulate(self, state, action): return self._transition_stack(state, x, y, z) env = _ExternalBlocksEnv() + assert env.get_name() == "external_blocks" # Create external options by modifying blocks options. options = set() From 0bdd180027fa8fefd56ae531731c121d8e9dd511 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 21 Nov 2023 10:45:25 -0500 Subject: [PATCH 5/7] disable flakey tests (#1586) --- .coveragerc | 2 ++ tests/approaches/test_pg3_analogy_approach.py | 10 ++++++++-- tests/envs/test_pybullet_cover.py | 8 ++++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.coveragerc b/.coveragerc index b3cb5ad766..6e0c85841b 100644 --- a/.coveragerc +++ b/.coveragerc @@ -4,6 +4,8 @@ omit = predicators/envs/kitchen.py predicators/perception/kitchen_perceiver.py predicators/ground_truth_models/kitchen/** + # Currently disabled due to flakiness in the SME dependency. + predicators/approaches/sme_pg3_analogy_approach.py [report] # Regexes for lines to exclude from consideration diff --git a/tests/approaches/test_pg3_analogy_approach.py b/tests/approaches/test_pg3_analogy_approach.py index 55700e3b3c..52c9cdb079 100644 --- a/tests/approaches/test_pg3_analogy_approach.py +++ b/tests/approaches/test_pg3_analogy_approach.py @@ -145,8 +145,14 @@ def test_pg3_analogy_approach(): )""" -def test_find_env_analogies(): - """Tests for _find_env_analogies().""" +def _disabled_test_find_env_analogies(): # pragma: no cover + """Tests for _find_env_analogies(). + + NOTE: this test is currently disabled because of sudden flakiness in the + SME depedency, despite no changes for months. Since we're not actively + using this code, we're just disabling it, but leaving it here in case we + do want to resurrect the code in the future. + """ # Test for gripper -> ferry. base_env = create_new_env("pddl_gripper_procedural_tasks") base_nsrts = get_gt_nsrts(base_env.get_name(), base_env.predicates, diff --git a/tests/envs/test_pybullet_cover.py b/tests/envs/test_pybullet_cover.py index 0794b6c010..c8df9aae8a 100644 --- a/tests/envs/test_pybullet_cover.py +++ b/tests/envs/test_pybullet_cover.py @@ -166,8 +166,12 @@ def test_pybullet_cover_step(env): assert abs(state.get(block, "pose") - 0.75) < 0.01 -def test_pybullet_cover_pick_workspace_bounds(env): - """Tests for picking at workspace bounds in PyBulletCoverEnv.""" +def _disabled_test_pybullet_cover_pick_workspace_bounds( + env): # pragma: no cover + """Tests for picking at workspace bounds in PyBulletCoverEnv. + + This is currently disabled due to nondeterminism issues in IK. 
+ """ block = Object("block0", env.block_type) robot = env.robot workspace_x, workspace_z = env.workspace_dimensions From 5c029fc2dbdeb24f470198121a5677e02dc01e36 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 21 Nov 2023 11:00:51 -0500 Subject: [PATCH 6/7] Remove dead email and add NJK email in README (#1583) with Rohan's blessing --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c411f3207d..39fb557e9c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## Repository Description -This codebase implements a framework for *bilevel planning with learned neuro-symbolic relational abstractions*, as described in [this paper](https://arxiv.org/abs/2203.09634). Several features are concurrently under active development. **Please contact and before attempting to use it for your own research.** In particular, this codebase aims to ultimately provide an integrated system for learning the ingredients of search-then-sample bilevel planning with learned abstractions. That includes: options, predicates, operators, and samplers. +This codebase implements a framework for *bilevel planning with learned neuro-symbolic relational abstractions*, as described in [this paper](https://arxiv.org/abs/2203.09634). Several features are concurrently under active development. **Please contact or before attempting to use it for your own research.** In particular, this codebase aims to ultimately provide an integrated system for learning the ingredients of search-then-sample bilevel planning with learned abstractions. That includes: options, predicates, operators, and samplers. ### Code Structure From 8f1630324d95440f0fa023a124dbcabd03c1f635 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 21 Nov 2023 11:23:41 -0500 Subject: [PATCH 7/7] handle planning failures within task planning in active sampler explorer (#1584) --- .../explorers/active_sampler_explorer.py | 48 +++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py index 6f23e7ebff..898f772a35 100644 --- a/predicators/explorers/active_sampler_explorer.py +++ b/predicators/explorers/active_sampler_explorer.py @@ -57,7 +57,10 @@ def __init__(self, predicates: Set[Predicate], self._last_executed_nsrt: Optional[_GroundNSRT] = None self._nsrt_to_explorer_sampler = nsrt_to_explorer_sampler self._seen_train_task_idxs = seen_train_task_idxs - self._task_plan_cache: Dict[_TaskID, List[_GroundSTRIPSOperator]] = {} + # If the plan is None, that means none can be found, e.g., due to + # timeouts or dead-ends. + self._task_plan_cache: Dict[ + _TaskID, Optional[List[_GroundSTRIPSOperator]]] = {} self._task_plan_calls_since_replan: Dict[_TaskID, int] = {} self._sorted_options = sorted(options, key=lambda o: o.name) @@ -410,17 +413,20 @@ def _score_ground_op_planning_progress( replan_task_ids = [("replan", i) for i in range(len(num_replan_tasks))] for task_id in train_task_ids + replan_task_ids: plan = self._get_task_plan_for_task(task_id, ground_op_costs) - task_plan_costs = [] - for op in plan: - op_cost = ground_op_costs.get(op, self._default_cost) - task_plan_costs.append(op_cost) - plan_costs.append(sum(task_plan_costs)) + # If no plan can be found for a task, the task is just ignored. 
+ if plan is not None: + task_plan_costs = [] + for op in plan: + op_cost = ground_op_costs.get(op, self._default_cost) + task_plan_costs.append(op_cost) + plan_costs.append(sum(task_plan_costs)) return -sum(plan_costs) # higher scores are better def _get_task_plan_for_task( self, task_id: _TaskID, ground_op_costs: Dict[_GroundSTRIPSOperator, float] - ) -> List[_GroundSTRIPSOperator]: + ) -> Optional[List[_GroundSTRIPSOperator]]: + """Returns None if no task plan can be found.""" # Optimization: only re-plan at a certain frequency. replan_freq = CFG.active_sampler_explorer_replan_frequency if task_id not in self._task_plan_calls_since_replan or \ @@ -433,18 +439,22 @@ def _get_task_plan_for_task( "train": self._train_tasks, "replan": self._replanning_tasks, }[task_type][task_idx] - plan, _, _ = run_task_plan_once( - task, - self._nsrts, - self._predicates, - self._types, - timeout, - self._seed, - task_planning_heuristic=task_planning_heuristic, - ground_op_costs=ground_op_costs, - default_cost=self._default_cost, - max_horizon=np.inf) - self._task_plan_cache[task_id] = [n.op for n in plan] + try: + plan, _, _ = run_task_plan_once( + task, + self._nsrts, + self._predicates, + self._types, + timeout, + self._seed, + task_planning_heuristic=task_planning_heuristic, + ground_op_costs=ground_op_costs, + default_cost=self._default_cost, + max_horizon=np.inf) + self._task_plan_cache[task_id] = [n.op for n in plan] + except (PlanningFailure, PlanningTimeout): # pragma: no cover + logging.info("WARNING: task planning failed in the explorer.") + self._task_plan_cache[task_id] = None self._task_plan_calls_since_replan[task_id] += 1 return self._task_plan_cache[task_id]
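A calling convention worth noting across patches 4 and 7: planning helpers raise PlanningFailure rather than dying on an assertion (patch 4 now includes the raw Fast Downward output in the message), and callers that can tolerate a missing plan, like the active sampler explorer above, catch PlanningFailure and PlanningTimeout and record None. A minimal sketch of that pattern, assuming the exception classes and run_task_plan_once are importable from predicators.planning (the wrapper function itself is illustrative, not part of the diffs):

```python
# Hedged sketch of the pattern patches 4 and 7 establish: planning helpers
# raise PlanningFailure, and tolerant callers catch it and fall back to None.
# run_task_plan_once, the exception types, and the [n.op for n in plan]
# post-processing are from the diffs; the import path and this wrapper
# function are assumptions made for illustration.
from typing import List, Optional

from predicators.planning import PlanningFailure, PlanningTimeout, \
    run_task_plan_once


def plan_ops_or_none(task, nsrts, predicates, types, timeout, seed,
                     **planner_kwargs) -> Optional[List]:
    """Return the plan's ground operators, or None if planning fails."""
    try:
        plan, _, _ = run_task_plan_once(task, nsrts, predicates, types,
                                        timeout, seed, **planner_kwargs)
    except (PlanningFailure, PlanningTimeout):
        return None
    return [node.op for node in plan]
```

Downstream code then treats a cached None as "no plan available" and simply skips that task when scoring, as the explorer's _score_ground_op_planning_progress does above.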