Skip to content

Commit

Permalink
Merge branch 'master' into split-fail-focus
Browse files Browse the repository at this point in the history
  • Loading branch information
NishanthJKumar authored Nov 21, 2023
2 parents 3bf536e + 8f16303 commit c89c8c8
Show file tree
Hide file tree
Showing 18 changed files with 106 additions and 36 deletions.
2 changes: 2 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ omit =
predicators/envs/kitchen.py
predicators/perception/kitchen_perceiver.py
predicators/ground_truth_models/kitchen/**
# Currently disabled due to flakiness in the SME dependency.
predicators/approaches/sme_pg3_analogy_approach.py

[report]
# Regexes for lines to exclude from consideration
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Repository Description

This codebase implements a framework for *bilevel planning with learned neuro-symbolic relational abstractions*, as described in [this paper](https://arxiv.org/abs/2203.09634). Several features are concurrently under active development. **Please contact <[email protected]> and <ronuchit@mit.edu> before attempting to use it for your own research.** In particular, this codebase aims to ultimately provide an integrated system for learning the ingredients of search-then-sample bilevel planning with learned abstractions. That includes: options, predicates, operators, and samplers.
This codebase implements a framework for *bilevel planning with learned neuro-symbolic relational abstractions*, as described in [this paper](https://arxiv.org/abs/2203.09634). Several features are concurrently under active development. **Please contact <[email protected]> or <njk@mit.edu> before attempting to use it for your own research.** In particular, this codebase aims to ultimately provide an integrated system for learning the ingredients of search-then-sample bilevel planning with learned abstractions. That includes: options, predicates, operators, and samplers.

### Code Structure

Expand Down
1 change: 1 addition & 0 deletions predicators/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def create_arg_parser(env_required: bool = True,
parser.add_argument("--make_failure_videos", action="store_true")
parser.add_argument("--make_interaction_videos", action="store_true")
parser.add_argument("--make_demo_videos", action="store_true")
parser.add_argument("--make_cogman_videos", action="store_true")
parser.add_argument("--load_approach", action="store_true")
# In the case of online learning approaches, load_approach by itself
# will try to load an approach on *every* online learning cycle.
Expand Down
21 changes: 20 additions & 1 deletion predicators/cogman.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import logging
from typing import Callable, List, Optional, Sequence, Set

from predicators import utils
from predicators.approaches import BaseApproach
from predicators.execution_monitoring import BaseExecutionMonitor
from predicators.perception import BasePerceiver
from predicators.settings import CFG
from predicators.structs import Action, Dataset, EnvironmentTask, GroundAtom, \
InteractionRequest, InteractionResult, LowLevelTrajectory, Metrics, \
Observation, State, Task
Observation, State, Task, Video


class CogMan:
Expand All @@ -32,24 +33,38 @@ def __init__(self, approach: BaseApproach, perceiver: BasePerceiver,
self._current_goal: Optional[Set[GroundAtom]] = None
self._override_policy: Optional[Callable[[State], Action]] = None
self._termination_fn: Optional[Callable[[State], bool]] = None
self._current_env_task: Optional[EnvironmentTask] = None
self._episode_state_history: List[State] = []
self._episode_action_history: List[Action] = []
self._episode_images: Video = []
self._episode_num = -1

def reset(self, env_task: EnvironmentTask) -> None:
    """Begin a fresh episode of interaction with the environment.

    The perceiver is reset first to obtain the task. The goal, policy and
    execution monitor are then re-initialized from that task, and the
    per-episode state, action and image histories are cleared. When
    CFG.make_cogman_videos is set, the initial state is rendered and
    becomes the first frames of the episode video.
    """
    logging.info("[CogMan] Reset called.")
    self._episode_num += 1
    perceived_task = self._perceiver.reset(env_task)
    self._current_env_task = env_task
    self._current_goal = perceived_task.goal
    self._reset_policy(perceived_task)
    self._exec_monitor.reset(perceived_task)
    monitoring_info = self._approach.get_execution_monitoring_info()
    self._exec_monitor.update_approach_info(monitoring_info)
    # Start every history from scratch; the initial state is recorded now
    # and may be replaced on the first call to step().
    self._episode_state_history = [perceived_task.init]
    self._episode_action_history = []
    self._episode_images = []
    if not CFG.make_cogman_videos:
        return
    initial_frames = self._perceiver.render_mental_images(
        perceived_task.init, env_task)
    self._episode_images.extend(initial_frames)

def step(self, observation: Observation) -> Optional[Action]:
"""Receive an observation and produce an action, or None for done."""
state = self._perceiver.step(observation)
if CFG.make_cogman_videos:
assert self._current_env_task is not None
imgs = self._perceiver.render_mental_images(
state, self._current_env_task)
self._episode_images.extend(imgs)
# Replace the first step because the state was already added in reset().
if not self._episode_action_history:
self._episode_state_history[0] = state
Expand Down Expand Up @@ -86,6 +101,10 @@ def finish_episode(self, observation: Observation) -> None:
self._episode_action_history):
state = self._perceiver.step(observation)
self._episode_state_history.append(state)
if CFG.make_cogman_videos:
save_prefix = utils.get_config_path_str()
outfile = f"{save_prefix}__cogman__episode{self._episode_num}.mp4"
utils.save_video(outfile, self._episode_images)

# The methods below provide an interface to the approach. In the future,
# we may want to move some of these methods into cogman properly, e.g.,
Expand Down
48 changes: 29 additions & 19 deletions predicators/explorers/active_sampler_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ def __init__(self, predicates: Set[Predicate],
self._last_executed_nsrt: Optional[_GroundNSRT] = None
self._nsrt_to_explorer_sampler = nsrt_to_explorer_sampler
self._seen_train_task_idxs = seen_train_task_idxs
self._task_plan_cache: Dict[_TaskID, List[_GroundSTRIPSOperator]] = {}
# If the plan is None, that means none can be found, e.g., due to
# timeouts or dead-ends.
self._task_plan_cache: Dict[
_TaskID, Optional[List[_GroundSTRIPSOperator]]] = {}
self._task_plan_calls_since_replan: Dict[_TaskID, int] = {}
self._sorted_options = sorted(options, key=lambda o: o.name)

Expand Down Expand Up @@ -412,17 +415,20 @@ def _score_ground_op_planning_progress(
replan_task_ids = [("replan", i) for i in range(len(num_replan_tasks))]
for task_id in train_task_ids + replan_task_ids:
plan = self._get_task_plan_for_task(task_id, ground_op_costs)
task_plan_costs = []
for op in plan:
op_cost = ground_op_costs.get(op, self._default_cost)
task_plan_costs.append(op_cost)
plan_costs.append(sum(task_plan_costs))
# If no plan can be found for a task, the task is just ignored.
if plan is not None:
task_plan_costs = []
for op in plan:
op_cost = ground_op_costs.get(op, self._default_cost)
task_plan_costs.append(op_cost)
plan_costs.append(sum(task_plan_costs))
return -sum(plan_costs) # higher scores are better

def _get_task_plan_for_task(
self, task_id: _TaskID, ground_op_costs: Dict[_GroundSTRIPSOperator,
float]
) -> List[_GroundSTRIPSOperator]:
) -> Optional[List[_GroundSTRIPSOperator]]:
"""Returns None if no task plan can be found."""
# Optimization: only re-plan at a certain frequency.
replan_freq = CFG.active_sampler_explorer_replan_frequency
if task_id not in self._task_plan_calls_since_replan or \
Expand All @@ -435,18 +441,22 @@ def _get_task_plan_for_task(
"train": self._train_tasks,
"replan": self._replanning_tasks,
}[task_type][task_idx]
plan, _, _ = run_task_plan_once(
task,
self._nsrts,
self._predicates,
self._types,
timeout,
self._seed,
task_planning_heuristic=task_planning_heuristic,
ground_op_costs=ground_op_costs,
default_cost=self._default_cost,
max_horizon=np.inf)
self._task_plan_cache[task_id] = [n.op for n in plan]
try:
plan, _, _ = run_task_plan_once(
task,
self._nsrts,
self._predicates,
self._types,
timeout,
self._seed,
task_planning_heuristic=task_planning_heuristic,
ground_op_costs=ground_op_costs,
default_cost=self._default_cost,
max_horizon=np.inf)
self._task_plan_cache[task_id] = [n.op for n in plan]
except (PlanningFailure, PlanningTimeout): # pragma: no cover
logging.info("WARNING: task planning failed in the explorer.")
self._task_plan_cache[task_id] = None

self._task_plan_calls_since_replan[task_id] += 1
return self._task_plan_cache[task_id]
Expand Down
8 changes: 7 additions & 1 deletion predicators/perception/base_perceiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import abc

from predicators.structs import EnvironmentTask, Observation, State, Task
from predicators.structs import EnvironmentTask, Observation, State, Task, \
Video


class BasePerceiver(abc.ABC):
Expand All @@ -20,3 +21,8 @@ def reset(self, env_task: EnvironmentTask) -> Task:
@abc.abstractmethod
def step(self, observation: Observation) -> State:
    """Produce a State given the current and past observations.

    Abstract: every concrete perceiver must override this method.
    """

@abc.abstractmethod
def render_mental_images(self, observation: Observation,
                         env_task: EnvironmentTask) -> Video:
    """Create mental images for the given observation.

    Abstract: every concrete perceiver must override this method and
    return the rendered frames as a Video.

    NOTE(review): although the parameter is annotated as an Observation,
    CogMan appears to pass the State produced by the perceiver's reset()
    and step() -- confirm which type implementations should expect.
    """
6 changes: 5 additions & 1 deletion predicators/perception/kitchen_perceiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from predicators.envs.kitchen import KitchenEnv
from predicators.perception.base_perceiver import BasePerceiver
from predicators.structs import EnvironmentTask, GroundAtom, Observation, \
State, Task
State, Task, Video


class KitchenPerceiver(BasePerceiver):
Expand Down Expand Up @@ -49,3 +49,7 @@ def step(self, observation: Observation) -> State:

def _observation_to_state(self, obs: Observation) -> State:
return KitchenEnv.state_info_to_state(obs["state_info"])

def render_mental_images(self, observation: Observation,
                         env_task: EnvironmentTask) -> Video:
    """Not supported for kitchen: always raises NotImplementedError."""
    raise NotImplementedError("Mental images not implemented for kitchen")
6 changes: 5 additions & 1 deletion predicators/perception/sokoban_perceiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from predicators.envs.sokoban import SokobanEnv
from predicators.perception.base_perceiver import BasePerceiver
from predicators.structs import EnvironmentTask, GroundAtom, Object, \
Observation, State, Task
Observation, State, Task, Video

# Each observation is a tuple of four 2D boolean masks (numpy arrays).
# The order is: free, goals, boxes, player.
Expand Down Expand Up @@ -95,3 +95,7 @@ def _get_object_name(r: int, c: int, type_name: str) -> str:

state = utils.create_state_from_dict(state_dict)
return state

def render_mental_images(self, observation: Observation,
                         env_task: EnvironmentTask) -> Video:
    """Not supported for sokoban: always raises NotImplementedError."""
    raise NotImplementedError("Mental images not implemented for sokoban")
12 changes: 11 additions & 1 deletion predicators/perception/trivial_perceiver.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""A trivial perceiver that assumes observations are already states."""

from predicators.envs import get_or_create_env
from predicators.perception.base_perceiver import BasePerceiver
from predicators.structs import EnvironmentTask, Observation, State, Task
from predicators.settings import CFG
from predicators.structs import EnvironmentTask, Observation, State, Task, \
Video


class TrivialPerceiver(BasePerceiver):
Expand All @@ -17,3 +20,10 @@ def reset(self, env_task: EnvironmentTask) -> Task:
def step(self, observation: Observation) -> State:
    """Return the observation unchanged; it must already be a State."""
    # This perceiver does no processing, so anything other than a State is
    # a caller error.
    assert isinstance(observation, State)
    return observation

def render_mental_images(self, observation: Observation,
                         env_task: EnvironmentTask) -> Video:
    """Render the given state with the environment's own renderer.

    The observation must already be a State. The environment is looked up
    from CFG.env and its render_state() output is returned as-is.
    """
    assert isinstance(observation, State)
    # By default, simply defer to the environment's render function.
    return get_or_create_env(CFG.env).render_state(observation, env_task)
3 changes: 2 additions & 1 deletion predicators/planning.py
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,8 @@ def fd_plan_from_sas_file(
metrics: Metrics = defaultdict(float)
num_nodes_expanded = re.findall(r"Expanded (\d+) state", output)
num_nodes_created = re.findall(r"Evaluated (\d+) state", output)
assert len(num_nodes_expanded) == 1
if len(num_nodes_expanded) != 1:
raise PlanningFailure(f"Plan not found with FD! Error: {output}")
assert len(num_nodes_created) == 1
metrics["num_nodes_expanded"] = float(num_nodes_expanded[0])
metrics["num_nodes_created"] = float(num_nodes_created[0])
Expand Down
2 changes: 1 addition & 1 deletion predicators/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]:
# the horizon to be shorter.
"touch_point": 15,
# Ditto for the simple grid row environment.
"grid_row": cls.grid_row_num_cells + 5,
"grid_row": cls.grid_row_num_cells + 2,
})[args.get("env", "")],

# Maximum number of steps to roll out an option policy.
Expand Down
2 changes: 1 addition & 1 deletion scripts/configs/active_sampler_learning.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ ENVS:
grid_row:
NAME: "grid_row"
FLAGS:
max_num_steps_interaction_request: 500
max_num_steps_interaction_request: 150
active_sampler_learning_explore_length_base: 100000 # effectively disable
active_sampler_learning_feature_selection: all
ball_and_cup_sticky_table:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"pybullet>=3.2.0",
"scikit-learn",
"graphlib-backport",
"openai",
"openai==0.28.1",
"pyyaml",
"pylint==2.14.5",
"types-PyYAML",
Expand Down
1 change: 1 addition & 0 deletions tests/approaches/test_oracle_approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,7 @@ def simulate(self, state, action):
return self._transition_stack(state, x, y, z)

env = _ExternalBlocksEnv()
assert env.get_name() == "external_blocks"

# Create external options by modifying blocks options.
options = set()
Expand Down
10 changes: 8 additions & 2 deletions tests/approaches/test_pg3_analogy_approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,14 @@ def test_pg3_analogy_approach():
)"""


def test_find_env_analogies():
"""Tests for _find_env_analogies()."""
def _disabled_test_find_env_analogies(): # pragma: no cover
"""Tests for _find_env_analogies().
NOTE: this test is currently disabled because of sudden flakiness in the
SME dependency, despite no changes for months. Since we're not actively
using this code, we're just disabling it, but leaving it here in case we
do want to resurrect the code in the future.
"""
# Test for gripper -> ferry.
base_env = create_new_env("pddl_gripper_procedural_tasks")
base_nsrts = get_gt_nsrts(base_env.get_name(), base_env.predicates,
Expand Down
8 changes: 6 additions & 2 deletions tests/envs/test_pybullet_cover.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,12 @@ def test_pybullet_cover_step(env):
assert abs(state.get(block, "pose") - 0.75) < 0.01


def test_pybullet_cover_pick_workspace_bounds(env):
"""Tests for picking at workspace bounds in PyBulletCoverEnv."""
def _disabled_test_pybullet_cover_pick_workspace_bounds(
env): # pragma: no cover
"""Tests for picking at workspace bounds in PyBulletCoverEnv.
This is currently disabled due to nondeterminism issues in IK.
"""
block = Object("block0", env.block_type)
robot = env.robot
workspace_x, workspace_z = env.workspace_dimensions
Expand Down
2 changes: 2 additions & 0 deletions tests/envs/test_sokoban.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ def test_sokoban():
imgs = env.render()
assert len(imgs) == 1
task = perceiver.reset(env_task)
with pytest.raises(NotImplementedError):
perceiver.render_mental_images(env_task.init_obs, env_task)
state = task.init
atoms = utils.abstract(state, env.predicates)
num_boxes = len({a for a in atoms if a.predicate == IsBox})
Expand Down
6 changes: 3 additions & 3 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ def test_main():
eval_traj_dir = os.path.join(parent_dir, "_fake_trajs")
sys.argv = [
"dummy", "--env", "cover", "--approach", "oracle", "--seed", "123",
"--make_test_videos", "--num_test_tasks", "1", "--video_dir",
video_dir, "--results_dir", results_dir, "--eval_trajectories_dir",
eval_traj_dir
"--make_test_videos", "--make_cogman_videos", "--num_test_tasks", "1",
"--video_dir", video_dir, "--results_dir", results_dir,
"--eval_trajectories_dir", eval_traj_dir
]
main()
# Test making videos of failures and local logging.
Expand Down

0 comments on commit c89c8c8

Please sign in to comment.