From 1f19a2fac8bba3180c80cda612fc1b115f65b381 Mon Sep 17 00:00:00 2001 From: Nishanth Kumar Date: Tue, 10 Oct 2023 16:31:42 -0400 Subject: [PATCH 1/8] Implement infinite-horizon for exploration (#1565) * simple initial implementation * fix checks * okay - really fix checks now --- .../approaches/bilevel_planning_approach.py | 1 + .../explorers/active_sampler_explorer.py | 8 ++++-- predicators/planning.py | 10 ++++--- tests/test_planning.py | 27 ++++++++++++++++++- 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/predicators/approaches/bilevel_planning_approach.py b/predicators/approaches/bilevel_planning_approach.py index 6466e5bde4..66b701bf31 100644 --- a/predicators/approaches/bilevel_planning_approach.py +++ b/predicators/approaches/bilevel_planning_approach.py @@ -131,6 +131,7 @@ def _run_task_plan( timeout, seed, task_planning_heuristic=self._task_planning_heuristic, + max_horizon=float(CFG.horizon), **kwargs) except PlanningFailure as e: raise ApproachFailure(e.args[0], e.info) diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py index 7c184824ca..bb15001a1f 100644 --- a/predicators/explorers/active_sampler_explorer.py +++ b/predicators/explorers/active_sampler_explorer.py @@ -304,6 +304,8 @@ def _get_option_policy_for_task(self, o: -np.log(m.get_current_competence()) for o, m in self._competence_models.items() } + # Set large horizon for planning here because we don't want to error + # out due to plan exceeding horizon here. 
plan, atoms_seq, _ = run_task_plan_once( task, self._nsrts, @@ -313,7 +315,8 @@ def _get_option_policy_for_task(self, self._seed, task_planning_heuristic=task_planning_heuristic, ground_op_costs=ground_op_costs, - default_cost=self._default_cost) + default_cost=self._default_cost, + max_horizon=np.inf) return utils.nsrt_plan_to_greedy_option_policy( plan, task.goal, self._rng, necessary_atoms_seq=atoms_seq) @@ -415,7 +418,8 @@ def _get_task_plan_for_task( self._seed, task_planning_heuristic=task_planning_heuristic, ground_op_costs=ground_op_costs, - default_cost=self._default_cost) + default_cost=self._default_cost, + max_horizon=np.inf) self._task_plan_cache[task_id] = [n.op for n in plan] self._task_plan_calls_since_replan[task_id] += 1 diff --git a/predicators/planning.py b/predicators/planning.py index 0d1f148405..4f398ecd9e 100644 --- a/predicators/planning.py +++ b/predicators/planning.py @@ -1046,7 +1046,7 @@ def _update_sas_file_with_costs( def fd_plan_from_sas_file( sas_file: str, timeout_cmd: str, timeout: float, exec_str: str, alias_flag: str, start_time: float, objects: List[Object], - init_atoms: Set[GroundAtom], nsrts: Set[NSRT], max_horizon: int + init_atoms: Set[GroundAtom], nsrts: Set[NSRT], max_horizon: float ) -> Tuple[List[_GroundNSRT], List[Set[GroundAtom]], Metrics]: # pragma: no cover """Given a SAS file, runs search on it to generate a plan.""" @@ -1137,7 +1137,7 @@ def _sesame_plan_with_fast_downward( while True: skeleton, atoms_sequence, metrics = fd_plan_from_sas_file( sas_file, timeout_cmd, timeout, exec_str, alias_flag, start_time, - objects, init_atoms, nsrts, max_horizon) + objects, init_atoms, nsrts, float(max_horizon)) # Run low-level search on this skeleton. 
low_level_timeout = timeout - (time.perf_counter() - start_time) try: @@ -1174,6 +1174,7 @@ def run_task_plan_once( ground_op_costs: Optional[Dict[_GroundSTRIPSOperator, float]] = None, default_cost: float = 1.0, cost_precision: int = 3, + max_horizon: float = np.inf, **kwargs: Any ) -> Tuple[List[_GroundNSRT], List[Set[GroundAtom]], Metrics]: """Get a single abstract plan for a task.""" @@ -1204,6 +1205,9 @@ def run_task_plan_once( max_skeletons_optimized=1, use_visited_state_set=True, **kwargs)) + if len(plan) > max_horizon: + raise PlanningFailure( + "Skeleton produced by A-star exceeds horizon!") elif "fd" in CFG.sesame_task_planner: # pragma: no cover fd_exec_path = os.environ["FD_EXEC_PATH"] exec_str = os.path.join(fd_exec_path, "fast-downward.py") @@ -1243,7 +1247,7 @@ def run_task_plan_once( plan, atoms_seq, metrics = fd_plan_from_sas_file( sas_file, timeout_cmd, timeout, exec_str, alias_flag, start_time, - list(objects), init_atoms, nsrts, CFG.horizon) + list(objects), init_atoms, nsrts, float(max_horizon)) else: raise ValueError("Unrecognized sesame_task_planner: " f"{CFG.sesame_task_planner}") diff --git a/tests/test_planning.py b/tests/test_planning.py index 0600e5ae15..f3a4a3508f 100644 --- a/tests/test_planning.py +++ b/tests/test_planning.py @@ -17,7 +17,7 @@ from predicators.option_model import _OptionModelBase, _OracleOptionModel, \ create_option_model from predicators.planning import PlanningFailure, PlanningTimeout, \ - sesame_plan, task_plan, task_plan_grounding + run_task_plan_once, sesame_plan, task_plan, task_plan_grounding from predicators.settings import CFG from predicators.structs import NSRT, Action, ParameterizedOption, Predicate, \ State, STRIPSOperator, Task, Type, _GroundNSRT, _Option @@ -693,3 +693,28 @@ def test_sesame_plan_fast_downward(): assert "Please follow the instructions" in str(e) except ValueError as e: assert "Unrecognized sesame_task_planner" in str(e) + + +def test_task_planning_only(): + """Tests for the 
run_task_plan_once function.""" + utils.reset_config({ + "env": "cluttered_table", + "num_test_tasks": 50, + }) + env = ClutteredTableEnv() + nsrts = get_gt_nsrts(env.get_name(), env.predicates, + get_gt_options(env.get_name())) + env_task = env.get_test_tasks()[0] + task = env_task.task + preds = env.predicates + types = env.types + with pytest.raises(PlanningFailure) as e: + run_task_plan_once(task, + nsrts, + preds, + types, + 100000.0, + 0, + task_planning_heuristic="lmcut", + max_horizon=0.0) + assert "exceeds horizon" in str(e) From 9f45d9c23c9096b3a1666f71c3655a01f23386ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Cie=C5=9Blar?= <37981302+barci2@users.noreply.github.com> Date: Tue, 10 Oct 2023 20:56:46 -0400 Subject: [PATCH 2/8] MyPy Bump and changes (#1568) --- predicators/utils.py | 2 +- scripts/cluster_utils.py | 7 ++++--- setup.py | 3 +-- tests/approaches/__init__.py | 0 tests/envs/test_base_env.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 tests/approaches/__init__.py diff --git a/predicators/utils.py b/predicators/utils.py index c61d02f40e..b1ed1776cb 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -2169,7 +2169,7 @@ def all_ground_nsrts_fd_translator( prob_str = create_pddl_problem(objects, init_atoms, goal, "mydomain", "myproblem") with nostdout(): - sas_task = downward_translate(dom_str, prob_str) + sas_task = downward_translate(dom_str, prob_str) # type: ignore for operator in sas_task.operators: split_name = operator.name[1:-1].split() # strip out ( and ) nsrt = nsrt_name_to_nsrt[split_name[0]] diff --git a/scripts/cluster_utils.py b/scripts/cluster_utils.py index 1289e1afed..94f1ece871 100644 --- a/scripts/cluster_utils.py +++ b/scripts/cluster_utils.py @@ -3,7 +3,7 @@ import os import subprocess from dataclasses import dataclass -from typing import Any, Dict, Iterator, List, Tuple +from typing import Any, Dict, Iterator, List, Optional, Tuple import yaml @@ -161,8 +161,9 @@ def 
run_cmds_on_machine( cmds: List[str], user: str, machine: str, - ssh_key: str = None, - allowed_return_codes: Tuple[int, ...] = (0, )) -> None: + ssh_key: Optional[str] = None, + allowed_return_codes: Tuple[int, ...] = (0, ) +) -> None: """SSH into the machine, run the commands, then exit.""" host = f"{user}@{machine}" ssh_cmd = f"ssh -tt -o StrictHostKeyChecking=no {host}" diff --git a/setup.py b/setup.py index 7b442239c0..54e749820e 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,7 @@ install_requires=[ "numpy>=1.22.3", "pytest", + "mypy", "gym==0.26.2", "matplotlib", "imageio", @@ -42,7 +43,5 @@ "yapf==0.32.0", "docformatter==1.4", "isort==5.10.1", - "mypy@git+https://github.com/python/mypy.git@9bd651758e8ea2494" + - "837814092af70f8d9e6f7a1", ] }) diff --git a/tests/approaches/__init__.py b/tests/approaches/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/envs/test_base_env.py b/tests/envs/test_base_env.py index ee82bf35fe..d11f531a0e 100644 --- a/tests/envs/test_base_env.py +++ b/tests/envs/test_base_env.py @@ -5,11 +5,11 @@ from unittest.mock import patch import pytest -from test_oracle_approach import ENV_NAME_AND_CLS import predicators.envs from predicators import utils from predicators.envs import BaseEnv, create_new_env, get_or_create_env +from tests.approaches.test_oracle_approach import ENV_NAME_AND_CLS _MODULE_PATH = predicators.envs.__name__ From 199c0e0ba222e5bb8a3cc49de4965491701bdfb9 Mon Sep 17 00:00:00 2001 From: Nishanth Kumar Date: Thu, 12 Oct 2023 08:53:21 -0400 Subject: [PATCH 3/8] minor changes to fix bugs (#1569) --- predicators/approaches/online_nsrt_learning_approach.py | 8 ++++++++ predicators/cogman.py | 6 +++++- .../execution_monitoring/base_execution_monitor.py | 1 + 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/predicators/approaches/online_nsrt_learning_approach.py b/predicators/approaches/online_nsrt_learning_approach.py index 15d3705c40..5d3fcd1e80 100644 --- 
a/predicators/approaches/online_nsrt_learning_approach.py +++ b/predicators/approaches/online_nsrt_learning_approach.py @@ -52,6 +52,14 @@ def get_interaction_requests(self) -> List[InteractionRequest]: # is randomly selected. explorer = self._create_explorer() + # NOTE: this is definitely awkward, but we have to reset this + # info so that if we ever use the execution monitor while doing + # exploration and collecting more data, it doesn't mistakenly + # try to monitor stuff using a previously-saved plan. + self._last_nsrt_plan = [] + self._last_atoms_seq = [] + self._last_plan = [] + # Create the interaction requests. requests = [] for _ in range(CFG.online_nsrt_learning_requests_per_cycle): diff --git a/predicators/cogman.py b/predicators/cogman.py index 121f681291..e73b48bfda 100644 --- a/predicators/cogman.py +++ b/predicators/cogman.py @@ -67,7 +67,11 @@ def step(self, observation: Observation) -> Optional[Action]: self._exec_monitor.reset(task) self._exec_monitor.update_approach_info( self._approach.get_execution_monitoring_info()) - assert not self._exec_monitor.step(state) + # We only reset the approach if the override policy is + # None, so this below assertion only works in this + # case. 
+ if self._override_policy is None: + assert not self._exec_monitor.step(state) assert self._current_policy is not None act = self._current_policy(state) self._exec_monitor.update_approach_info( diff --git a/predicators/execution_monitoring/base_execution_monitor.py b/predicators/execution_monitoring/base_execution_monitor.py index 6e1a75c287..631b979158 100644 --- a/predicators/execution_monitoring/base_execution_monitor.py +++ b/predicators/execution_monitoring/base_execution_monitor.py @@ -22,6 +22,7 @@ def reset(self, task: Task) -> None: """Reset after replanning.""" del task # unused self._curr_plan_timestep = 0 + self._approach_info = [] @abc.abstractmethod def step(self, state: State) -> bool: From 7aa8f592959eccabf6338db957244785cf49110f Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Mon, 16 Oct 2023 13:28:01 -0400 Subject: [PATCH 4/8] fix get_objects in hierarchical typing case (#1572) Co-authored-by: Tom Silver --- predicators/structs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/predicators/structs.py b/predicators/structs.py index 4f4d499920..41f4b2cf41 100644 --- a/predicators/structs.py +++ b/predicators/structs.py @@ -134,7 +134,7 @@ def set(self, obj: Object, feature_name: str, feature_val: Any) -> None: def get_objects(self, object_type: Type) -> List[Object]: """Return objects of the given type in the order of __iter__().""" - return [o for o in self if o.type == object_type] + return [o for o in self if o.is_instance(object_type)] def vec(self, objects: Sequence[Object]) -> Array: """Concatenated vector of features for each of the objects in the given From fba285dd49978ab0cb1543b9b90a5181bebf8753 Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Tue, 17 Oct 2023 17:55:27 -0400 Subject: [PATCH 5/8] fix hierarchical typing edge case (#1574) --- predicators/utils.py | 14 +++++++++++--- tests/test_utils.py | 6 +++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/predicators/utils.py b/predicators/utils.py index 
b1ed1776cb..11db9e37ab 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -2806,9 +2806,17 @@ def create_pddl_domain(operators: Collection[NSRTOrSTRIPSOperator], for parent_type in sorted(parent_to_children_types): child_types = parent_to_children_types[parent_type] if not child_types: - continue - child_type_str = " ".join(t.name for t in child_types) - types_str += f"\n {child_type_str} - {parent_type.name}" + # Special case: type has no children and also does not appear + # as a child of another type. + is_child_type = any( + parent_type in children + for children in parent_to_children_types.values()) + if not is_child_type: + types_str += f"\n {parent_type.name}" + # Otherwise, the type will appear as a child elsewhere. + else: + child_type_str = " ".join(t.name for t in child_types) + types_str += f"\n {child_type_str} - {parent_type.name}" ops_lst = sorted(operators) preds_str = "\n ".join(pred.pddl_str() for pred in preds_lst) ops_strs = "\n\n ".join(op.pddl_str() for op in ops_lst) diff --git a/tests/test_utils.py b/tests/test_utils.py index 2743f7b71b..06e73cb340 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2318,12 +2318,16 @@ def test_create_pddl(): env = ProceduralTasksSpannerPDDLEnv() nsrts = get_gt_nsrts(env.get_name(), env.predicates, get_gt_options(env.get_name())) - domain_str = utils.create_pddl_domain(nsrts, env.predicates, env.types, + # Test case where there is a special type with no parents or children. 
+ monkey_type = Type("monkey", []) + types = env.types | {monkey_type} + domain_str = utils.create_pddl_domain(nsrts, env.predicates, types, "spanner") assert domain_str == """(define (domain spanner) (:requirements :typing) (:types man nut spanner - locatable + monkey locatable location - object) (:predicates From db29a6c4bb8d29d2adcf9a1740af5f9c61f6f65d Mon Sep 17 00:00:00 2001 From: Nishanth Kumar Date: Wed, 18 Oct 2023 10:02:46 -0400 Subject: [PATCH 6/8] Fix + raise awareness of subtle bugs with active sampler exploration (#1575) * fix subtle bugs * yapf --- .../explorers/active_sampler_explorer.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py index bb15001a1f..5b592ca40f 100644 --- a/predicators/explorers/active_sampler_explorer.py +++ b/predicators/explorers/active_sampler_explorer.py @@ -97,6 +97,7 @@ def _get_exploration_strategy(self, train_task_idx: int, assigned_task_horizon = CFG.horizon current_policy: Optional[Callable[[State], _Option]] = None next_practice_nsrt: Optional[_GroundNSRT] = None + current_task_repeat_goal: Optional[Set[GroundAtom]] = None using_random = False def _option_policy(state: State) -> _Option: @@ -162,6 +163,7 @@ def generate_goals() -> Iterator[Set[GroundAtom]]: logging.info("[Explorer] Pursuing repeat task") def generate_goals() -> Iterator[Set[GroundAtom]]: + nonlocal current_task_repeat_goal # Loop through seen tasks in random order. Propose # their initial abstract states and their goals until # one is found that is not already achieved. @@ -171,12 +173,23 @@ def generate_goals() -> Iterator[Set[GroundAtom]]: task = self._train_tasks[train_task_idx] # Can only practice the task if the objects match. if set(task.init) == set(state): + # If we've already been trying to achieve a + # particular goal, then keep trying to achieve + # it. 
+ if current_task_repeat_goal is not None: + current_pursuit_goal_achieved = all( + a.holds(state) + for a in current_task_repeat_goal) + if not current_pursuit_goal_achieved: + yield current_task_repeat_goal + # Else, figure out the next goal to plan to! possible_goals = [ task.goal, utils.abstract(task.init, self._predicates) ] for goal in possible_goals: if any(not a.holds(state) for a in goal): + current_task_repeat_goal = goal yield goal # Otherwise, practice. @@ -212,6 +225,13 @@ def generate_goals() -> Iterator[Set[GroundAtom]]: # crash in case that assumption is not met. except (PlanningFailure, PlanningTimeout): # pragma: no cover + logging.info( + "WARNING: Planning graph is not " + "fully-connected! This violates a key " + "assumption of our active sampler learning " + "framework; ensure you DO NOT see this message " + "if you're running experiments comparing" + "different active sampler learning approaches.") continue logging.info("[Explorer] Plan found.") break From 30905911f62490d954882de6ba42496de0da8679 Mon Sep 17 00:00:00 2001 From: Nishanth Kumar Date: Fri, 27 Oct 2023 15:27:48 -0400 Subject: [PATCH 7/8] Ball and Cup Sticky Table Env (#1576) * initial commit that seems to run without error... * fix bug in placing logic * delete outdated comment * fix replanning bug * more data = better results??? * starting tests * try oracle feature selection? * fix buggy test * increase training time? * yapf + fix tom comment * fix reachability issue in placing * minor * more unit tests * fix and more tests * this should be interesting * see if this yields a difference * let's see what happens now * woops * try removing placing cup with the ball on the table * hail mary * minor changes + logging * run task repeat first * sticky table with moving radius * yay! try other approaches... * polar coordinates ftw! * try a simpler thing * let's see how this does. 
* try more probability of success * all baselines * try running grid row env * most things passing * try this * progress towards PR * should be ready! * revert unnecessary change * fix linting * tom comments --------- Co-authored-by: Tom Silver --- predicators/envs/ball_and_cup_sticky_table.py | 563 ++++++++++++++++++ .../explorers/active_sampler_explorer.py | 6 +- .../ball_and_cup_sticky_table/__init__.py | 9 + .../ball_and_cup_sticky_table/nsrts.py | 533 +++++++++++++++++ .../ball_and_cup_sticky_table/options.py | 198 ++++++ predicators/settings.py | 6 + predicators/utils.py | 118 +++- scripts/configs/active_sampler_learning.yaml | 35 +- .../create_active_sampler_learning_plots.py | 15 +- .../test_ball_and_cup_sticky_table_env.py | 246 ++++++++ tests/envs/test_kitchen.py | 23 +- tests/test_utils.py | 49 +- 12 files changed, 1740 insertions(+), 61 deletions(-) create mode 100644 predicators/envs/ball_and_cup_sticky_table.py create mode 100644 predicators/ground_truth_models/ball_and_cup_sticky_table/__init__.py create mode 100644 predicators/ground_truth_models/ball_and_cup_sticky_table/nsrts.py create mode 100644 predicators/ground_truth_models/ball_and_cup_sticky_table/options.py create mode 100644 tests/envs/test_ball_and_cup_sticky_table_env.py diff --git a/predicators/envs/ball_and_cup_sticky_table.py b/predicators/envs/ball_and_cup_sticky_table.py new file mode 100644 index 0000000000..7a19221a54 --- /dev/null +++ b/predicators/envs/ball_and_cup_sticky_table.py @@ -0,0 +1,563 @@ +"""Ball and cup with sticky table simulated environment.""" + +from typing import ClassVar, Dict, List, Optional, Sequence, Set, Tuple + +import matplotlib +import numpy as np +from gym.spaces import Box + +from predicators import utils +from predicators.envs import BaseEnv +from predicators.settings import CFG +from predicators.structs import Action, EnvironmentTask, GroundAtom, Object, \ + Predicate, State, Type + + +class BallAndCupStickyTableEnv(BaseEnv): + """An environment 
where a ball must be transported between different + tables. This environment is a more-complex (but significantly different) + version of the sticky-table environment. + + Most of the tables are completely flat, but one is mostly smooth and is sticky only in a particular circular region on the table. If the agent tries + to place the ball directly on any table, it will roll off with high + probability. If it tries to place it on a special table, + the ball will *certainly* roll off. However, if the ball is placed inside + a cup first, then the ball + cup system stays on the sticky and normal + table surfaces with high probability. + + Note that unlike almost all of our other environments, there is real + stochasticity in the outcomes of placing. + + The action space is 5D and slightly complicated: please see the comment + under the action_space class property below. + """ + x_lb: ClassVar[float] = 0.0 + x_ub: ClassVar[float] = 1.0 + y_lb: ClassVar[float] = 0.0 + y_ub: ClassVar[float] = 1.0 + reachable_thresh: ClassVar[float] = 0.1 + objs_scale: ClassVar[float] = 0.25 # as a function of table radius + sticky_region_radius_scale: ClassVar[float] = 0.35 + # Types + _table_type: ClassVar[Type] = Type("table", [ + "x", "y", "radius", "sticky", "sticky_region_x_offset", + "sticky_region_y_offset", "sticky_region_radius" + ]) + _robot_type: ClassVar[Type] = Type("robot", ["x", "y"]) + _ball_type: ClassVar[Type] = Type("ball", ["x", "y", "radius", "held"]) + _cup_type: ClassVar[Type] = Type("cup", ["x", "y", "radius", "held"]) + + def __init__(self, use_gui: bool = True) -> None: + super().__init__(use_gui) + + # For noisy simulation.
+ self._noise_rng = np.random.default_rng(CFG.seed) + + # Predicates + self._BallOnTable = Predicate("BallOnTable", + [self._ball_type, self._table_type], + self._OnTable_holds) + self._BallOnFloor = Predicate("BallOnFloor", [self._ball_type], + self._OnFloor_holds) + self._CupOnTable = Predicate("CupOnTable", + [self._cup_type, self._table_type], + self._OnTable_holds) + self._CupOnFloor = Predicate("CupOnFloor", [self._cup_type], + self._OnFloor_holds) + self._HoldingBall = Predicate("HoldingBall", [self._ball_type], + self._Holding_holds) + self._HoldingCup = Predicate("HoldingCup", [self._cup_type], + self._Holding_holds) + self._HandEmpty = Predicate("HandEmpty", [], self._HandEmpty_holds) + self._IsReachableSurface = Predicate( + "IsReachableSurface", [self._robot_type, self._table_type], + self._IsReachable_holds) + self._IsReachableBall = Predicate("IsReachableBall", + [self._robot_type, self._ball_type], + self._IsReachable_holds) + self._IsReachableCup = Predicate("IsReachableCup", + [self._robot_type, self._cup_type], + self._IsReachable_holds) + self._BallInCup = Predicate("BallInCup", + [self._ball_type, self._cup_type], + self._BallInCup_holds) + self._BallNotInCup = Predicate("BallNotInCup", + [self._ball_type, self._cup_type], + self._BallNotInCup_holds) + + def render_state_plt( + self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> matplotlib.figure.Figure: + raise NotImplementedError("Rendering not implemented yet!") + + @classmethod + def get_name(cls) -> str: + return "ball_and_cup_sticky_table" + + @property + def predicates(self) -> Set[Predicate]: + return { + self._BallOnTable, self._BallOnFloor, self._CupOnTable, + self._CupOnFloor, self._HoldingBall, self._HoldingCup, + self._HandEmpty, self._IsReachableSurface, self._IsReachableBall, + self._IsReachableCup, self._BallInCup, self._BallNotInCup + } + + @property + def types(self) -> Set[Type]: + return { + self._table_type, 
self._robot_type, self._ball_type, self._cup_type + } + + @property + def goal_predicates(self) -> Set[Predicate]: + return {self._BallOnTable} + + @property + def action_space(self) -> Box: + # Action space is [move_or_pickplace, obj_type_id, ball_only, x, y]. + # If move_or_pickplace is 0, robot will move to the indicated + # x, y location. + # Otherwise, if move_or_pickplace is 1, it will either pick or place + # the object with obj_type_id at the x, y location. + # obj_type_id 1.0 = ball, 2.0 = cup, 3.0 = table + # The ball_only var is used to handle the case where we're holding the + # ball and cup and want to only place the ball somewhere. + return Box( + np.array([0.0, 0.0, 0.0, self.x_lb, self.y_lb], dtype=np.float32), + np.array([1.0, 3.0, 1.0, self.x_ub, self.y_ub], dtype=np.float32)) + + @property + def _pick_success_prob(self) -> float: + return CFG.sticky_table_pick_success_prob + + @property + def _place_sticky_fall_prob(self) -> float: + return CFG.sticky_table_place_sticky_fall_prob + + @property + def _place_ball_fall_prob(self) -> float: + return CFG.sticky_table_place_ball_fall_prob + + @property + def _place_smooth_fall_prob(self) -> float: + return CFG.sticky_table_place_smooth_fall_prob + + @classmethod + def _object_to_geom(cls, obj: Object, state: State) -> utils._Geom2D: + x = state.get(obj, "x") + y = state.get(obj, "y") + radius = state.get(obj, "radius") + return utils.Circle(x, y, radius) + + def _generate_train_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_train_tasks, rng=self._train_rng) + + def _generate_test_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_test_tasks, rng=self._test_rng) + + def _get_tasks(self, num: int, + rng: np.random.Generator) -> List[EnvironmentTask]: + tasks: List[EnvironmentTask] = [] + while len(tasks) < num: + # The initial location of the robot is randomized.
+ num_tables = CFG.sticky_table_num_tables + assert num_tables >= 2 + state_dict: Dict[Object, Dict[str, float]] = {} + # Generate the tables in a ring around the center of the room. + origin_x = (self.x_ub - self.x_lb) / 2 + origin_y = (self.y_ub - self.y_lb) / 2 + d = min(self.x_ub - self.x_lb, self.y_ub - self.y_lb) / 3 + thetas = np.linspace(0, 2 * np.pi, num=num_tables, endpoint=False) + # Select the radius to prevent any overlap. Exact would be + # d * sin(theta / 2). Divide by 2 to be conservative. + angle_diff = thetas[1] - thetas[0] + radius = d * np.sin(angle_diff / 2) / 2 + size = radius * self.objs_scale + # Add a random spin to offset the circle. This is to ensure + # the tables are in different positions along the circle every + # time. + sticky_region_radius = radius * self.sticky_region_radius_scale + # Now, actually instantiate the tables. + for i, theta in enumerate(thetas): + x = d * np.cos(theta) + origin_x + y = d * np.sin(theta) + origin_y + if i >= CFG.sticky_table_num_sticky_tables: + prefix = "normal" + sticky = 0.0 + else: + prefix = "sticky" + sticky = 1.0 + obj = Object(f"{prefix}-table-{i}", self._table_type) + sticky_region_dist_from_center = rng.uniform( + 0.0, radius - sticky_region_radius) + sticky_region_theta_from_center = rng.uniform(0.0, 2 * np.pi) + state_dict[obj] = { + "x": + x, + "y": + y, + "radius": + radius, + "sticky": + sticky, + "sticky_region_x_offset": + sticky_region_dist_from_center * + np.cos(sticky_region_theta_from_center), + "sticky_region_y_offset": + sticky_region_dist_from_center * + np.sin(sticky_region_theta_from_center), + "sticky_region_radius": + sticky_region_radius + } + tables = sorted(state_dict) + target_table = tables[-1] + ball_table = tables[0] + # Create cup and initialize it to be somewhere + # on the floor. 
+ while True: + x = rng.uniform(self.x_lb, self.x_ub) + y = rng.uniform(self.y_lb, self.y_ub) + cup = Object("cup", self._cup_type) + state_dict[cup] = { + "x": x, + "y": y, + "radius": size + + 0.05 * size, # need to make sure cup is bigger than ball + "held": 0.0, + } + state = utils.create_state_from_dict(state_dict) + if self._OnFloor_holds(state, [cup]): + break + # Create ball and place it delicately balanced atop + # a table initially. This is intentional: we want the agent + # to really struggle/be unable to recreate the initial + # set of atoms. + table_x = state_dict[ball_table]["x"] + table_y = state_dict[ball_table]["y"] + while True: + theta = rng.uniform(0, 2 * np.pi) + dist = rng.uniform(0, radius) + x = table_x + dist * np.cos(theta) + y = table_y + dist * np.sin(theta) + ball = Object("ball", self._ball_type) + state_dict[ball] = { + "x": x, + "y": y, + "radius": size - + 0.05 * size, # need to make sure cup is bigger than ball + "held": 0.0 + } + state = utils.create_state_from_dict(state_dict) + if self._OnTable_holds(state, [ball, ball_table]): + break + # Create robot. Set the robot's pose by randomly sampling + # valid poses in the room, but ensure that the pose is + # such that the robot is initially only reachable to 1 + # object (otherwise, the domain is not necessarily + # reversible given our defined NSRTs). 
+ while True: + x = rng.uniform(self.x_lb, self.x_ub) + y = rng.uniform(self.y_lb, self.y_ub) + robot = Object("robot", self._robot_type) + state_dict[robot] = { + "x": x, + "y": y, + } + state = utils.create_state_from_dict(state_dict) + if not self._invalid_robot_init_pos(state): + break + + goal = {GroundAtom(self._BallOnTable, [ball, target_table])} + task = EnvironmentTask(state, goal) + tasks.append(task) + return tasks + + @classmethod + def exists_robot_collision(cls, state: State) -> bool: + """Return true if there is a collision between the robot and any other + object in the environment.""" + robot, = state.get_objects(cls._robot_type) + all_possible_collision_objs = state.get_objects( + cls._table_type) + state.get_objects( + cls._cup_type) + state.get_objects(cls._ball_type) + for obj in all_possible_collision_objs: + obj_geom = cls._object_to_geom(obj, state) + if obj_geom.contains_point(state.get(robot, "x"), + state.get(robot, "y")): + return True + return False + + def _invalid_robot_init_pos(self, state: State) -> bool: + """Return true if the robot position either (1) is in collision or (2) + is reachable to either no objects, or more than one object (important + for reversibility of domain).""" + robot, = state.get_objects(self._robot_type) + all_possible_collision_objs = state.get_objects( + self._table_type) + state.get_objects( + self._cup_type) + state.get_objects(self._ball_type) + num_objs_reachable = 0 + for obj in all_possible_collision_objs: + if self._IsReachable_holds(state, [robot, obj]): + num_objs_reachable += 1 + if num_objs_reachable > 1: + return True + if num_objs_reachable != 1: + return True + return self.exists_robot_collision(state) + + def _OnTable_holds(self, state: State, objects: Sequence[Object]) -> bool: + obj, table = objects + if self._Holding_holds(state, [obj]): + return False + obj_geom = self._object_to_geom(obj, state) + circ = self._object_to_geom(table, state) + assert isinstance(circ, utils.Circle) + assert 
isinstance(obj_geom, utils.Circle) + return circ.contains_circle(obj_geom) + + def _OnFloor_holds(self, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if self._Holding_holds(state, [obj]): + return False + for table in state.get_objects(self._table_type): + if self._OnTable_holds(state, [obj, table]): + return False + return True + + def _Holding_holds(self, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "held") > 0.5 + + def _HandEmpty_holds(self, state: State, + objects: Sequence[Object]) -> bool: + assert not objects + ball, = state.get_objects(self._ball_type) + cup, = state.get_objects(self._cup_type) + return not (self._Holding_holds(state, [ball]) + or self._Holding_holds(state, [cup])) + + def _euclidean_reachability_check(self, x1: float, y1: float, x2: float, + y2: float) -> bool: + return np.sqrt((x1 - x2)**2 + (y1 - y2)**2) <= self.reachable_thresh + + def _IsReachable_holds(self, state: State, + objects: Sequence[Object]) -> bool: + robot, other_obj = objects + return self._euclidean_reachability_check(state.get(robot, "x"), + state.get(robot, "y"), + state.get(other_obj, "x"), + state.get(other_obj, "y")) + + def _BallInCup_holds(self, state: State, + objects: Sequence[Object]) -> bool: + ball, cup = objects + ball_geom = self._object_to_geom(ball, state) + cup_geom = self._object_to_geom(cup, state) + assert isinstance(ball_geom, utils.Circle) + assert isinstance(cup_geom, utils.Circle) + ball_and_cup_at_same_pos = cup_geom.contains_circle(ball_geom) + holding_ball = self._Holding_holds(state, [ball]) + holding_cup = self._Holding_holds(state, [cup]) + return ball_and_cup_at_same_pos and ( + (holding_ball and holding_cup) or + (not holding_ball and not holding_cup)) + + def _BallNotInCup_holds(self, state: State, + objects: Sequence[Object]) -> bool: + return not self._BallInCup_holds(state, objects) + + def _table_is_sticky(self, table: Object, state: State) -> bool: + return 
state.get(table, "sticky") > 0.5 + + def simulate(self, state: State, action: Action) -> State: + # NOTE: noise is added here. Two calls to simulate with the same + # inputs may produce different outputs! + assert self.action_space.contains(action.arr) + move_or_pickplace, obj_type_id, ball_only, act_x, act_y = action.arr + next_state = state.copy() + hand_empty = self._HandEmpty_holds(state, []) + ball, = state.get_objects(self._ball_type) + cup, = state.get_objects(self._cup_type) + robot, = state.get_objects(self._robot_type) + ball_held = self._Holding_holds(state, [ball]) + cup_held = self._Holding_holds(state, [cup]) + ball_in_cup = self._BallInCup_holds(state, [ball, cup]) + obj_being_held: Optional[Object] = None + if (ball_held and not cup_held) or ball_only > 0.5: + obj_being_held = ball + elif cup_held: + obj_being_held = cup + # In this case, handle picking/placing. + if move_or_pickplace == 1.0: + # Picking logic. + if hand_empty: + # Fail sometimes. + if self._noise_rng.uniform() < self._pick_success_prob: + if obj_type_id == 1.0: + # Pick ball. + if self._action_grasps_object(act_x, act_y, ball, + state): + next_state.set(ball, "held", 1.0) + assert self._Holding_holds(next_state, [ball]) + else: + assert obj_type_id == 2.0 + if self._action_grasps_object(act_x, act_y, cup, + state): + # Pick cup. + next_state.set(cup, "held", 1.0) + if ball_in_cup: + # Pick both ball and cup simultaneously. + next_state.set(ball, "held", 1.0) + assert self._Holding_holds(next_state, [ball]) + assert self._Holding_holds(next_state, [cup]) + # Placing logic. + else: + if not hand_empty: + assert obj_being_held is not None + # Find the table for placing, if any. + table: Optional[Object] = None + for target in state.get_objects(self._table_type): + circ = self._object_to_geom(target, state) + if circ.contains_point(act_x, act_y): + table = target + break + if table is None: + # Put on the floor at the commanded position. 
+ next_state = self._handle_placing_object( + act_x, act_y, next_state, obj_being_held, ball, + cup, ball_in_cup, ball_only) + # Release object being held. + if obj_being_held is not None: + next_state.set(obj_being_held, "held", 0.0) + assert self._OnFloor_holds(next_state, + [obj_being_held]) + else: + # Check that we are only attempting to place + # within our reachable radius. Note that we don't + # check this for placing on the floor, because the + # robot is allowed to 'throw' things onto the floor. + table_x = state.get(table, "x") + table_y = state.get(table, "y") + if self._euclidean_reachability_check( + state.get(robot, "x"), state.get(robot, "y"), + table_x, table_y): + # Release object being held. + if obj_being_held is not None: + next_state.set(obj_being_held, "held", 0.0) + if obj_type_id == 3.0: + # Possibly put on the table, or have it fall + # somewhere near. + fall_prob = self._place_sticky_fall_prob + if obj_being_held == ball: + fall_prob = self._place_ball_fall_prob + if self._table_is_sticky(table, state): + # Check if placing on the smooth part of + # the sticky table, and set fall prob + # accordingly. + sticky_region_x = state.get( + table, + "sticky_region_x_offset") + table_x + sticky_region_y = state.get( + table, + "sticky_region_y_offset") + table_y + sticky_region = utils.Circle( + sticky_region_x, sticky_region_y, + state.get(table, + "sticky_region_radius")) + if not sticky_region.contains_point( + act_x, act_y): + if obj_being_held == cup: + fall_prob = \ + self._place_smooth_fall_prob + else: + assert obj_being_held == ball + fall_prob = 1.0 + # Handle object falling or placing on table + # surface. 
+ if self._noise_rng.uniform() < fall_prob: + fall_x, fall_y = \ + self._sample_floor_point_around_table( + table, state, self._noise_rng) + next_state = self._handle_placing_object( + fall_x, fall_y, next_state, + obj_being_held, ball, cup, ball_in_cup, + ball_only) + assert self._OnFloor_holds( + next_state, [obj_being_held]) + else: + next_state = self._handle_placing_object( + act_x, act_y, next_state, + obj_being_held, ball, cup, ball_in_cup, + ball_only) + assert self._OnTable_holds( + next_state, [obj_being_held, table]) + else: + # corresponding to placing in cup + assert obj_type_id == 2.0 + assert obj_being_held == ball + next_state.set(ball, "x", act_x) + next_state.set(ball, "y", act_y) + next_state.set(ball, "held", 0.0) + assert self._BallInCup_holds( + next_state, [ball, cup]) + if ball_only < 0.5: + assert self._HandEmpty_holds(next_state, []) + else: + # Navigation logic. + pseudo_next_state = state.copy() + pseudo_next_state.set(robot, "x", act_x) + pseudo_next_state.set(robot, "y", act_y) + if self.exists_robot_collision(pseudo_next_state): + return next_state # pragma: no cover + next_state.set(robot, "x", act_x) + next_state.set(robot, "y", act_y) + return next_state + + def _action_grasps_object(self, act_x: float, act_y: float, obj: Object, + state: State) -> bool: + obj_geom = self._object_to_geom(obj, state) + return obj_geom.contains_point(act_x, act_y) + + def _handle_placing_object(self, act_x: float, act_y: float, state: State, + obj_being_held: Object, ball: Object, + cup: Object, ball_in_cup: bool, + ball_only: float) -> State: + """Logic for correctly setting the location of the held object after + executing the place skill.""" + assert ball.type == self._ball_type + assert cup.type == self._cup_type + next_state = state.copy() + next_state.set(obj_being_held, "x", act_x) + next_state.set(obj_being_held, "y", act_y) + next_state.set(obj_being_held, "held", 0.0) + if ball_in_cup and obj_being_held == cup and ball_only < 0.5: + 
next_state.set(ball, "x", act_x) + next_state.set(ball, "y", act_y) + next_state.set(ball, "held", 0.0) + return next_state + + def _sample_floor_point_around_table( + self, table: Object, state: State, + rng: np.random.Generator) -> Tuple[float, float]: + x = state.get(table, "x") + y = state.get(table, "y") + radius = state.get(table, "radius") + dist_from_table = self.objs_scale * radius + dist = radius + rng.uniform(radius + dist_from_table, radius + + (1.15 * dist_from_table)) + theta = rng.uniform(0, 2 * np.pi) + sampled_x = x + dist * np.cos(theta) + sampled_y = y + dist * np.sin(theta) + while sampled_x < self.x_lb or sampled_x > self.x_ub or \ + sampled_y < self.y_lb or sampled_y > self.y_ub: + dist = radius + rng.uniform(radius + dist_from_table, radius + + (1.15 * dist_from_table)) + theta = rng.uniform(0, 2 * np.pi) + sampled_x = x + dist * np.cos(theta) + sampled_y = y + dist * np.sin(theta) + return (sampled_x, sampled_y) diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py index 5b592ca40f..6f23e7ebff 100644 --- a/predicators/explorers/active_sampler_explorer.py +++ b/predicators/explorers/active_sampler_explorer.py @@ -213,7 +213,11 @@ def generate_goals() -> Iterator[Set[GroundAtom]]: for goal in generate_goals(): task = Task(state, goal) logging.info(f"[Explorer] Replanning to {task.goal}") - + # If the goal is empty, then we can just recursively + # call the policy, since we don't need to execute + # anything. + if len(goal) == 0: + return _option_policy(state) # pragma: no cover # Add this task to the re-planning task queue. 
self._replanning_tasks.append(task) diff --git a/predicators/ground_truth_models/ball_and_cup_sticky_table/__init__.py b/predicators/ground_truth_models/ball_and_cup_sticky_table/__init__.py new file mode 100644 index 0000000000..52ef5dd2f1 --- /dev/null +++ b/predicators/ground_truth_models/ball_and_cup_sticky_table/__init__.py @@ -0,0 +1,9 @@ +"""Ground-truth models for ball and cup sticky table environment.""" + +from .nsrts import BallAndCupStickyTableGroundTruthNSRTFactory +from .options import BallAndCupStickyTableGroundTruthOptionFactory + +__all__ = [ + "BallAndCupStickyTableGroundTruthNSRTFactory", + "BallAndCupStickyTableGroundTruthOptionFactory" +] diff --git a/predicators/ground_truth_models/ball_and_cup_sticky_table/nsrts.py b/predicators/ground_truth_models/ball_and_cup_sticky_table/nsrts.py new file mode 100644 index 0000000000..ef68c8c499 --- /dev/null +++ b/predicators/ground_truth_models/ball_and_cup_sticky_table/nsrts.py @@ -0,0 +1,533 @@ +"""Ground-truth NSRTs for the ball and cup sticky table environment.""" + +from typing import Dict, Sequence, Set + +import numpy as np + +from predicators.envs.ball_and_cup_sticky_table import BallAndCupStickyTableEnv +from predicators.ground_truth_models import GroundTruthNSRTFactory +from predicators.structs import NSRT, Array, GroundAtom, LiftedAtom, Object, \ + ParameterizedOption, Predicate, State, Type, Variable + + +class BallAndCupStickyTableGroundTruthNSRTFactory(GroundTruthNSRTFactory): + """Ground-truth NSRTs for the sticky table environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"ball_and_cup_sticky_table"} + + @staticmethod + def get_nsrts(env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + options: Dict[str, ParameterizedOption]) -> Set[NSRT]: + + # Types + robot_type = types["robot"] + ball_type = types["ball"] + cup_type = types["cup"] + table_type = types["table"] + + # Predicates + BallOnTable = predicates["BallOnTable"] + BallOnFloor = 
predicates["BallOnFloor"] + HoldingBall = predicates["HoldingBall"] + CupOnTable = predicates["CupOnTable"] + CupOnFloor = predicates["CupOnFloor"] + HoldingCup = predicates["HoldingCup"] + HandEmpty = predicates["HandEmpty"] + ReachableSurface = predicates["IsReachableSurface"] + ReachableBall = predicates["IsReachableBall"] + ReachableCup = predicates["IsReachableCup"] + BallInCup = predicates["BallInCup"] + BallNotInCup = predicates["BallNotInCup"] + + # Options + NavigateToTable = options["NavigateToTable"] + PickBallFromTable = options["PickBallFromTable"] + PickBallFromFloor = options["PickBallFromFloor"] + PlaceBallOnTable = options["PlaceBallOnTable"] + PlaceBallOnFloor = options["PlaceBallOnFloor"] + PickCupWithoutBallFromTable = options["PickCupWithoutBallFromTable"] + PickCupWithBallFromTable = options["PickCupWithBallFromTable"] + PickCupWithoutBallFromFloor = options["PickCupWithoutBallFromFloor"] + PickCupWithBallFromFloor = options["PickCupWithBallFromFloor"] + PlaceCupWithoutBallOnTable = options["PlaceCupWithoutBallOnTable"] + PlaceCupWithBallOnFloor = options["PlaceCupWithBallOnFloor"] + PlaceCupWithoutBallOnFloor = options["PlaceCupWithoutBallOnFloor"] + PlaceBallInCupOnFloor = options["PlaceBallInCupOnFloor"] + PlaceBallInCupOnTable = options["PlaceBallInCupOnTable"] + NavigateToBall = options["NavigateToBall"] + NavigateToCup = options["NavigateToCup"] + + nsrts = set() + + # PickBallFromTable + robot = Variable("?robot", robot_type) + table = Variable("?table", table_type) + ball = Variable("?ball", ball_type) + cup = Variable("?cup", cup_type) + parameters = [robot, ball, cup, table] + option_vars = parameters + option = PickBallFromTable + preconditions = { + LiftedAtom(BallNotInCup, [ball, cup]), + LiftedAtom(ReachableSurface, [robot, table]), + LiftedAtom(BallOnTable, [ball, table]), + LiftedAtom(HandEmpty, []), + } + add_effects = {LiftedAtom(HoldingBall, [ball])} + delete_effects = { + LiftedAtom(BallOnTable, [ball, table]), + 
LiftedAtom(HandEmpty, []), + } + ignore_effects = {BallInCup} + + def pick_obj_sampler(state: State, goal: Set[GroundAtom], + rng: np.random.Generator, + objs: Sequence[Object]) -> Array: + # Sample within ball around center of the object. + del goal # unused + obj = objs[1] + if obj.type.name == "ball": + obj_type_id = 1.0 + else: + assert obj.type.name == "cup" + obj_type_id = 2.0 + size = state.get(obj, "radius") + obj_x = state.get(obj, "x") + size / 2 + obj_y = state.get(obj, "y") + size / 2 + dist = rng.uniform(0, size / 4) + theta = rng.uniform(0, 2 * np.pi) + x = obj_x + dist * np.cos(theta) + y = obj_y + dist * np.sin(theta) + return np.array([1.0, obj_type_id, 0.0, x, y], dtype=np.float32) + + pickballfromtable_nsrt = NSRT("PickBallFromTable", parameters, + preconditions, add_effects, + delete_effects, ignore_effects, option, + option_vars, pick_obj_sampler) + nsrts.add(pickballfromtable_nsrt) + + # PickBallFromFloor + parameters = [robot, ball, cup] + option_vars = parameters + option = PickBallFromFloor + preconditions = { + LiftedAtom(ReachableBall, [robot, ball]), + LiftedAtom(BallOnFloor, [ball]), + LiftedAtom(HandEmpty, []), + } + add_effects = { + LiftedAtom(HoldingBall, [ball]), + LiftedAtom(BallNotInCup, [ball, cup]) + } + delete_effects = { + LiftedAtom(BallOnFloor, [ball]), + LiftedAtom(HandEmpty, []), + LiftedAtom(BallInCup, [ball, cup]) + } + ignore_effects = set() + pickballfromfloor_nsrt = NSRT("PickBallFromFloor", parameters, + preconditions, add_effects, + delete_effects, ignore_effects, option, + option_vars, pick_obj_sampler) + nsrts.add(pickballfromfloor_nsrt) + + # PickCupWithoutBallFromTable + parameters = [robot, cup, ball, table] + option_vars = parameters + option = PickCupWithoutBallFromTable + preconditions = { + LiftedAtom(BallNotInCup, [ball, cup]), + LiftedAtom(ReachableSurface, [robot, table]), + LiftedAtom(CupOnTable, [cup, table]), + LiftedAtom(HandEmpty, []), + } + add_effects = {LiftedAtom(HoldingCup, [cup])} + 
delete_effects = { + LiftedAtom(CupOnTable, [cup, table]), + LiftedAtom(HandEmpty, []), + } + pickcupwithoutballfromtable_nsrt = NSRT("PickCupWithoutBallFromTable", + parameters, preconditions, + add_effects, delete_effects, + set(), option, option_vars, + pick_obj_sampler) + nsrts.add(pickcupwithoutballfromtable_nsrt) + + # PickCupWithBallFromTable + parameters = [robot, cup, ball, table] + option_vars = parameters + option = PickCupWithBallFromTable + preconditions = { + LiftedAtom(BallInCup, [ball, cup]), + LiftedAtom(ReachableSurface, [robot, table]), + LiftedAtom(CupOnTable, [cup, table]), + LiftedAtom(HandEmpty, []), + LiftedAtom(BallOnTable, [ball, table]) + } + add_effects = { + LiftedAtom(HoldingCup, [cup]), + LiftedAtom(HoldingBall, [ball]) + } + delete_effects = { + LiftedAtom(CupOnTable, [cup, table]), + LiftedAtom(HandEmpty, []), + LiftedAtom(BallOnTable, [ball, table]) + } + pickcupwithoutballfromtable_nsrt = NSRT("PickCupWithBallFromTable", + parameters, preconditions, + add_effects, delete_effects, + set(), option, option_vars, + pick_obj_sampler) + nsrts.add(pickcupwithoutballfromtable_nsrt) + + # PickCupWithoutBallFromFloor + parameters = [robot, cup, ball] + option_vars = parameters + option = PickCupWithoutBallFromFloor + preconditions = { + LiftedAtom(BallNotInCup, [ball, cup]), + LiftedAtom(ReachableCup, [robot, cup]), + LiftedAtom(CupOnFloor, [cup]), + LiftedAtom(HandEmpty, []), + } + add_effects = {LiftedAtom(HoldingCup, [cup])} + delete_effects = { + LiftedAtom(CupOnFloor, [cup]), + LiftedAtom(HandEmpty, []), + } + pickcupwithoutballfromfloor_nsrt = NSRT("PickCupWithoutBallFromFloor", + parameters, preconditions, + add_effects, delete_effects, + set(), option, option_vars, + pick_obj_sampler) + nsrts.add(pickcupwithoutballfromfloor_nsrt) + + # PickCupWithBallFromFloor + parameters = [robot, cup, ball] + option_vars = parameters + option = PickCupWithBallFromFloor + preconditions = { + LiftedAtom(BallOnFloor, [ball]), + LiftedAtom(BallInCup, 
[ball, cup]), + LiftedAtom(ReachableBall, [robot, ball]), + LiftedAtom(CupOnFloor, [cup]), + LiftedAtom(HandEmpty, []), + } + add_effects = { + LiftedAtom(HoldingCup, [cup]), + LiftedAtom(HoldingBall, [ball]) + } + delete_effects = { + LiftedAtom(CupOnFloor, [cup]), + LiftedAtom(HandEmpty, []), + LiftedAtom(BallOnFloor, [ball]) + } + pickcupwithballfromfloor_nsrt = NSRT("PickCupWithBallFromFloor", + parameters, preconditions, + add_effects, delete_effects, + set(), option, option_vars, + pick_obj_sampler) + nsrts.add(pickcupwithballfromfloor_nsrt) + + # PlaceBallOnTable + parameters = [robot, ball, cup, table] + option_vars = parameters + option = PlaceBallOnTable + preconditions = { + LiftedAtom(BallNotInCup, [ball, cup]), + LiftedAtom(ReachableSurface, [robot, table]), + LiftedAtom(HoldingBall, [ball]) + } + add_effects = { + LiftedAtom(BallOnTable, [ball, table]), + LiftedAtom(HandEmpty, []), + } + delete_effects = {LiftedAtom(HoldingBall, [ball])} + + def place_on_table_sampler(state: State, goal: Set[GroundAtom], + rng: np.random.Generator, + objs: Sequence[Object]) -> Array: + del goal # unused + table = objs[-1] + obj = objs[-2] + table_x = state.get(table, "x") + table_y = state.get(table, "y") + table_radius = state.get(table, "radius") + assert obj.type.name in ["ball", "cup"] + size = state.get(obj, "radius") * 2 + dist = rng.uniform(0, table_radius - size) + theta = rng.uniform(0, 2 * np.pi) + x = table_x + dist * np.cos(theta) + y = table_y + dist * np.sin(theta) + # NOTE: set obj_type_id to 3.0, since we want to + # place onto the table! 
+ return np.array([1.0, 3.0, 0.0, x, y], dtype=np.float32) + + placeballontable_nsrt = NSRT("PlaceBallOnTable", parameters, + preconditions, add_effects, + delete_effects, set(), option, + option_vars, place_on_table_sampler) + nsrts.add(placeballontable_nsrt) + + # PlaceBallOnFloor + parameters = [robot, cup, ball] + option_vars = parameters + option = PlaceBallOnFloor + preconditions = {LiftedAtom(HoldingBall, [ball])} + add_effects = { + LiftedAtom(BallNotInCup, [ball, cup]), + LiftedAtom(BallOnFloor, [ball]), + } + delete_effects = {LiftedAtom(HoldingBall, [ball])} + ignore_effects = {BallInCup, ReachableBall} + + def place_on_floor_sampler(state: State, goal: Set[GroundAtom], + rng: np.random.Generator, + objs: Sequence[Object]) -> Array: + del goal # not used + obj_to_place = objs[-1] + size = state.get(obj_to_place, "radius") * 2 + dist = rng.uniform(0, size) + theta = rng.uniform(0, 2 * np.pi) + # Just place in a small radius near the center of the room. + x_c = (BallAndCupStickyTableEnv.x_lb + + BallAndCupStickyTableEnv.x_ub) / 2 + y_c = (BallAndCupStickyTableEnv.y_lb + + BallAndCupStickyTableEnv.y_ub) / 2 + x = x_c + dist * np.cos(theta) + y = y_c + dist * np.sin(theta) + # NOTE: obj_type_id set to 0.0 since it doesn't matter. + return np.array([1.0, 0.0, 0.0, x, y], dtype=np.float32) + + def place_ball_on_floor_sampler(state: State, goal: Set[GroundAtom], + rng: np.random.Generator, + objs: Sequence[Object]) -> Array: + sample_arr = place_on_floor_sampler(state, goal, rng, objs) + # In this case, we need to manipulate the ball separately from the + # cup! 
+ sample_arr[2] = 1.0 + return sample_arr + + placeballonfloor_nsrt = NSRT("PlaceBallOnFloor", parameters, + preconditions, add_effects, + delete_effects, ignore_effects, option, + option_vars, place_ball_on_floor_sampler) + nsrts.add(placeballonfloor_nsrt) + + # PlaceBallInCupOnFloor + parameters = [robot, ball, cup] + option_vars = parameters + option = PlaceBallInCupOnFloor + preconditions = { + LiftedAtom(BallNotInCup, [ball, cup]), + LiftedAtom(ReachableCup, [robot, cup]), + LiftedAtom(CupOnFloor, [cup]), + LiftedAtom(HoldingBall, [ball]) + } + add_effects = { + LiftedAtom(BallInCup, [ball, cup]), + LiftedAtom(BallOnFloor, [ball]), + LiftedAtom(HandEmpty, []), + } + delete_effects = { + LiftedAtom(HoldingBall, [ball]), + LiftedAtom(BallNotInCup, [ball, cup]) + } + + def place_ball_in_cup_sampler(state: State, goal: Set[GroundAtom], + rng: np.random.Generator, + objs: Sequence[Object]) -> Array: + del rng, goal # unused + cup = objs[2] + # Just place the ball in the middle of the cup. 
Set + # the type id to be 2.0 to correspond to the cup + return np.array( + [1.0, 2.0, 0.0, + state.get(cup, "x"), + state.get(cup, "y")], + dtype=np.float32) + + placeballincuponfloor_nsrt = NSRT("PlaceBallInCupOnFloor", parameters, + preconditions, add_effects, + delete_effects, set(), option, + option_vars, + place_ball_in_cup_sampler) + nsrts.add(placeballincuponfloor_nsrt) + + # PlaceBallInCupOnTable + parameters = [robot, ball, cup, table] + option_vars = parameters + option = PlaceBallInCupOnTable + preconditions = { + LiftedAtom(ReachableSurface, [robot, table]), + LiftedAtom(CupOnTable, [cup, table]), + LiftedAtom(HoldingBall, [ball]), + LiftedAtom(BallNotInCup, [ball, cup]), + } + add_effects = { + LiftedAtom(BallInCup, [ball, cup]), + LiftedAtom(BallOnTable, [ball, table]), + LiftedAtom(HandEmpty, []), + } + delete_effects = { + LiftedAtom(HoldingBall, [ball]), + LiftedAtom(BallNotInCup, [ball, cup]), + } + placeballincupontable_nsrt = NSRT("PlaceBallInCupOnTable", parameters, + preconditions, add_effects, + delete_effects, set(), option, + option_vars, + place_ball_in_cup_sampler) + nsrts.add(placeballincupontable_nsrt) + + # PlaceCupWithoutBallOnTable + parameters = [robot, ball, cup, table] + option_vars = parameters + option = PlaceCupWithoutBallOnTable + preconditions = { + LiftedAtom(ReachableSurface, [robot, table]), + LiftedAtom(HoldingCup, [cup]), + LiftedAtom(BallNotInCup, [ball, cup]) + } + add_effects = { + LiftedAtom(CupOnTable, [cup, table]), + LiftedAtom(HandEmpty, []), + } + delete_effects = {LiftedAtom(HoldingCup, [cup])} + placecupwithoutballontable_nsrt = NSRT("PlaceCupWithoutBallOnTable", + parameters, preconditions, + add_effects, delete_effects, + set(), option, option_vars, + place_on_table_sampler) + nsrts.add(placecupwithoutballontable_nsrt) + + # PlaceCupWithoutBallOnFloor + parameters = [robot, ball, cup] + option_vars = parameters + option = PlaceCupWithoutBallOnFloor + preconditions = { + LiftedAtom(HoldingCup, [cup]), + 
LiftedAtom(BallNotInCup, [ball, cup]) + } + add_effects = { + LiftedAtom(CupOnFloor, [cup]), + LiftedAtom(HandEmpty, []), + } + delete_effects = {LiftedAtom(HoldingCup, [cup])} + placecupwithoutballonfloor_nsrt = NSRT("PlaceCupWithoutBallOnFloor", + parameters, preconditions, + add_effects, delete_effects, + set(), option, option_vars, + place_on_floor_sampler) + nsrts.add(placecupwithoutballonfloor_nsrt) + + # PlaceCupWithBallOnFloor + parameters = [robot, ball, cup] + option_vars = parameters + option = PlaceCupWithBallOnFloor + preconditions = { + LiftedAtom(HoldingCup, [cup]), + LiftedAtom(BallInCup, [ball, cup]) + } + add_effects = { + LiftedAtom(CupOnFloor, [cup]), + LiftedAtom(HandEmpty, []), + LiftedAtom(BallOnFloor, [ball]) + } + delete_effects = { + LiftedAtom(HoldingCup, [cup]), + LiftedAtom(HoldingBall, [ball]) + } + placecupwithballonfloor_nsrt = NSRT("PlaceCupWithBallOnFloor", + parameters, preconditions, + add_effects, delete_effects, set(), + option, option_vars, + place_on_floor_sampler) + nsrts.add(placecupwithballonfloor_nsrt) + + # NavigateToBall + parameters = [robot, ball] + option_vars = parameters + option = NavigateToBall + preconditions = set() + add_effects = {LiftedAtom(ReachableBall, [robot, ball])} + ignore_effects = {ReachableSurface, ReachableBall, ReachableCup} + + def navigate_to_obj_sampler(state: State, goal: Set[GroundAtom], + rng: np.random.Generator, + objs: Sequence[Object]) -> Array: + del goal # not used + robot, obj = objs + assert obj.type.name in ["table", "cup", "ball"] + size = state.get(obj, "radius") + obj_x = state.get(obj, "x") + obj_y = state.get(obj, "y") + max_dist = BallAndCupStickyTableEnv.reachable_thresh + # NOTE: This must terminate for the problem to + # be feasible, which it is basically guaranteed to + # be, so there should be no worries that this will + # loop forever. 
+ while True: + dist = rng.uniform(size, max_dist) + theta = rng.uniform(0, 2 * np.pi) + x = obj_x + dist * np.cos(theta) + y = obj_y + dist * np.sin(theta) + # If this x and y is not in the env bounds, + # continue. + if x < BallAndCupStickyTableEnv.x_lb or \ + x > BallAndCupStickyTableEnv.x_ub or \ + y < BallAndCupStickyTableEnv.y_lb or \ + y > BallAndCupStickyTableEnv.y_ub: + continue # pragma: no cover + pseudo_next_state = state.copy() + pseudo_next_state.set(robot, "x", x) + pseudo_next_state.set(robot, "y", y) + if not BallAndCupStickyTableEnv.exists_robot_collision( + pseudo_next_state): + if obj.is_instance(cup_type): + assert ReachableCup.holds(pseudo_next_state, + [robot, obj]) + elif obj.is_instance(ball_type): + assert ReachableBall.holds(pseudo_next_state, + [robot, obj]) + elif obj.is_instance(table_type): + assert ReachableSurface.holds(pseudo_next_state, + [robot, obj]) + break + # NOTE: obj_type_id set to 0.0 since it doesn't matter. + return np.array([0.0, 0.0, 0.0, x, y], dtype=np.float32) + + navigatetoball_nsrt = NSRT("NavigateToBall", parameters, preconditions, + add_effects, set(), ignore_effects, option, + option_vars, navigate_to_obj_sampler) + nsrts.add(navigatetoball_nsrt) + + # NavigateToCup + parameters = [robot, cup] + option_vars = parameters + option = NavigateToCup + preconditions = set() + add_effects = {LiftedAtom(ReachableCup, [robot, cup])} + ignore_effects = {ReachableSurface, ReachableBall, ReachableCup} + navigatetocup_nsrt = NSRT("NavigateToCup", parameters, preconditions, + add_effects, set(), ignore_effects, option, + option_vars, navigate_to_obj_sampler) + nsrts.add(navigatetocup_nsrt) + + # NavigateToTable + parameters = [robot, table] + option_vars = parameters + option = NavigateToTable + preconditions = set() + add_effects = {LiftedAtom(ReachableSurface, [robot, table])} + ignore_effects = {ReachableSurface, ReachableBall, ReachableCup} + navigatetotable_nsrt = NSRT("NavigateToTable", + parameters, preconditions, 
add_effects, + set(), ignore_effects, option, option_vars, + navigate_to_obj_sampler) + nsrts.add(navigatetotable_nsrt) + + return nsrts diff --git a/predicators/ground_truth_models/ball_and_cup_sticky_table/options.py b/predicators/ground_truth_models/ball_and_cup_sticky_table/options.py new file mode 100644 index 0000000000..bc8d4c40f4 --- /dev/null +++ b/predicators/ground_truth_models/ball_and_cup_sticky_table/options.py @@ -0,0 +1,198 @@ +"""Ground-truth options for the ball and cup sticky table environment.""" + +from typing import Dict, Sequence, Set + +import numpy as np +from gym.spaces import Box + +from predicators import utils +from predicators.envs.ball_and_cup_sticky_table import BallAndCupStickyTableEnv +from predicators.ground_truth_models import GroundTruthOptionFactory +from predicators.structs import Action, Array, Object, ParameterizedOption, \ + ParameterizedPolicy, Predicate, State, Type + + +class BallAndCupStickyTableGroundTruthOptionFactory(GroundTruthOptionFactory): + """Ground-truth options for the sticky table environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"ball_and_cup_sticky_table"} + + @classmethod + def get_options(cls, env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + action_space: Box) -> Set[ParameterizedOption]: + + cup_type = types["cup"] + ball_type = types["ball"] + table_type = types["table"] + # Parameters are move_or_pickplace, obj_type_id, ball_only, + # absolute x, y actions. 
+ params_space = Box( + np.array([ + 0.0, 0.0, 0.0, BallAndCupStickyTableEnv.x_lb, + BallAndCupStickyTableEnv.y_lb + ]), + np.array([ + 1.0, 3.0, 1.0, BallAndCupStickyTableEnv.x_ub, + BallAndCupStickyTableEnv.y_ub + ])) + robot_type = types["robot"] + + PickBallFromTable = utils.SingletonParameterizedOption( + # variables: [robot, ball, cup, table] + "PickBallFromTable", + cls._create_pass_through_policy(action_space), + params_space=params_space, + types=[robot_type, ball_type, cup_type, table_type]) + + PickBallFromFloor = utils.SingletonParameterizedOption( + # variables: [robot, ball, cup] + "PickBallFromFloor", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, ball_type, cup_type]) + + PlaceBallOnTable = utils.SingletonParameterizedOption( + # variables: [robot, ball, cup, table] + "PlaceBallOnTable", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, ball_type, cup_type, table_type]) + + PlaceBallOnFloor = utils.SingletonParameterizedOption( + # variables: [robot, cup, ball] + "PlaceBallOnFloor", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions.
+ params_space=params_space, + types=[robot_type, cup_type, ball_type]) + + PickCupWithoutBallFromTable = utils.SingletonParameterizedOption( + # variables: [robot, cup, ball, table] + "PickCupWithoutBallFromTable", + cls._create_pass_through_policy(action_space), + params_space=params_space, + types=[robot_type, cup_type, ball_type, table_type]) + + PickCupWithBallFromTable = utils.SingletonParameterizedOption( + # variables: [robot, cup, ball, table] + "PickCupWithBallFromTable", + cls._create_pass_through_policy(action_space), + params_space=params_space, + types=[robot_type, cup_type, ball_type, table_type]) + + PickCupWithoutBallFromFloor = utils.SingletonParameterizedOption( + # variables: [robot, cup, ball] + "PickCupWithoutBallFromFloor", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, cup_type, ball_type]) + + PickCupWithBallFromFloor = utils.SingletonParameterizedOption( + # variables: [robot, cup, ball] + "PickCupWithBallFromFloor", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, cup_type, ball_type]) + + PlaceCupWithoutBallOnTable = utils.SingletonParameterizedOption( + # variables: [robot, ball, cup, table] + "PlaceCupWithoutBallOnTable", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, ball_type, cup_type, table_type]) + + PlaceCupWithBallOnFloor = utils.SingletonParameterizedOption( + # variables: [robot, ball, cup] + "PlaceCupWithBallOnFloor", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. 
+ params_space=params_space, + types=[robot_type, ball_type, cup_type]) + + PlaceCupWithoutBallOnFloor = utils.SingletonParameterizedOption( + # variables: [robot, ball, cup] + "PlaceCupWithoutBallOnFloor", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, ball_type, cup_type]) + + PlaceBallInCupOnFloor = utils.SingletonParameterizedOption( + # variables: [robot, ball, cup] + "PlaceBallInCupOnFloor", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, ball_type, cup_type]) + + PlaceBallInCupOnTable = utils.SingletonParameterizedOption( + # variables: [robot, ball, cup, table] + "PlaceBallInCupOnTable", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, ball_type, cup_type, table_type]) + + NavigateToTable = utils.SingletonParameterizedOption( + # variables: [robot, table] + "NavigateToTable", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, table_type]) + + NavigateToBall = utils.SingletonParameterizedOption( + # variables: [robot, ball] + "NavigateToBall", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions. + params_space=params_space, + types=[robot_type, ball_type]) + + NavigateToCup = utils.SingletonParameterizedOption( + # variables: [robot, cup] + "NavigateToCup", + cls._create_pass_through_policy(action_space), + # Parameters are absolute x, y actions.
+ params_space=params_space, + types=[robot_type, cup_type]) + + return { + NavigateToTable, + PickBallFromTable, + PickBallFromFloor, + PlaceBallOnTable, + PlaceBallOnFloor, + PickCupWithoutBallFromTable, + PickCupWithBallFromTable, + PickCupWithoutBallFromFloor, + PickCupWithBallFromFloor, #PlaceCupWithBallOnTable, + PlaceCupWithoutBallOnTable, + PlaceCupWithBallOnFloor, + PlaceCupWithoutBallOnFloor, + PlaceBallInCupOnFloor, + PlaceBallInCupOnTable, + NavigateToBall, + NavigateToCup + } + + @classmethod + def _create_pass_through_policy(cls, + action_space: Box) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects # unused + arr = np.array(params, dtype=np.float32) + arr = np.clip(arr, action_space.low, action_space.high) + return Action(arr) + + return policy diff --git a/predicators/settings.py b/predicators/settings.py index 4852530f2d..f8aa365902 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -328,6 +328,12 @@ class GlobalSettings: sticky_table_place_sticky_fall_prob = 0.05 sticky_table_pick_success_prob = 0.9 sticky_table_tricky_floor_place_sticky_fall_prob = 0.5 + sticky_table_num_tables = 5 # cannot be less than 3 + sticky_table_place_smooth_fall_prob = 0.6 + sticky_table_place_sticky_fall_prob = 0.00 + sticky_table_place_ball_fall_prob = 1.00 + sticky_table_pick_success_prob = 1.00 + sticky_table_num_sticky_tables = 1 # must be less than the num_tables # grid row env parameters grid_row_num_cells = 100 diff --git a/predicators/utils.py b/predicators/utils.py index 11db9e37ab..c674e60bab 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -278,27 +278,54 @@ def construct_active_sampler_input(state: State, objects: Sequence[Object], else: assert CFG.active_sampler_learning_feature_selection == "oracle" - assert CFG.env == "bumpy_cover" - if param_option.name == "Pick": - # In this case, the x-data should be - # [block_bumpy, 
relative_pick_loc] - assert len(objects) == 1 - block = objects[0] - block_pos = state[block][3] - block_bumpy = state[block][5] - sampler_input_lst.append(block_bumpy) - assert len(params) == 1 - sampler_input_lst.append(params[0] - block_pos) + if CFG.env == "bumpy_cover": + if param_option.name == "Pick": + # In this case, the x-data should be + # [block_bumpy, relative_pick_loc] + assert len(objects) == 1 + block = objects[0] + block_pos = state[block][3] + block_bumpy = state[block][5] + sampler_input_lst.append(block_bumpy) + assert len(params) == 1 + sampler_input_lst.append(params[0] - block_pos) + else: + assert param_option.name == "Place" + assert len(objects) == 2 + block, target = objects + target_pos = state[target][3] + grasp = state[block][4] + target_width = state[target][2] + sampler_input_lst.extend([grasp, target_width]) + assert len(params) == 1 + sampler_input_lst.append(params[0] - target_pos) + elif CFG.env == "ball_and_cup_sticky_table": + if "PlaceCup" in param_option.name and "Table" in param_option.name: + _, _, _, table = objects + table_y = state.get(table, "y") + table_x = state.get(table, "x") + sticky = state.get(table, "sticky") + sticky_region_x = state.get(table, "sticky_region_x_offset") + sticky_region_y = state.get(table, "sticky_region_y_offset") + sticky_region_radius = state.get(table, "sticky_region_radius") + table_radius = state.get(table, "radius") + _, _, _, param_x, param_y = params + sampler_input_lst.append(table_radius) + sampler_input_lst.append(sticky) + sampler_input_lst.append(sticky_region_x) + sampler_input_lst.append(sticky_region_y) + sampler_input_lst.append(sticky_region_radius) + sampler_input_lst.append(table_x) + sampler_input_lst.append(table_y) + sampler_input_lst.append(param_x) + sampler_input_lst.append(param_y) + else: # Use all features. 
+ for obj in objects: + sampler_input_lst.extend(state[obj]) + sampler_input_lst.extend(params) else: - assert param_option.name == "Place" - assert len(objects) == 2 - block, target = objects - target_pos = state[target][3] - grasp = state[block][4] - target_width = state[target][2] - sampler_input_lst.extend([grasp, target_width]) - assert len(params) == 1 - sampler_input_lst.append(params[0] - target_pos) + raise NotImplementedError("Oracle feature selection not " + f"implemented for {CFG.env}") return np.array(sampler_input_lst) @@ -362,6 +389,12 @@ def plot(self, ax: plt.Axes, **kwargs: Any) -> None: def contains_point(self, x: float, y: float) -> bool: return (x - self.x)**2 + (y - self.y)**2 <= self.radius**2 + def contains_circle(self, other_circle: Circle) -> bool: + """Check whether this circle wholly contains another one.""" + dist_between_centers = np.sqrt((other_circle.x - self.x)**2 + + (other_circle.y - self.y)**2) + return (dist_between_centers + other_circle.radius) <= self.radius + @dataclass(frozen=True) class Triangle(_Geom2D): @@ -3452,3 +3485,48 @@ def beta_from_mean_and_variance(mean: float, rv = BetaRV(alpha, beta) assert abs(rv.mean() - mean) < 1e-6 return rv + + +def _obs_to_state_pass_through(obs: Observation) -> State: + """Helper for run_ground_nsrt_with_assertions.""" + assert isinstance(obs, State) + return obs + + +def run_ground_nsrt_with_assertions(ground_nsrt: _GroundNSRT, + state: State, + env: BaseEnv, + rng: np.random.Generator, + override_params: Optional[Array] = None, + obs_to_state: Callable[ + [Observation], + State] = _obs_to_state_pass_through, + assert_effects: bool = True, + max_steps: int = 400) -> State: + """Utility for tests. + + NOTE: assumes that the internal state of env corresponds to state. 
+ """ + ground_nsrt_str = f"{ground_nsrt.name}{ground_nsrt.objects}" + for atom in ground_nsrt.preconditions: + assert atom.holds(state), \ + f"Precondition for {ground_nsrt_str} failed: {atom}" + option = ground_nsrt.sample_option(state, set(), rng) + if override_params is not None: + option = option.parent.ground(option.objects, + override_params) # pragma: no cover + assert option.initiable(state) + for _ in range(max_steps): + act = option.policy(state) + obs = env.step(act) + state = obs_to_state(obs) + if option.terminal(state): + break + if assert_effects: + for atom in ground_nsrt.add_effects: + assert atom.holds(state), \ + f"Add effect for {ground_nsrt_str} failed: {atom}" + for atom in ground_nsrt.delete_effects: + assert not atom.holds(state), \ + f"Delete effect for {ground_nsrt_str} failed: {atom}" + return state diff --git a/scripts/configs/active_sampler_learning.yaml b/scripts/configs/active_sampler_learning.yaml index ba23443b23..082135e0a8 100644 --- a/scripts/configs/active_sampler_learning.yaml +++ b/scripts/configs/active_sampler_learning.yaml @@ -36,12 +36,31 @@ ENVS: NAME: "grid_row" FLAGS: grid_row_num_cells: 3 + max_num_steps_interaction_request: 500 + active_sampler_learning_explore_length_base: 100000 # effectively disable + active_sampler_learning_feature_selection: all grid_row: NAME: "grid_row" - sticky_table: - NAME: "sticky_table" - sticky_table_tricky_floor: - NAME: "sticky_table_tricky_floor" + FLAGS: + max_num_steps_interaction_request: 500 + active_sampler_learning_explore_length_base: 100000 # effectively disable + active_sampler_learning_feature_selection: all + ball_and_cup_sticky_table: + NAME: "ball_and_cup_sticky_table" + FLAGS: + sticky_table_place_smooth_fall_prob: 1.00 + sticky_table_place_sticky_fall_prob: 0.00 + sticky_table_pick_success_prob: 1.0 + sticky_table_num_sticky_tables: 1 + sticky_table_num_tables: 5 + sticky_table_place_ball_fall_prob: 1.00 + active_sampler_learning_explore_length_base: 25 + 
active_sampler_learning_exploration_epsilon: 0.5 + skill_competence_model_optimistic_recency_size: 2 + skill_competence_model_optimistic_window_size: 2 + horizon: 8 + active_sampler_learning_explore_length_base: 100000 # effectively disable + active_sampler_learning_feature_selection: oracle regional_bumpy_cover: NAME: "regional_bumpy_cover" FLAGS: @@ -53,11 +72,13 @@ ENVS: cover_block_widths: '[0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01]' cover_num_targets: 10 cover_target_widths: '[0.008,0.008,0.008,0.008,0.008,0.008,0.008,0.008,0.008,0.008]' + active_sampler_learning_feature_selection: all kitchen: NAME: "kitchen" FLAGS: num_train_tasks: 100 max_num_steps_option_rollout: 250 + active_sampler_learning_feature_selection: all ARGS: - "debug" FLAGS: @@ -73,10 +94,10 @@ FLAGS: active_sampler_learning_model: "myopic_classifier_mlp" active_sampler_learning_use_teacher: False online_nsrt_learning_requests_per_cycle: 1 - max_num_steps_interaction_request: 500 + max_num_steps_interaction_request: 100 num_online_learning_cycles: 10 - active_sampler_learning_explore_length_base: 100000 # effectively disable sesame_task_planner: "fdopt-costs" sesame_grounder: "fd_translator" + active_sampler_learning_n_iter_no_change: 5000 START_SEED: 456 -NUM_SEEDS: 10 +NUM_SEEDS: 10 \ No newline at end of file diff --git a/scripts/plotting/create_active_sampler_learning_plots.py b/scripts/plotting/create_active_sampler_learning_plots.py index 80e1f3f378..8fde3caefa 100644 --- a/scripts/plotting/create_active_sampler_learning_plots.py +++ b/scripts/plotting/create_active_sampler_learning_plots.py @@ -117,7 +117,7 @@ def _derive_per_task_average(metric: str, ("Random Skills", "blue", lambda df: df["EXPERIMENT_ID"].apply( lambda v: "grid_row-random_nsrts_explore" in v)), ], - "Sticky Table": [ + "Ball and Cup Sticky Table": [ ("Planning Progress", "green", lambda df: df["EXPERIMENT_ID"].apply( lambda v: "sticky_table-planning_progress_explore" in v)), ("Task Repeat", "orange", lambda 
df: df["EXPERIMENT_ID"].apply( @@ -129,19 +129,6 @@ def _derive_per_task_average(metric: str, ("Random Skills", "blue", lambda df: df["EXPERIMENT_ID"].apply( lambda v: "sticky_table-random_nsrts_explore" in v)), ], - "Sticky Table Tricky Floor": [ - ("Planning Progress", "green", lambda df: df["EXPERIMENT_ID"]. - apply(lambda v: "sticky_table_tricky_floor-planning_progress_explore" - in v)), - ("Task Repeat", "orange", lambda df: df["EXPERIMENT_ID"].apply( - lambda v: "sticky_table_tricky_floor-task_repeat_explore" in v)), - ("Fail Focus", "red", lambda df: df["EXPERIMENT_ID"].apply( - lambda v: "sticky_table_tricky_floor-success_rate_explore" in v)), - ("Task-Relevant", "purple", lambda df: df["EXPERIMENT_ID"].apply( - lambda v: "sticky_table_tricky_floor-random_score_explore" in v)), - ("Random Skills", "blue", lambda df: df["EXPERIMENT_ID"].apply( - lambda v: "sticky_table_tricky_floor-random_nsrts_explore" in v)), - ], } # If True, add (0, 0) to every plot. diff --git a/tests/envs/test_ball_and_cup_sticky_table_env.py b/tests/envs/test_ball_and_cup_sticky_table_env.py new file mode 100644 index 0000000000..8c3acff10d --- /dev/null +++ b/tests/envs/test_ball_and_cup_sticky_table_env.py @@ -0,0 +1,246 @@ +"""Test cases for the Ball and Cup Sticky Table environment.""" + +import numpy as np +import pytest + +from predicators import utils +from predicators.envs.ball_and_cup_sticky_table import BallAndCupStickyTableEnv +from predicators.ground_truth_models import get_gt_nsrts, get_gt_options + + +def test_sticky_table(): + """Tests for the Ball and Cup Sticky Table environment.""" + utils.reset_config({ + "env": "ball_and_cup_sticky_table", + "num_train_tasks": 1, + "num_test_tasks": 2, + "sticky_table_place_smooth_fall_prob": 1.0, + "sticky_table_place_sticky_fall_prob": 0.0, + "sticky_table_pick_success_prob": 1.0, + "sticky_table_place_ball_fall_prob": 0.0, + }) + env = BallAndCupStickyTableEnv() + assert env.get_name() == "ball_and_cup_sticky_table" + for 
env_task in env.get_train_tasks(): + task = env_task.task + for obj in task.init: + assert len(obj.type.feature_names) == len(task.init[obj]) + for env_task in env.get_test_tasks(): + task = env_task.task + for obj in task.init: + assert len(obj.type.feature_names) == len(task.init[obj]) + assert len(env.types) == 4 + type_name_to_type = {t.name: t for t in env.types} + cup_type = type_name_to_type["cup"] + table_type = type_name_to_type["table"] + robot_type = type_name_to_type["robot"] + ball_type = type_name_to_type["ball"] + assert len(env.predicates) == 12 + pred_name_to_pred = {p.name: p for p in env.predicates} + BallOnTable = pred_name_to_pred["BallOnTable"] + BallOnFloor = pred_name_to_pred["BallOnFloor"] + CupOnTable = pred_name_to_pred["CupOnTable"] + CupOnFloor = pred_name_to_pred["CupOnFloor"] + assert env.goal_predicates == {BallOnTable} + assert env.action_space.shape == (5, ) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + nsrt_name_to_nsrt = {n.name: n for n in nsrts} + NavigateToCup = nsrt_name_to_nsrt["NavigateToCup"] + PickCupWithoutBallFromFloor = nsrt_name_to_nsrt[ + "PickCupWithoutBallFromFloor"] + NavigateToTable = nsrt_name_to_nsrt["NavigateToTable"] + PlaceCupWithoutBallOnTable = nsrt_name_to_nsrt[ + "PlaceCupWithoutBallOnTable"] + PickCupWithoutBallFromTable = nsrt_name_to_nsrt[ + "PickCupWithoutBallFromTable"] + PlaceCupWithoutBallOnFloor = nsrt_name_to_nsrt[ + "PlaceCupWithoutBallOnFloor"] + PickBallFromTable = nsrt_name_to_nsrt["PickBallFromTable"] + PlaceBallOnTable = nsrt_name_to_nsrt["PlaceBallOnTable"] + PlaceBallOnFloor = nsrt_name_to_nsrt["PlaceBallOnFloor"] + PickBallFromFloor = nsrt_name_to_nsrt["PickBallFromFloor"] + NavigateToBall = nsrt_name_to_nsrt["NavigateToBall"] + PlaceBallInCupOnFloor = nsrt_name_to_nsrt["PlaceBallInCupOnFloor"] + PlaceBallInCupOnTable = nsrt_name_to_nsrt["PlaceBallInCupOnTable"] + PickCupWithBallFromFloor = 
nsrt_name_to_nsrt["PickCupWithBallFromFloor"] + # PlaceCupWithBallOnTable = nsrt_name_to_nsrt["PlaceCupWithBallOnTable"] + PlaceCupWithBallOnFloor = nsrt_name_to_nsrt["PlaceCupWithBallOnFloor"] + # PickCupWithBallFromTable = nsrt_name_to_nsrt["PickCupWithBallFromTable"] + + assert len(options) == len(nsrts) == 16 + env_train_tasks = env.get_train_tasks() + assert len(env_train_tasks) == 1 + env_test_tasks = env.get_test_tasks() + assert len(env_test_tasks) == 2 + env_task = env_test_tasks[1] + + # Test rendering. + env.reset("test", 1) + with pytest.raises(NotImplementedError): + env.render(caption="Test") + + # Extract objects for NSRT testing. + init_state = env_test_tasks[0].task.init + rng = np.random.default_rng(123) + + robot, = init_state.get_objects(robot_type) + ball, = init_state.get_objects(ball_type) + cup, = init_state.get_objects(cup_type) + tables = init_state.get_objects(table_type) + sticky_tables = [t for t in tables if init_state.get(t, "sticky") > 0.5] + assert len(sticky_tables) == 1 + sticky_table = sticky_tables[0] + normal_tables = [t for t in tables if t != sticky_table] + # The cup starts out on the floor. + assert CupOnFloor([cup]).holds(init_state) + assert not any(CupOnTable([cup, t]).holds(init_state) for t in tables) + # The ball starts out on some table. + ball_init_tables = [ + t for t in tables if BallOnTable([ball, t]).holds(init_state) + ] + assert len(ball_init_tables) == 1 + ball_init_table = ball_init_tables[0] + + # Test noise-free CUP picking and placing on the floor and normal tables. + # Also test placing the ball into the cup on the floor. 
+ first_table = normal_tables[0] + ground_nsrt_plan = [ + NavigateToCup.ground([robot, cup]), + PickCupWithoutBallFromFloor.ground([robot, cup, ball]), + NavigateToTable.ground([robot, first_table]), + PlaceCupWithoutBallOnTable.ground([robot, ball, cup, first_table]), + ] + for table, next_table in zip(normal_tables[:-1], normal_tables[1:]): + ground_nsrt_plan.append( + PickCupWithoutBallFromTable.ground([robot, cup, ball, table])) + ground_nsrt_plan.append(NavigateToTable.ground([robot, next_table])) + ground_nsrt_plan.append( + PlaceCupWithoutBallOnTable.ground([robot, ball, cup, next_table])) + ground_nsrt_plan.append( + PickCupWithoutBallFromTable.ground( + [robot, cup, ball, normal_tables[-1]])) + ground_nsrt_plan.append( + PlaceCupWithoutBallOnFloor.ground([robot, ball, cup])) + ground_nsrt_plan.append(NavigateToTable.ground([robot, ball_init_table])) + ground_nsrt_plan.append( + PickBallFromTable.ground([robot, ball, cup, ball_init_table])) + ground_nsrt_plan.append(NavigateToCup.ground([robot, cup])) + ground_nsrt_plan.append(PlaceBallInCupOnFloor.ground([robot, ball, cup])) + state = env.reset("test", 0) + for ground_nsrt in ground_nsrt_plan: + state = utils.run_ground_nsrt_with_assertions(ground_nsrt, state, env, + rng) + + # Test noise-free BALL picking and placing on the floor and normal tables. 
+ table_order = [ball_init_table + ] + [t for t in normal_tables if t != ball_init_table] + ground_nsrt_plan = [NavigateToTable.ground([robot, ball_init_table])] + for table, next_table in zip(table_order[:-1], table_order[1:]): + ground_nsrt_plan.append( + PickBallFromTable.ground([robot, ball, cup, table])) + ground_nsrt_plan.append(NavigateToTable.ground([robot, next_table])) + ground_nsrt_plan.append( + PlaceBallOnTable.ground([robot, ball, cup, next_table])) + ground_nsrt_plan.append( + PickBallFromTable.ground([robot, ball, cup, normal_tables[-1]])) + ground_nsrt_plan.append(PlaceBallOnFloor.ground([robot, cup, ball])) + ground_nsrt_plan.append(NavigateToBall.ground([robot, ball])) + ground_nsrt_plan.append(PickBallFromFloor.ground([robot, ball, cup])) + state = env.reset("test", 0) + for ground_nsrt in ground_nsrt_plan: + state = utils.run_ground_nsrt_with_assertions(ground_nsrt, state, env, + rng) + + # Test putting the cup on the table first and then the ball in the cup. + table = ball_init_table + ground_nsrt_plan = [ + NavigateToCup.ground([robot, cup]), + PickCupWithoutBallFromFloor.ground([robot, cup, ball]), + NavigateToTable.ground([robot, table]), + PlaceCupWithoutBallOnTable.ground([robot, ball, cup, table]), + PickBallFromTable.ground([robot, ball, cup, table]), + PlaceBallInCupOnTable.ground([robot, ball, cup, table]), + ] + state = env.reset("test", 0) + for ground_nsrt in ground_nsrt_plan: + state = utils.run_ground_nsrt_with_assertions(ground_nsrt, state, env, + rng) + + # Test putting the ball in the cup first and then going to the table, + # then placing back onto the floor. 
+ table = ball_init_table + ground_nsrt_plan = [ + NavigateToTable.ground([robot, table]), + PickBallFromTable.ground([robot, ball, cup, table]), + NavigateToCup.ground([robot, cup]), + PlaceBallInCupOnFloor.ground([robot, ball, cup]), + PickCupWithBallFromFloor.ground([robot, cup, ball]), + NavigateToTable.ground([robot, table]), + PlaceCupWithBallOnFloor.ground([robot, ball, cup]), + ] + state = env.reset("test", 0) + for ground_nsrt in ground_nsrt_plan: + state = utils.run_ground_nsrt_with_assertions(ground_nsrt, state, env, + rng) + + # Test picking the ball from inside the cup on the floor. + table = ball_init_table + ground_nsrt_plan = [ + NavigateToTable.ground([robot, table]), + PickBallFromTable.ground([robot, ball, cup, table]), + NavigateToCup.ground([robot, cup]), + PlaceBallInCupOnFloor.ground([robot, ball, cup]), + PickBallFromFloor.ground([robot, ball, cup]), + ] + state = env.reset("test", 0) + for ground_nsrt in ground_nsrt_plan: + state = utils.run_ground_nsrt_with_assertions(ground_nsrt, state, env, + rng) + + # Test placing the ball on the sticky table, which should always fail. + utils.reset_config({ + "env": "ball_and_cup_sticky_table", + "num_train_tasks": 1, + "num_test_tasks": 2, + }) + ground_nsrt_plan = [ + NavigateToTable.ground([robot, ball_init_table]), + PickBallFromTable.ground([robot, ball, cup, ball_init_table]), + NavigateToTable.ground([robot, sticky_table]), + PlaceBallOnTable.ground([robot, ball, cup, sticky_table]), + ] + # Test 10 times, with different samples per time. 
+ for _ in range(10): + state = env.reset("test", 0) + for i, ground_nsrt in enumerate(ground_nsrt_plan): + if i == len(ground_nsrt_plan) - 1: + state = utils.run_ground_nsrt_with_assertions( + ground_nsrt, state, env, rng, assert_effects=False) + assert BallOnFloor([ball]).holds(state) + else: + state = utils.run_ground_nsrt_with_assertions( + ground_nsrt, state, env, rng) + + # Test placing the cup without the ball on the sticky table, which should + # SOMETIMES fail. + ground_nsrt_plan = [ + NavigateToCup.ground([robot, cup]), + PickCupWithoutBallFromFloor.ground([robot, cup, ball]), + NavigateToTable.ground([robot, sticky_table]), + PlaceCupWithoutBallOnTable.ground([robot, ball, cup, sticky_table]), + ] + # Test 10 times, with different samples per time. + num_success_places = 0 + for _ in range(10): + state = env.reset("test", 0) + for i, ground_nsrt in enumerate(ground_nsrt_plan): + if i == len(ground_nsrt_plan) - 1: + state = utils.run_ground_nsrt_with_assertions( + ground_nsrt, state, env, rng, assert_effects=False) + if CupOnTable([cup, sticky_table]).holds(state): + num_success_places += 1 + else: + state = utils.run_ground_nsrt_with_assertions( + ground_nsrt, state, env, rng) + assert 0 < num_success_places < 10 diff --git a/tests/envs/test_kitchen.py b/tests/envs/test_kitchen.py index b8e46fa3da..f63fe22f77 100644 --- a/tests/envs/test_kitchen.py +++ b/tests/envs/test_kitchen.py @@ -121,24 +121,11 @@ def _run_ground_nsrt(ground_nsrt, state, override_params=None, assert_effects=True): - for atom in ground_nsrt.preconditions: - assert atom.holds(state) - option = ground_nsrt.sample_option(state, set(), rng) - if override_params is not None: - option = option.parent.ground(option.objects, override_params) - assert option.initiable(state) - for _ in range(400): - act = option.policy(state) - obs = env.step(act) - state = env.state_info_to_state(obs["state_info"]) - if option.terminal(state): - break - if assert_effects: - for atom in 
ground_nsrt.add_effects: - assert atom.holds(state) - for atom in ground_nsrt.delete_effects: - assert not atom.holds(state) - return state + obs_to_state = lambda obs: env.state_info_to_state(obs["state_info"]) + return utils.run_ground_nsrt_with_assertions(ground_nsrt, state, env, + rng, obs_to_state, + override_params, + assert_effects) # Set up all the NSRTs for the following tests. move_to_light_pre_on_nsrt = MoveToPreTurnOn.ground([gripper, light]) diff --git a/tests/test_utils.py b/tests/test_utils.py index 06e73cb340..cd28b7f38f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,7 @@ """Test cases for utils.""" import os import time -from typing import Iterator, Tuple +from typing import Iterator, Optional, Tuple from typing import Type as TypingType import matplotlib.pyplot as plt @@ -10,6 +10,7 @@ from gym.spaces import Box from predicators import utils +from predicators.envs.ball_and_cup_sticky_table import BallAndCupStickyTableEnv from predicators.envs.cover import CoverEnv, CoverMultistepOptions from predicators.envs.pddl_env import ProceduralTasksGripperPDDLEnv, \ ProceduralTasksSpannerPDDLEnv @@ -3258,3 +3259,49 @@ def test_motion_planning(): # Test that query_to_goal_fn for BiRRT raises a NotImplementedError with pytest.raises(NotImplementedError): birrt.query_to_goal_fn(0, lambda: 1, lambda x: False) + + +def test_oracle_feature_selection(): + """Test the oracle feature selection code.""" + utils.reset_config({ + "env": "ball_and_cup_sticky_table", + "active_sampler_learning_feature_selection": "oracle" + }) + env = BallAndCupStickyTableEnv() + train_tasks = [t.task for t in env.get_train_tasks()] + state = train_tasks[0].init + options = get_gt_options(env.get_name()) + PlaceCupWithoutBallOnTable: Optional[ParameterizedOption] = None + NavigateToCup: Optional[ParameterizedOption] = None + for opt in options: + if "PlaceCupWithoutBallOnTable" in opt.name: + PlaceCupWithoutBallOnTable = opt + elif "NavigateToCup" in opt.name: + 
NavigateToCup = opt + assert PlaceCupWithoutBallOnTable is not None + params = [0.0, 0.0, 0.0, 0.0, 0.0] + # pylint:disable=protected-access + cup = state.get_objects(env._cup_type)[0] + robot = state.get_objects(env._robot_type)[0] + ball = state.get_objects(env._ball_type)[0] + table = state.get_objects(env._table_type)[0] + # Construct input for special place skill and test that it has only + # 10 features. + sampler_input = utils.construct_active_sampler_input( + state, [cup, robot, ball, table], params, PlaceCupWithoutBallOnTable) + assert len(sampler_input) == 10 + # Construct input for navigation skill and test that it has 12 + # features. + sampler_input = utils.construct_active_sampler_input( + state, [robot, cup], params, NavigateToCup) + assert len(sampler_input) == 12 + # Try a non-existent feature selection method and test that an + # error is raised. + utils.reset_config({ + "env": "not-a-real-env", + "active_sampler_learning_feature_selection": "oracle" + }) + with pytest.raises(NotImplementedError) as e: + utils.construct_active_sampler_input(state, [robot, cup], params, + NavigateToCup) + assert "Oracle feature selection" in str(e) From 00c7860fecc91b89a50a1b34111c0444bf78e8fb Mon Sep 17 00:00:00 2001 From: Tom Silver Date: Mon, 30 Oct 2023 10:07:44 -0400 Subject: [PATCH 8/8] allow third party users to define their own oracle NSRTs (#1578) * allow third party users to define their own oracle NSRTs * test fixes * mypy --- .../approaches/bilevel_planning_approach.py | 7 +- .../approaches/bridge_policy_approach.py | 19 ++-- predicators/approaches/oracle_approach.py | 23 +++-- predicators/option_model.py | 21 +++-- tests/approaches/test_oracle_approach.py | 89 +++++++++++++++++++ .../explorers/test_active_sampler_explorer.py | 12 +-- tests/explorers/test_base_explorer.py | 6 +- .../test_exploit_bilevel_planning_explorer.py | 6 +- tests/explorers/test_glib_explorer.py | 12 +-- .../test_greedy_lookahead_explorer.py | 12 +-- tests/test_option_model.py | 
14 +-- tests/test_planning.py | 8 +- 12 files changed, 169 insertions(+), 60 deletions(-) diff --git a/predicators/approaches/bilevel_planning_approach.py b/predicators/approaches/bilevel_planning_approach.py index 66b701bf31..41d596a591 100644 --- a/predicators/approaches/bilevel_planning_approach.py +++ b/predicators/approaches/bilevel_planning_approach.py @@ -31,7 +31,8 @@ def __init__(self, train_tasks: List[Task], task_planning_heuristic: str = "default", max_skeletons_optimized: int = -1, - bilevel_plan_without_sim: Optional[bool] = None) -> None: + bilevel_plan_without_sim: Optional[bool] = None, + option_model: Optional[_OptionModelBase] = None) -> None: super().__init__(initial_predicates, initial_options, types, action_space, train_tasks) if task_planning_heuristic == "default": @@ -43,7 +44,9 @@ def __init__(self, self._task_planning_heuristic = task_planning_heuristic self._max_skeletons_optimized = max_skeletons_optimized self._plan_without_sim = bilevel_plan_without_sim - self._option_model = create_option_model(CFG.option_model_name) + if option_model is None: + option_model = create_option_model(CFG.option_model_name) + self._option_model = option_model self._num_calls = 0 self._last_plan: List[_Option] = [] # used if plan WITH sim self._last_nsrt_plan: List[_GroundNSRT] = [] # plan WITHOUT sim diff --git a/predicators/approaches/bridge_policy_approach.py b/predicators/approaches/bridge_policy_approach.py index d257bbddb3..46810f9110 100644 --- a/predicators/approaches/bridge_policy_approach.py +++ b/predicators/approaches/bridge_policy_approach.py @@ -51,8 +51,9 @@ from predicators.approaches.oracle_approach import OracleApproach from predicators.bridge_policies import BridgePolicyDone, create_bridge_policy from predicators.nsrt_learning.segmentation import segment_trajectory +from predicators.option_model import _OptionModelBase from predicators.settings import CFG -from predicators.structs import Action, BridgeDataset, DefaultState, \ +from 
predicators.structs import NSRT, Action, BridgeDataset, DefaultState, \ DemonstrationQuery, DemonstrationResponse, InteractionRequest, \ InteractionResult, ParameterizedOption, Predicate, Query, State, Task, \ Type, _Option @@ -69,10 +70,18 @@ def __init__(self, action_space: Box, train_tasks: List[Task], task_planning_heuristic: str = "default", - max_skeletons_optimized: int = -1) -> None: - super().__init__(initial_predicates, initial_options, types, - action_space, train_tasks, task_planning_heuristic, - max_skeletons_optimized) + max_skeletons_optimized: int = -1, + nsrts: Optional[Set[NSRT]] = None, + option_model: Optional[_OptionModelBase] = None) -> None: + super().__init__(initial_predicates, + initial_options, + types, + action_space, + train_tasks, + task_planning_heuristic, + max_skeletons_optimized, + nsrts=nsrts, + option_model=option_model) predicates = self._get_current_predicates() options = initial_options nsrts = self._get_current_nsrts() diff --git a/predicators/approaches/oracle_approach.py b/predicators/approaches/oracle_approach.py index c68f4faab5..ec2dc2907b 100644 --- a/predicators/approaches/oracle_approach.py +++ b/predicators/approaches/oracle_approach.py @@ -13,6 +13,7 @@ from predicators.approaches.bilevel_planning_approach import \ BilevelPlanningApproach from predicators.ground_truth_models import get_gt_nsrts +from predicators.option_model import _OptionModelBase from predicators.settings import CFG from predicators.structs import NSRT, ParameterizedOption, Predicate, Task, \ Type @@ -29,12 +30,22 @@ def __init__(self, train_tasks: List[Task], task_planning_heuristic: str = "default", max_skeletons_optimized: int = -1, - bilevel_plan_without_sim: Optional[bool] = None) -> None: - super().__init__(initial_predicates, initial_options, types, - action_space, train_tasks, task_planning_heuristic, - max_skeletons_optimized, bilevel_plan_without_sim) - self._nsrts = get_gt_nsrts(CFG.env, self._initial_predicates, - 
self._initial_options) + bilevel_plan_without_sim: Optional[bool] = None, + nsrts: Optional[Set[NSRT]] = None, + option_model: Optional[_OptionModelBase] = None) -> None: + super().__init__(initial_predicates, + initial_options, + types, + action_space, + train_tasks, + task_planning_heuristic, + max_skeletons_optimized, + bilevel_plan_without_sim, + option_model=option_model) + if nsrts is None: + nsrts = get_gt_nsrts(CFG.env, self._initial_predicates, + self._initial_options) + self._nsrts = nsrts @classmethod def get_name(cls) -> str: diff --git a/predicators/option_model.py b/predicators/option_model.py index 5e79f0d3d9..45d7ed244a 100644 --- a/predicators/option_model.py +++ b/predicators/option_model.py @@ -7,15 +7,16 @@ from __future__ import annotations import abc -from typing import Tuple +from typing import Callable, Set, Tuple import numpy as np from predicators import utils -from predicators.envs import BaseEnv, create_new_env +from predicators.envs import create_new_env from predicators.ground_truth_models import get_gt_options from predicators.settings import CFG -from predicators.structs import DefaultState, State, _Option +from predicators.structs import Action, DefaultState, ParameterizedOption, \ + State, _Option def create_option_model(name: str) -> _OptionModelBase: @@ -24,13 +25,15 @@ def create_option_model(name: str) -> _OptionModelBase: env = create_new_env(CFG.env, do_cache=False, use_gui=CFG.option_model_use_gui) - return _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + return _OracleOptionModel(options, env.simulate) if name.startswith("oracle"): env_name = name[name.index("_") + 1:] env = create_new_env(env_name, do_cache=False, use_gui=CFG.option_model_use_gui) - return _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + return _OracleOptionModel(options, env.simulate) raise NotImplementedError(f"Unknown option model: {name}") @@ -55,11 +58,11 @@ class _OracleOptionModel(_OptionModelBase): Runs 
options through this simulator to figure out the next state. """ - def __init__(self, env: BaseEnv) -> None: + def __init__(self, options: Set[ParameterizedOption], + simulator: Callable[[State, Action], State]) -> None: super().__init__() - gt_options = get_gt_options(env.get_name()) - self._name_to_parameterized_option = {o.name: o for o in gt_options} - self._simulator = env.simulate + self._name_to_parameterized_option = {o.name: o for o in options} + self._simulator = simulator def get_next_state_and_num_actions(self, state: State, option: _Option) -> Tuple[State, int]: diff --git a/tests/approaches/test_oracle_approach.py b/tests/approaches/test_oracle_approach.py index dccbbfcf69..13c8ec3454 100644 --- a/tests/approaches/test_oracle_approach.py +++ b/tests/approaches/test_oracle_approach.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from gym.spaces import Box import predicators.envs.pddl_env from predicators import utils @@ -718,3 +719,91 @@ def test_playroom_get_gt_nsrts(): state, train_task.goal, rng) movedoortodoor_action = movedoortodoor_option.policy(state) assert env.action_space.contains(movedoortodoor_action.arr) + + +def test_external_oracle_approach(): + """Test that it's possible for an external user of predicators to define + their own environment and NSRTs and use the oracle approach.""" + + utils.reset_config({"num_train_tasks": 2, "num_test_tasks": 2}) + + class _ExternalBlocksEnv(BlocksEnv): + """To make sure that the test doesn't pass without using the new NSRTs, + reverse the action space.""" + + @classmethod + def get_name(cls) -> str: + return "external_blocks" + + @property + def action_space(self) -> Box: + original_space = super().action_space + return Box(original_space.low[::-1], + original_space.high[::-1], + dtype=np.float32) + + def simulate(self, state, action): + # Need to rewrite these lines here to avoid assertion in simulate + # that uses action_space. 
+ x, y, z, fingers = action.arr[::-1] + # Infer which transition function to follow + if fingers < 0.5: + return self._transition_pick(state, x, y, z) + if z < self.table_height + self._block_size: + return self._transition_putontable(state, x, y, z) + return self._transition_stack(state, x, y, z) + + env = _ExternalBlocksEnv() + + # Create external options by modifying blocks options. + options = set() + old_option_to_new_option = {} + + def _reverse_policy(original_policy): + + def new_policy(state, memory, objects, params): + action = original_policy(state, memory, objects, params) + return Action(action.arr[::-1]) + + return new_policy + + original_options = get_gt_options("blocks") + for option in original_options: + new_policy = _reverse_policy(option.policy) + new_option = ParameterizedOption(f"external_{option.name}", + option.types, option.params_space, + new_policy, option.initiable, + option.terminal) + options.add(new_option) + old_option_to_new_option[option] = new_option + + # Create the option model. + option_model = _OracleOptionModel(options, env.simulate) + + # Create external NSRTs by just modifying blocks NSRTs. + nsrts = set() + for nsrt in get_gt_nsrts("blocks", env.predicates, original_options): + nsrt_option = old_option_to_new_option[nsrt.option] + sampler = nsrt._sampler # pylint: disable=protected-access + new_nsrt = NSRT(f"external_{nsrt.name}", nsrt.parameters, + nsrt.preconditions, nsrt.add_effects, + nsrt.delete_effects, nsrt.ignore_effects, nsrt_option, + nsrt.option_vars, sampler) + nsrts.add(new_nsrt) + + # Create oracle approach. + train_tasks = [t.task for t in env.get_train_tasks()] + approach = OracleApproach(env.predicates, + options, + env.types, + env.action_space, + train_tasks, + nsrts=nsrts, + option_model=option_model) + + # Get a policy for the first task. + task = train_tasks[0] + policy = approach.solve(task, timeout=500) + + # Verify the policy. 
+ assert _policy_solves_task(policy, task, env.simulate) diff --git a/tests/explorers/test_active_sampler_explorer.py b/tests/explorers/test_active_sampler_explorer.py index a53f16aa29..91d20f7ec4 100644 --- a/tests/explorers/test_active_sampler_explorer.py +++ b/tests/explorers/test_active_sampler_explorer.py @@ -25,9 +25,9 @@ def test_active_sampler_explorer(): "sampler_learner": "oracle", }) env = RegionalBumpyCoverEnv() - nsrts = get_gt_nsrts(env.get_name(), env.predicates, - get_gt_options(env.get_name())) - option_model = _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + option_model = _OracleOptionModel(options, env.simulate) train_tasks = [t.task for t in env.get_train_tasks()] ground_op_hist = {} competence_models = {} @@ -168,9 +168,9 @@ def test_active_sampler_explorer(): "active_sampler_explore_task_strategy": "success_rate", }) env = RegionalBumpyCoverEnv() - nsrts = get_gt_nsrts(env.get_name(), env.predicates, - get_gt_options(env.get_name())) - option_model = _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + option_model = _OracleOptionModel(options, env.simulate) train_tasks = [t.task for t in env.get_train_tasks()] ground_op_hist = {} competence_models = {} diff --git a/tests/explorers/test_base_explorer.py b/tests/explorers/test_base_explorer.py index f627c68adf..305b69ffa8 100644 --- a/tests/explorers/test_base_explorer.py +++ b/tests/explorers/test_base_explorer.py @@ -12,9 +12,9 @@ def test_create_explorer(): """Tests for create_explorer.""" utils.reset_config({"env": "cover"}) env = CoverEnv() - nsrts = get_gt_nsrts(env.get_name(), env.predicates, - get_gt_options(env.get_name())) - option_model = _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + option_model = _OracleOptionModel(options, 
env.simulate) train_tasks = [t.task for t in env.get_train_tasks()] # Greedy lookahead explorer. state_score_fn = lambda _1, _2: 0.0 diff --git a/tests/explorers/test_exploit_bilevel_planning_explorer.py b/tests/explorers/test_exploit_bilevel_planning_explorer.py index f91b0e3d80..234cf1314c 100644 --- a/tests/explorers/test_exploit_bilevel_planning_explorer.py +++ b/tests/explorers/test_exploit_bilevel_planning_explorer.py @@ -15,9 +15,9 @@ def test_exploit_bilevel_planning_explorer(): "explorer": "exploit_planning", }) env = CoverEnv() - nsrts = get_gt_nsrts(env.get_name(), env.predicates, - get_gt_options(env.get_name())) - option_model = _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + option_model = _OracleOptionModel(options, env.simulate) train_tasks = [t.task for t in env.get_train_tasks()] explorer = create_explorer("exploit_planning", env.predicates, get_gt_options(env.get_name()), env.types, diff --git a/tests/explorers/test_glib_explorer.py b/tests/explorers/test_glib_explorer.py index 01affe6fc8..89a70d5072 100644 --- a/tests/explorers/test_glib_explorer.py +++ b/tests/explorers/test_glib_explorer.py @@ -17,9 +17,9 @@ def test_glib_explorer(target_predicate): "cover_initial_holding_prob": 0.0, }) env = CoverEnv() - nsrts = get_gt_nsrts(env.get_name(), env.predicates, - get_gt_options(env.get_name())) - option_model = _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + option_model = _OracleOptionModel(options, env.simulate) train_tasks = [t.task for t in env.get_train_tasks()] # For testing purposes, score everything except target predicate low. 
score_fn = lambda atoms: target_predicate in str(atoms) @@ -85,9 +85,9 @@ def test_glib_explorer_failure_cases(): "explorer": "glib", }) env = CoverEnv() - nsrts = get_gt_nsrts(env.get_name(), env.predicates, - get_gt_options(env.get_name())) - option_model = _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + option_model = _OracleOptionModel(options, env.simulate) train_tasks = [t.task for t in env.get_train_tasks()] score_fn = lambda _: 0.0 task_idx = 0 diff --git a/tests/explorers/test_greedy_lookahead_explorer.py b/tests/explorers/test_greedy_lookahead_explorer.py index 387234ce20..2c0340b9ee 100644 --- a/tests/explorers/test_greedy_lookahead_explorer.py +++ b/tests/explorers/test_greedy_lookahead_explorer.py @@ -18,9 +18,9 @@ def test_greedy_lookahead_explorer(target_predicate): "cover_initial_holding_prob": 0.0, }) env = CoverEnv() - nsrts = get_gt_nsrts(env.get_name(), env.predicates, - get_gt_options(env.get_name())) - option_model = _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + option_model = _OracleOptionModel(options, env.simulate) train_tasks = [t.task for t in env.get_train_tasks()] # For testing purposes, score everything except target predicate low. 
score_fn = lambda atoms, _: target_predicate in str(atoms) @@ -63,9 +63,9 @@ def test_greedy_lookahead_explorer_failure_cases(): "explorer": "greedy_lookahead", }) env = CoverEnv() - nsrts = get_gt_nsrts(env.get_name(), env.predicates, - get_gt_options(env.get_name())) - option_model = _OracleOptionModel(env) + options = get_gt_options(env.get_name()) + nsrts = get_gt_nsrts(env.get_name(), env.predicates, options) + option_model = _OracleOptionModel(options, env.simulate) train_tasks = [t.task for t in env.get_train_tasks()] state_score_fn = lambda _1, _2: 0.0 task_idx = 0 diff --git a/tests/test_option_model.py b/tests/test_option_model.py index c8e168571c..60a436f4d8 100644 --- a/tests/test_option_model.py +++ b/tests/test_option_model.py @@ -53,9 +53,9 @@ def simulate(state, action): class _MockOracleOptionModel(_OracleOptionModel): - def __init__(self, env) -> None: # pylint: disable=super-init-not-called + def __init__(self, simulator) -> None: # pylint: disable=super-init-not-called self._name_to_parameterized_option = {"Pick": parameterized_option} - self._simulator = env.simulate + self._simulator = simulator # Mock option. parameterized_option = ParameterizedOption("Pick", [], params_space, @@ -76,7 +76,7 @@ def __init__(self, env) -> None: # pylint: disable=super-init-not-called obj4: [8, 9, 10], obj9: [11, 12, 13] }) - model = _MockOracleOptionModel(env) + model = _MockOracleOptionModel(env.simulate) next_state, num_act = model.get_next_state_and_num_actions(state, option1) assert num_act == 5 # Test that the option's memory has not been updated. 
@@ -143,15 +143,15 @@ def simulate(state, action): class _MockOracleOptionModel(_OracleOptionModel): - def __init__(self, env) -> None: # pylint: disable=super-init-not-called + def __init__(self, simulator) -> None: # pylint: disable=super-init-not-called self._name_to_parameterized_option = { "InfiniteLearnedOption": infinite_param_opt } - self._simulator = env.simulate + self._simulator = simulator infinite_option = infinite_param_opt.ground([], params1) env = _NoopMockEnv() - model = _MockOracleOptionModel(env) + model = _MockOracleOptionModel(env.simulate) next_state, num_act = model.get_next_state_and_num_actions( state, infinite_option) assert next_state.allclose(state) @@ -163,7 +163,7 @@ def __init__(self, env) -> None: # pylint: disable=super-init-not-called "max_num_steps_option_rollout": 5, }) - model = _MockOracleOptionModel(env) + model = _MockOracleOptionModel(env.simulate) _, num_act = model.get_next_state_and_num_actions(state, infinite_option) assert num_act == 5 diff --git a/tests/test_planning.py b/tests/test_planning.py index f3a4a3508f..3262d18d98 100644 --- a/tests/test_planning.py +++ b/tests/test_planning.py @@ -410,14 +410,8 @@ def simulate(state, action): next_state[robby][1] = 1 return next_state - class _MockOracleOptionModel(_OracleOptionModel): - - def __init__(self, env) -> None: # pylint: disable=super-init-not-called - self._name_to_parameterized_option = {o.name: o for o in options} - self._simulator = env.simulate - env = _MockEnv() - option_model = _MockOracleOptionModel(env) + option_model = _OracleOptionModel(options, env.simulate) # Check that sesame_plan is deterministic, over both NSRTs and objects. plan1 = [ (act.name, act.objects) for act in sesame_plan(