Ball and Cup Sticky Table Env (#1576)

* initial commit that seems to run without error... * fix bug in placing logic * delete outdated comment * fix replanning bug * more data = better results??? * starting tests * try oracle feature selection? * fix buggy test * increase training time? * yapf + fix tom comment * fix reachability issue in placing * minor * more unit tests * fix and more tests * this should be interesting * see if this yields a difference * let's see what happens now * woops * try removing placing cup with the ball on the table * hail mary * minor changes + logging * run task repeat first * sticky table with moving radius * yay! try other approaches... * polar coordinates ftw! * try a simpler thing * let's see how this does. * try more probability of success * all baselines * try running grid row env * most things passing * try this * progress towards PR * should be ready! * revert unnecessary change * fix linting * tom comments --------- Co-authored-by: Tom Silver <[email protected]>
Learning-and-Intelligent-Systems · Oct 27, 2023 · 3090591 · 3090591
1 parent db29a6c
commit 3090591
Show file tree

Hide file tree

Showing 12 changed files with 1,740 additions and 61 deletions.
diff --git a/predicators/envs/ball_and_cup_sticky_table.py b/predicators/envs/ball_and_cup_sticky_table.py
diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py
@@ -213,7 +213,11 @@ def generate_goals() -> Iterator[Set[GroundAtom]]:
                 for goal in generate_goals():
                     task = Task(state, goal)
                     logging.info(f"[Explorer] Replanning to {task.goal}")
-
+                    # If the goal is empty, then we can just recursively
+                    # call the policy, since we don't need to execute
+                    # anything.
+                    if len(goal) == 0:
+                        return _option_policy(state)  # pragma: no cover
                     # Add this task to the re-planning task queue.
                     self._replanning_tasks.append(task)
 

diff --git a/predicators/ground_truth_models/ball_and_cup_sticky_table/__init__.py b/predicators/ground_truth_models/ball_and_cup_sticky_table/__init__.py
@@ -0,0 +1,9 @@
+"""Ground-truth models for ball and cup sticky table environment."""
+
+from .nsrts import BallAndCupStickyTableGroundTruthNSRTFactory
+from .options import BallAndCupStickyTableGroundTruthOptionFactory
+
+__all__ = [
+    "BallAndCupStickyTableGroundTruthNSRTFactory",
+    "BallAndCupStickyTableGroundTruthOptionFactory"
+]
diff --git a/predicators/ground_truth_models/ball_and_cup_sticky_table/nsrts.py b/predicators/ground_truth_models/ball_and_cup_sticky_table/nsrts.py
diff --git a/predicators/ground_truth_models/ball_and_cup_sticky_table/options.py b/predicators/ground_truth_models/ball_and_cup_sticky_table/options.py
@@ -0,0 +1,198 @@
+"""Ground-truth options for the ball and cup sticky table environment."""
+
+from typing import Dict, Sequence, Set
+
+import numpy as np
+from gym.spaces import Box
+
+from predicators import utils
+from predicators.envs.ball_and_cup_sticky_table import BallAndCupStickyTableEnv
+from predicators.ground_truth_models import GroundTruthOptionFactory
+from predicators.structs import Action, Array, Object, ParameterizedOption, \
+    ParameterizedPolicy, Predicate, State, Type
+
+
+class BallAndCupStickyTableGroundTruthOptionFactory(GroundTruthOptionFactory):
+    """Ground-truth options for the ball and cup sticky table environment."""
+
+    @classmethod
+    def get_env_names(cls) -> Set[str]:
+        return {"ball_and_cup_sticky_table"}
+
+    @classmethod
+    def get_options(cls, env_name: str, types: Dict[str, Type],
+                    predicates: Dict[str, Predicate],
+                    action_space: Box) -> Set[ParameterizedOption]:
+
+        cup_type = types["cup"]
+        ball_type = types["ball"]
+        table_type = types["table"]
+        # Parameters are move_or_pickplace, obj_type_id, ball_only,
+        # absolute x, y actions.
+        params_space = Box(
+            np.array([
+                0.0, 0.0, 0.0, BallAndCupStickyTableEnv.x_lb,
+                BallAndCupStickyTableEnv.y_lb
+            ]),
+            np.array([
+                1.0, 3.0, 1.0, BallAndCupStickyTableEnv.x_ub,
+                BallAndCupStickyTableEnv.y_ub
+            ]))
+        robot_type = types["robot"]
+
+        PickBallFromTable = utils.SingletonParameterizedOption(
+            # variables: [robot, ball, cup, table]
+            "PickBallFromTable",
+            cls._create_pass_through_policy(action_space),
+            params_space=params_space,
+            types=[robot_type, ball_type, cup_type, table_type])
+
+        PickBallFromFloor = utils.SingletonParameterizedOption(
+            # variables: [robot, ball, cup]
+            "PickBallFromFloor",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, ball_type, cup_type])
+
+        PlaceBallOnTable = utils.SingletonParameterizedOption(
+            # variables: [robot, ball, cup, table]
+            "PlaceBallOnTable",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, ball_type, cup_type, table_type])
+
+        PlaceBallOnFloor = utils.SingletonParameterizedOption(
+            # variables: [robot, cup, ball]
+            "PlaceBallOnFloor",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, cup_type, ball_type])
+
+        PickCupWithoutBallFromTable = utils.SingletonParameterizedOption(
+            # variables: [robot, cup, ball, table]
+            "PickCupWithoutBallFromTable",
+            cls._create_pass_through_policy(action_space),
+            params_space=params_space,
+            types=[robot_type, cup_type, ball_type, table_type])
+
+        PickCupWithBallFromTable = utils.SingletonParameterizedOption(
+            # variables: [robot, cup, ball, table]
+            "PickCupWithBallFromTable",
+            cls._create_pass_through_policy(action_space),
+            params_space=params_space,
+            types=[robot_type, cup_type, ball_type, table_type])
+
+        PickCupWithoutBallFromFloor = utils.SingletonParameterizedOption(
+            # variables: [robot, cup, ball]
+            "PickCupWithoutBallFromFloor",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, cup_type, ball_type])
+
+        PickCupWithBallFromFloor = utils.SingletonParameterizedOption(
+            # variables: [robot, cup, ball]
+            "PickCupWithBallFromFloor",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, cup_type, ball_type])
+
+        PlaceCupWithoutBallOnTable = utils.SingletonParameterizedOption(
+            # variables: [robot, ball, cup, table]
+            "PlaceCupWithoutBallOnTable",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, ball_type, cup_type, table_type])
+
+        PlaceCupWithBallOnFloor = utils.SingletonParameterizedOption(
+            # variables: [robot, ball, cup]
+            "PlaceCupWithBallOnFloor",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, ball_type, cup_type])
+
+        PlaceCupWithoutBallOnFloor = utils.SingletonParameterizedOption(
+            # variables: [robot, ball, cup]
+            "PlaceCupWithoutBallOnFloor",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, ball_type, cup_type])
+
+        PlaceBallInCupOnFloor = utils.SingletonParameterizedOption(
+            # variables: [robot, ball, cup]
+            "PlaceBallInCupOnFloor",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, ball_type, cup_type])
+
+        PlaceBallInCupOnTable = utils.SingletonParameterizedOption(
+            # variables: [robot, ball, cup, table]
+            "PlaceBallInCupOnTable",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, ball_type, cup_type, table_type])
+
+        NavigateToTable = utils.SingletonParameterizedOption(
+            # variables: [robot, table]
+            "NavigateToTable",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, table_type])
+
+        NavigateToBall = utils.SingletonParameterizedOption(
+            # variables: [robot, ball]
+            "NavigateToBall",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, ball_type])
+
+        NavigateToCup = utils.SingletonParameterizedOption(
+            # variables: [robot, cup]
+            "NavigateToCup",
+            cls._create_pass_through_policy(action_space),
+            # Parameters follow the shared 5-D params_space defined above.
+            params_space=params_space,
+            types=[robot_type, cup_type])
+
+        return {
+            NavigateToTable,
+            PickBallFromTable,
+            PickBallFromFloor,
+            PlaceBallOnTable,
+            PlaceBallOnFloor,
+            PickCupWithoutBallFromTable,
+            PickCupWithBallFromTable,
+            PickCupWithoutBallFromFloor,
+            PickCupWithBallFromFloor,  #PlaceCupWithBallOnTable,
+            PlaceCupWithoutBallOnTable,
+            PlaceCupWithBallOnFloor,
+            PlaceCupWithoutBallOnFloor,
+            PlaceBallInCupOnFloor,
+            PlaceBallInCupOnTable,
+            NavigateToBall,
+            NavigateToCup
+        }
+
+    @classmethod
+    def _create_pass_through_policy(cls,
+                                    action_space: Box) -> ParameterizedPolicy:
+
+        def policy(state: State, memory: Dict, objects: Sequence[Object],
+                   params: Array) -> Action:
+            del state, memory, objects  # unused
+            arr = np.array(params, dtype=np.float32)
+            arr = np.clip(arr, action_space.low, action_space.high)
+            return Action(arr)
+
+        return policy
diff --git a/predicators/settings.py b/predicators/settings.py
@@ -328,6 +328,12 @@ class GlobalSettings:
     sticky_table_place_sticky_fall_prob = 0.05
     sticky_table_pick_success_prob = 0.9
     sticky_table_tricky_floor_place_sticky_fall_prob = 0.5
+    sticky_table_num_tables = 5  # cannot be less than 3
+    sticky_table_place_smooth_fall_prob = 0.6
+    sticky_table_place_sticky_fall_prob = 0.00
+    sticky_table_place_ball_fall_prob = 1.00
+    sticky_table_pick_success_prob = 1.00
+    sticky_table_num_sticky_tables = 1  # must be less than the num_tables
 
     # grid row env parameters
     grid_row_num_cells = 100

diff --git a/predicators/utils.py b/predicators/utils.py
@@ -278,27 +278,54 @@ def construct_active_sampler_input(state: State, objects: Sequence[Object],
 
     else:
         assert CFG.active_sampler_learning_feature_selection == "oracle"
-        assert CFG.env == "bumpy_cover"
-        if param_option.name == "Pick":
-            # In this case, the x-data should be
-            # [block_bumpy, relative_pick_loc]
-            assert len(objects) == 1
-            block = objects[0]
-            block_pos = state[block][3]
-            block_bumpy = state[block][5]
-            sampler_input_lst.append(block_bumpy)
-            assert len(params) == 1
-            sampler_input_lst.append(params[0] - block_pos)
+        if CFG.env == "bumpy_cover":
+            if param_option.name == "Pick":
+                # In this case, the x-data should be
+                # [block_bumpy, relative_pick_loc]
+                assert len(objects) == 1
+                block = objects[0]
+                block_pos = state[block][3]
+                block_bumpy = state[block][5]
+                sampler_input_lst.append(block_bumpy)
+                assert len(params) == 1
+                sampler_input_lst.append(params[0] - block_pos)
+            else:
+                assert param_option.name == "Place"
+                assert len(objects) == 2
+                block, target = objects
+                target_pos = state[target][3]
+                grasp = state[block][4]
+                target_width = state[target][2]
+                sampler_input_lst.extend([grasp, target_width])
+                assert len(params) == 1
+                sampler_input_lst.append(params[0] - target_pos)
+        elif CFG.env == "ball_and_cup_sticky_table":
+            if "PlaceCup" in param_option.name and "Table" in param_option.name:
+                _, _, _, table = objects
+                table_y = state.get(table, "y")
+                table_x = state.get(table, "x")
+                sticky = state.get(table, "sticky")
+                sticky_region_x = state.get(table, "sticky_region_x_offset")
+                sticky_region_y = state.get(table, "sticky_region_y_offset")
+                sticky_region_radius = state.get(table, "sticky_region_radius")
+                table_radius = state.get(table, "radius")
+                _, _, _, param_x, param_y = params
+                sampler_input_lst.append(table_radius)
+                sampler_input_lst.append(sticky)
+                sampler_input_lst.append(sticky_region_x)
+                sampler_input_lst.append(sticky_region_y)
+                sampler_input_lst.append(sticky_region_radius)
+                sampler_input_lst.append(table_x)
+                sampler_input_lst.append(table_y)
+                sampler_input_lst.append(param_x)
+                sampler_input_lst.append(param_y)
+            else:  # Use all features.
+                for obj in objects:
+                    sampler_input_lst.extend(state[obj])
+                sampler_input_lst.extend(params)
         else:
-            assert param_option.name == "Place"
-            assert len(objects) == 2
-            block, target = objects
-            target_pos = state[target][3]
-            grasp = state[block][4]
-            target_width = state[target][2]
-            sampler_input_lst.extend([grasp, target_width])
-            assert len(params) == 1
-            sampler_input_lst.append(params[0] - target_pos)
+            raise NotImplementedError("Oracle feature selection not "
+                                      f"implemented for {CFG.env}")
 
     return np.array(sampler_input_lst)
 
@@ -362,6 +389,12 @@ def plot(self, ax: plt.Axes, **kwargs: Any) -> None:
     def contains_point(self, x: float, y: float) -> bool:
         return (x - self.x)**2 + (y - self.y)**2 <= self.radius**2
 
+    def contains_circle(self, other_circle: Circle) -> bool:
+        """Check whether this circle wholly contains another one."""
+        dist_between_centers = np.sqrt((other_circle.x - self.x)**2 +
+                                       (other_circle.y - self.y)**2)
+        return (dist_between_centers + other_circle.radius) <= self.radius
+
 
 @dataclass(frozen=True)
 class Triangle(_Geom2D):
@@ -3452,3 +3485,48 @@ def beta_from_mean_and_variance(mean: float,
     rv = BetaRV(alpha, beta)
     assert abs(rv.mean() - mean) < 1e-6
     return rv
+
+
+def _obs_to_state_pass_through(obs: Observation) -> State:
+    """Helper for run_ground_nsrt_with_assertions."""
+    assert isinstance(obs, State)
+    return obs
+
+
+def run_ground_nsrt_with_assertions(ground_nsrt: _GroundNSRT,
+                                    state: State,
+                                    env: BaseEnv,
+                                    rng: np.random.Generator,
+                                    override_params: Optional[Array] = None,
+                                    obs_to_state: Callable[
+                                        [Observation],
+                                        State] = _obs_to_state_pass_through,
+                                    assert_effects: bool = True,
+                                    max_steps: int = 400) -> State:
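+    """Utility for tests: run a ground NSRT in env and assert its effects.
+
+    Asserts that every precondition of ground_nsrt holds in state, samples
+    an option from it (re-grounded with override_params when given), and
+    steps env with the option's policy until the option is terminal or
+    max_steps actions have been taken. If assert_effects is True, asserts
+    that every add effect holds and no delete effect holds in the final
+    state. Returns the final state.
+
+    NOTE: assumes that the internal state of env corresponds to state.
+    """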
+    ground_nsrt_str = f"{ground_nsrt.name}{ground_nsrt.objects}"
+    for atom in ground_nsrt.preconditions:
+        assert atom.holds(state), \
+            f"Precondition for {ground_nsrt_str} failed: {atom}"
+    option = ground_nsrt.sample_option(state, set(), rng)
+    if override_params is not None:
+        option = option.parent.ground(option.objects,
+                                      override_params)  # pragma: no cover
+    assert option.initiable(state)
+    for _ in range(max_steps):
+        act = option.policy(state)
+        obs = env.step(act)
+        state = obs_to_state(obs)
+        if option.terminal(state):
+            break
+    if assert_effects:
+        for atom in ground_nsrt.add_effects:
+            assert atom.holds(state), \
+                f"Add effect for {ground_nsrt_str} failed: {atom}"
+        for atom in ground_nsrt.delete_effects:
+            assert not atom.holds(state), \
+                f"Delete effect for {ground_nsrt_str} failed: {atom}"
+    return state