From ac5d87df7ae7df5def5482d9a7d4b27e1b3813ba Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Wed, 18 Oct 2023 13:46:00 -0400
Subject: [PATCH] Tune sticky_table env, sampler inputs, and configs for new supercloud experiments

---
 .../active_sampler_learning_approach.py       |  6 +++
 .../approaches/bilevel_planning_approach.py   |  1 -
 predicators/envs/sticky_table.py              |  2 +-
 .../explorers/active_sampler_explorer.py      |  2 +-
 .../sticky_table/options.py                   |  4 +-
 predicators/utils.py                          | 43 +++++++++++++++----
 scripts/configs/active_sampler_learning.yaml  |  9 ++--
 7 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/predicators/approaches/active_sampler_learning_approach.py b/predicators/approaches/active_sampler_learning_approach.py
index 932b17503f..f559024789 100644
--- a/predicators/approaches/active_sampler_learning_approach.py
+++ b/predicators/approaches/active_sampler_learning_approach.py
@@ -593,6 +593,12 @@ def _sample(state: State, goal: Set[GroundAtom], rng: np.random.Generator,
                 # Randomly select a sample to pick, following the epsilon
                 # greedy strategy!
                 idx = rng.integers(0, len(scores))
+            # logging.info("\n")
+            # logging.info(samples)
+            # logging.info(scores)
+            # logging.info("\n")
+            # if len(set(scores)) > 1:
+            #     import ipdb; ipdb.set_trace()
         else:
             raise NotImplementedError('Exploration strategy ' +
                                       f'{strategy} ' + 'is not implemented.')
diff --git a/predicators/approaches/bilevel_planning_approach.py b/predicators/approaches/bilevel_planning_approach.py
index e09f5895b8..cb876ea9c8 100644
--- a/predicators/approaches/bilevel_planning_approach.py
+++ b/predicators/approaches/bilevel_planning_approach.py
@@ -55,7 +55,6 @@ def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]:
         seed = self._seed + self._num_calls
         nsrts = self._get_current_nsrts()
         preds = self._get_current_predicates()
-
         # Run task planning only and then greedily sample and execute in the
         # policy.
         if self._plan_without_sim:
diff --git a/predicators/envs/sticky_table.py b/predicators/envs/sticky_table.py
index 7b023b7e36..1267c975fe 100644
--- a/predicators/envs/sticky_table.py
+++ b/predicators/envs/sticky_table.py
@@ -191,7 +191,7 @@ def simulate(self, state: State, action: Action) -> State:
                         if self._table_is_sticky(table, state):
                             # Check if placing on the smooth side of the sticky table.
                             table_y = state.get(table, "y")
-                            if self.sticky_surface_mode == "half" and act_y < table_y + 0.15 * (state.get(table, "radius") - (state.get(cube, "size") / 2)):
+                            if self.sticky_surface_mode == "half" and act_y < table_y + 0.35 * (state.get(table, "radius") - (state.get(cube, "size") / 2)):
                                 if obj_being_held in [cube, cup]:
                                     fall_prob = self._place_smooth_fall_prob
                                 else:
diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py
index 89610a7ea6..ac7054b80e 100644
--- a/predicators/explorers/active_sampler_explorer.py
+++ b/predicators/explorers/active_sampler_explorer.py
@@ -97,7 +97,7 @@ def _get_exploration_strategy(self, train_task_idx: int,
 
         assigned_task = self._train_tasks[train_task_idx]
         assigned_task_finished = False
-        assigned_task_horizon = CFG.horizon * 1.5
+        assigned_task_horizon = CFG.horizon
         current_policy: Optional[Callable[[State], _Option]] = None
         next_practice_nsrt: Optional[_GroundNSRT] = None
         using_random = False
diff --git a/predicators/ground_truth_models/sticky_table/options.py b/predicators/ground_truth_models/sticky_table/options.py
index 55f89d52b1..ee13284a39 100644
--- a/predicators/ground_truth_models/sticky_table/options.py
+++ b/predicators/ground_truth_models/sticky_table/options.py
@@ -143,7 +143,7 @@ def get_options(cls, env_name: str, types: Dict[str, Type],
             types=[robot_type, ball_type, cup_type, table_type])
 
         PlaceCupWithBallOnFloor = utils.SingletonParameterizedOption(
-            # variables: [robot, ball, cup, floor]
+            # variables: [robot, ball, cup]
             "PlaceCupWithBallOnFloor",
             cls._create_pass_through_policy(action_space),
             # Parameters are absolute x, y actions.
@@ -151,7 +151,7 @@ def get_options(cls, env_name: str, types: Dict[str, Type],
             types=[robot_type, ball_type, cup_type])
 
         PlaceCupWithoutBallOnFloor = utils.SingletonParameterizedOption(
-            # variables: [robot, ball, cup, floor]
+            # variables: [robot, ball, cup]
             "PlaceCupWithoutBallOnFloor",
             cls._create_pass_through_policy(action_space),
             # Parameters are absolute x, y actions.
diff --git a/predicators/utils.py b/predicators/utils.py
index 6b67cb2ecd..d495745c1b 100644
--- a/predicators/utils.py
+++ b/predicators/utils.py
@@ -275,7 +275,6 @@ def construct_active_sampler_input(state: State, objects: Sequence[Object],
         for obj in objects:
             sampler_input_lst.extend(state[obj])
         sampler_input_lst.extend(params)
-
     else:
         assert CFG.active_sampler_learning_feature_selection == "oracle"
         if CFG.env == "bumpy_cover":
@@ -300,22 +299,48 @@ def construct_active_sampler_input(state: State, objects: Sequence[Object],
                 assert len(params) == 1
                 sampler_input_lst.append(params[0] - target_pos)
         elif "sticky_table" in CFG.env:
-            if "Place" in param_option.name and "Table" in param_option.name:
-                table = objects[-1]
-                robot = objects[0]
+            if "PlaceCup" in param_option.name and "Table" in param_option.name:
+                robot, ball, cup, table = objects
                 robot_y = state.get(robot, "y")
                 robot_x = state.get(robot, "x")
                 table_y = state.get(table, "y")
                 table_x = state.get(table, "x")
                 sticky = state.get(table, "sticky")
                 table_radius = state.get(table, "radius")
-                _, _, _, param_x, param_y = params
-                sampler_input_lst.append(robot_x - table_x)
-                sampler_input_lst.append(robot_y - table_y)
+                a, b, c, param_x, param_y = params
                 sampler_input_lst.append(table_radius)
                 sampler_input_lst.append(sticky)
-                sampler_input_lst.append(param_x - table_x)
-                sampler_input_lst.append(param_y - table_y)
+                sampler_input_lst.append(robot_x)
+                sampler_input_lst.append(robot_y)
+                sampler_input_lst.append(table_x)
+                sampler_input_lst.append(table_y)
+                sampler_input_lst.append(a)
+                sampler_input_lst.append(b)
+                sampler_input_lst.append(c)
+                sampler_input_lst.append(param_x)
+                sampler_input_lst.append(param_y)
+            # if "Place" in param_option.name and "Table" in param_option.name:
+            #     table = objects[-1]
+            #     robot = objects[0]
+            #     robot_y = state.get(robot, "y")
+            #     robot_x = state.get(robot, "x")
+            #     table_y = state.get(table, "y")
+            #     table_x = state.get(table, "x")
+            #     sticky = state.get(table, "sticky")
+            #     table_radius = state.get(table, "radius")
+            #     _, _, _, param_x, param_y = params
+            #     # sampler_input_lst.append(robot_x - table_x)
+            #     # sampler_input_lst.append(robot_y - table_y)
+            #     sampler_input_lst.append(table_radius)
+            #     sampler_input_lst.append(sticky)
+            #     sampler_input_lst.append(robot_x)
+            #     sampler_input_lst.append(robot_y)
+            #     sampler_input_lst.append(table_x)
+            #     sampler_input_lst.append(table_y)
+            #     sampler_input_lst.append(param_x)
+            #     sampler_input_lst.append(param_y)
+            #     # sampler_input_lst.append(param_x - table_x)
+            #     # sampler_input_lst.append(param_y - table_y)
             elif "NavigateTo" in param_option.name:
                 _, obj = objects
                 obj_x = state.get(obj, "x")
diff --git a/scripts/configs/active_sampler_learning.yaml b/scripts/configs/active_sampler_learning.yaml
index 02c17cee4b..b926354cdf 100644
--- a/scripts/configs/active_sampler_learning.yaml
+++ b/scripts/configs/active_sampler_learning.yaml
@@ -48,9 +48,10 @@ ENVS:
       sticky_table_num_sticky_tables: 1
       sticky_table_num_tables: 5
       sticky_table_place_ball_fall_prob: 1.00
-      active_sampler_learning_explore_length_base: 12
+      active_sampler_learning_explore_length_base: 20
       active_sampler_learning_exploration_epsilon: 0.1
       skill_competence_model_optimistic_recency_size: 2
+      skill_competence_model_optimistic_window_size: 2
   # sticky_table_tricky_floor:
   #   NAME: "sticky_table_tricky_floor"
   # regional_bumpy_cover:
@@ -84,11 +85,11 @@ FLAGS:
   active_sampler_learning_model: "myopic_classifier_mlp"
   active_sampler_learning_use_teacher: False
   online_nsrt_learning_requests_per_cycle: 1
-  max_num_steps_interaction_request: 24
-  num_online_learning_cycles: 10
+  max_num_steps_interaction_request: 100
+  num_online_learning_cycles: 5
   sesame_task_planner: "fdopt-costs"
   sesame_grounder: "fd_translator"
-  horizon: 12
+  horizon: 8
   active_sampler_learning_feature_selection: oracle
 START_SEED: 456
 NUM_SEEDS: 10