From ac5d87df7ae7df5def5482d9a7d4b27e1b3813ba Mon Sep 17 00:00:00 2001 From: Nishanth Kumar Date: Wed, 18 Oct 2023 13:46:00 -0400 Subject: [PATCH] ready for new supercloud experiments --- .../active_sampler_learning_approach.py | 6 +++ .../approaches/bilevel_planning_approach.py | 1 - predicators/envs/sticky_table.py | 2 +- .../explorers/active_sampler_explorer.py | 2 +- .../sticky_table/options.py | 4 +- predicators/utils.py | 43 +++++++++++++++---- scripts/configs/active_sampler_learning.yaml | 9 ++-- 7 files changed, 49 insertions(+), 18 deletions(-) diff --git a/predicators/approaches/active_sampler_learning_approach.py b/predicators/approaches/active_sampler_learning_approach.py index 932b17503f..f559024789 100644 --- a/predicators/approaches/active_sampler_learning_approach.py +++ b/predicators/approaches/active_sampler_learning_approach.py @@ -593,6 +593,12 @@ def _sample(state: State, goal: Set[GroundAtom], rng: np.random.Generator, # Randomly select a sample to pick, following the epsilon # greedy strategy! idx = rng.integers(0, len(scores)) + # logging.info("\n") + # logging.info(samples) + # logging.info(scores) + # logging.info("\n") + # if len(set(scores)) > 1: + # import ipdb; ipdb.set_trace() else: raise NotImplementedError('Exploration strategy ' + f'{strategy} ' + 'is not implemented.') diff --git a/predicators/approaches/bilevel_planning_approach.py b/predicators/approaches/bilevel_planning_approach.py index e09f5895b8..cb876ea9c8 100644 --- a/predicators/approaches/bilevel_planning_approach.py +++ b/predicators/approaches/bilevel_planning_approach.py @@ -55,7 +55,6 @@ def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]: seed = self._seed + self._num_calls nsrts = self._get_current_nsrts() preds = self._get_current_predicates() - # Run task planning only and then greedily sample and execute in the # policy. if self._plan_without_sim: diff --git a/predicators/envs/sticky_table.py b/predicators/envs/sticky_table.py index 7b023b7e36..1267c975fe 100644 --- a/predicators/envs/sticky_table.py +++ b/predicators/envs/sticky_table.py @@ -191,7 +191,7 @@ def simulate(self, state: State, action: Action) -> State: if self._table_is_sticky(table, state): # Check if placing on the smooth side of the sticky table. table_y = state.get(table, "y") - if self.sticky_surface_mode == "half" and act_y < table_y + 0.15 * (state.get(table, "radius") - (state.get(cube, "size") / 2)): + if self.sticky_surface_mode == "half" and act_y < table_y + 0.35 * (state.get(table, "radius") - (state.get(cube, "size") / 2)): if obj_being_held in [cube, cup]: fall_prob = self._place_smooth_fall_prob else: diff --git a/predicators/explorers/active_sampler_explorer.py b/predicators/explorers/active_sampler_explorer.py index 89610a7ea6..ac7054b80e 100644 --- a/predicators/explorers/active_sampler_explorer.py +++ b/predicators/explorers/active_sampler_explorer.py @@ -97,7 +97,7 @@ def _get_exploration_strategy(self, train_task_idx: int, assigned_task = self._train_tasks[train_task_idx] assigned_task_finished = False - assigned_task_horizon = CFG.horizon * 1.5 + assigned_task_horizon = CFG.horizon current_policy: Optional[Callable[[State], _Option]] = None next_practice_nsrt: Optional[_GroundNSRT] = None using_random = False diff --git a/predicators/ground_truth_models/sticky_table/options.py b/predicators/ground_truth_models/sticky_table/options.py index 55f89d52b1..ee13284a39 100644 --- a/predicators/ground_truth_models/sticky_table/options.py +++ b/predicators/ground_truth_models/sticky_table/options.py @@ -143,7 +143,7 @@ def get_options(cls, env_name: str, types: Dict[str, Type], types=[robot_type, ball_type, cup_type, table_type]) PlaceCupWithBallOnFloor = utils.SingletonParameterizedOption( - # variables: [robot, ball, cup, floor] + # variables: [robot, ball, cup] "PlaceCupWithBallOnFloor", cls._create_pass_through_policy(action_space), # Parameters are absolute x, y actions. @@ -151,7 +151,7 @@ def get_options(cls, env_name: str, types: Dict[str, Type], types=[robot_type, ball_type, cup_type]) PlaceCupWithoutBallOnFloor = utils.SingletonParameterizedOption( - # variables: [robot, ball, cup, floor] + # variables: [robot, ball, cup] "PlaceCupWithoutBallOnFloor", cls._create_pass_through_policy(action_space), # Parameters are absolute x, y actions. diff --git a/predicators/utils.py b/predicators/utils.py index 6b67cb2ecd..d495745c1b 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -275,7 +275,6 @@ def construct_active_sampler_input(state: State, objects: Sequence[Object], for obj in objects: sampler_input_lst.extend(state[obj]) sampler_input_lst.extend(params) - else: assert CFG.active_sampler_learning_feature_selection == "oracle" if CFG.env == "bumpy_cover": @@ -300,22 +299,48 @@ def construct_active_sampler_input(state: State, objects: Sequence[Object], assert len(params) == 1 sampler_input_lst.append(params[0] - target_pos) elif "sticky_table" in CFG.env: - if "Place" in param_option.name and "Table" in param_option.name: - table = objects[-1] - robot = objects[0] + if "PlaceCup" in param_option.name and "Table" in param_option.name: + robot, ball, cup, table = objects robot_y = state.get(robot, "y") robot_x = state.get(robot, "x") table_y = state.get(table, "y") table_x = state.get(table, "x") sticky = state.get(table, "sticky") table_radius = state.get(table, "radius") - _, _, _, param_x, param_y = params - sampler_input_lst.append(robot_x - table_x) - sampler_input_lst.append(robot_y - table_y) + a, b, c, param_x, param_y = params sampler_input_lst.append(table_radius) sampler_input_lst.append(sticky) - sampler_input_lst.append(param_x - table_x) - sampler_input_lst.append(param_y - table_y) + sampler_input_lst.append(robot_x) + sampler_input_lst.append(robot_y) + sampler_input_lst.append(table_x) + sampler_input_lst.append(table_y) + sampler_input_lst.append(a) + sampler_input_lst.append(b) + sampler_input_lst.append(c) + sampler_input_lst.append(param_x) + sampler_input_lst.append(param_y) + # if "Place" in param_option.name and "Table" in param_option.name: + # table = objects[-1] + # robot = objects[0] + # robot_y = state.get(robot, "y") + # robot_x = state.get(robot, "x") + # table_y = state.get(table, "y") + # table_x = state.get(table, "x") + # sticky = state.get(table, "sticky") + # table_radius = state.get(table, "radius") + # _, _, _, param_x, param_y = params + # # sampler_input_lst.append(robot_x - table_x) + # # sampler_input_lst.append(robot_y - table_y) + # sampler_input_lst.append(table_radius) + # sampler_input_lst.append(sticky) + # sampler_input_lst.append(robot_x) + # sampler_input_lst.append(robot_y) + # sampler_input_lst.append(table_x) + # sampler_input_lst.append(table_y) + # sampler_input_lst.append(param_x) + # sampler_input_lst.append(param_y) + # # sampler_input_lst.append(param_x - table_x) + # # sampler_input_lst.append(param_y - table_y) elif "NavigateTo" in param_option.name: _, obj = objects obj_x = state.get(obj, "x") diff --git a/scripts/configs/active_sampler_learning.yaml b/scripts/configs/active_sampler_learning.yaml index 02c17cee4b..b926354cdf 100644 --- a/scripts/configs/active_sampler_learning.yaml +++ b/scripts/configs/active_sampler_learning.yaml @@ -48,9 +48,10 @@ ENVS: sticky_table_num_sticky_tables: 1 sticky_table_num_tables: 5 sticky_table_place_ball_fall_prob: 1.00 - active_sampler_learning_explore_length_base: 12 + active_sampler_learning_explore_length_base: 20 active_sampler_learning_exploration_epsilon: 0.1 skill_competence_model_optimistic_recency_size: 2 + skill_competence_model_optimistic_window_size: 2 # sticky_table_tricky_floor: # NAME: "sticky_table_tricky_floor" # regional_bumpy_cover: @@ -84,11 +85,11 @@ FLAGS: active_sampler_learning_model: "myopic_classifier_mlp" active_sampler_learning_use_teacher: False online_nsrt_learning_requests_per_cycle: 1 - max_num_steps_interaction_request: 24 - num_online_learning_cycles: 10 + max_num_steps_interaction_request: 100 + num_online_learning_cycles: 5 sesame_task_planner: "fdopt-costs" sesame_grounder: "fd_translator" - horizon: 12 + horizon: 8 active_sampler_learning_feature_selection: oracle START_SEED: 456 NUM_SEEDS: 10