From 6519fa01c53f701606397c0030b0b69f94221b7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cmatrixbalto=E2=80=9D?= <“ilfangliu@gmail.com”>
Date: Sat, 11 Jan 2025 00:11:30 -0500
Subject: [PATCH] rapid learn fixed, coffee fixed

---
 .../approaches/bridge_policy_approach.py      | 178 +++-
 predicators/approaches/maple_q_approach.py    |  85 +-
 predicators/envs/coffee.py                    |  23 +-
 predicators/envs/grid_row.py                  |   2 +-
 .../ground_truth_models/coffee/options.py     |  69 +-
 predicators/ml_models.py                      |   5 +-
 predicators/settings.py                       |   6 +-
 tests/envs/test_coffee.py                     | 803 +++++++++++++-----
 8 files changed, 891 insertions(+), 280 deletions(-)

diff --git a/predicators/approaches/bridge_policy_approach.py b/predicators/approaches/bridge_policy_approach.py
index 3e02cba687..964ae969e7 100644
--- a/predicators/approaches/bridge_policy_approach.py
+++ b/predicators/approaches/bridge_policy_approach.py
@@ -608,6 +608,8 @@ def _termination_fn(s: State) -> bool:
             lambda s: None, _termination_fn)
         return request
 
+
+
     def learn_from_interaction_results(
             self, results: Sequence[InteractionResult]) -> None:
         # Turn state action pairs from results into trajectories
@@ -670,11 +672,22 @@ def learn_from_interaction_results(
 
             # Find the last planner state before this point
             last_planner_idx = -1
+            last_planner_state = None
             for i in range(j, -1, -1):
                 if policy_log[i] == "planner":
+                    last_planner_state = result.states[i]
+                    break
+            last_planner_atoms = utils.abstract(
+                last_planner_state, self._get_current_predicates())
+            # last_planner_idx = next(j for j, atoms in enumerate(plan_log)
+            #                         if last_planner_atoms.issubset(atoms))
+            for i in range(len(plan_log[j])):
+                if plan_log[j][i].issubset(last_planner_atoms):
                     last_planner_idx = i
                     break
-
+            # import ipdb;ipdb.set_trace()
+            # try:
+            #     last_planner_idx = plan_log[j].index(last_planner_atoms)
+            # except:
+            #     import ipdb;ipdb.set_trace()
             if last_planner_idx != -1:
                 # Get all states from last planner state onwards in the plan
                 safe_states = plan_log[j][last_planner_idx+1:]
@@ -687,7 +700,7 @@ def learn_from_interaction_results(
                         print(future_state)
                         print(current_atoms)
                         print(safe_states)
                         # import ipdb;ipdb.set_trace()
                         rwd = 1000  # We found a match, this is a safe state
                         num_rewards += 1
                         break
@@ -695,8 +708,8 @@ def learn_from_interaction_results(
                 reward_bonus.append(rwd)
 
             print("num rewards", num_rewards)
             # if num_rewards > 1:
             #     import ipdb;ipdb.set_trace()
             reward_bonuses.append(reward_bonus)
             mapleq_states.append(result.states[-1])
             new_traj = LowLevelTrajectory(mapleq_states, mapleq_actions)
@@ -994,9 +1007,21 @@ def _policy(s: State) -> Action:
                     self._current_control = "planner"
                     self._current_policy = self.planning_policy
                     # import ipdb;ipdb.set_trace()
-                    print("bridge done")
-                    print(self._current_plan)
+                    # print("bridge done")
+                    # print(self._current_plan)
                     break
+            if self.bridge_done:
+                current_task = Task(s, self._current_task.goal)
+                task_list, option_policy = self._get_option_policy_by_planning(
+                    current_task, CFG.timeout)
+                self._current_plan = task_list
+                self._current_policy = utils.option_policy_to_policy(
+                    option_policy,
+                    max_option_steps=CFG.max_num_steps_option_rollout,
+                    raise_error_on_repeated_state=True,
+                )
+
+                self.bridge_done = False
 
             # # Normal execution. Either keep executing the current option, or
             # # switch to the next option if it has terminated.
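A minimal sketch (not part of the patch) of the reward-shaping rule that the learn_from_interaction_results hunks above implement: a transition collected under the bridge policy earns a bonus when its abstracted state matches an atom set the planner's plan still expected to visit. The helper name shaping_bonus and the subset test are illustrative stand-ins, since the exact match condition lives in unchanged context lines of the file.

from typing import FrozenSet, Sequence

def shaping_bonus(current_atoms: FrozenSet,
                  plan_atoms: Sequence[FrozenSet],
                  last_planner_idx: int,
                  bonus: float = 1000.0) -> float:
    # Atom sets after the last planner-visited index are the "safe"
    # abstract states that the plan still expects to reach.
    for future_atoms in plan_atoms[last_planner_idx + 1:]:
        if future_atoms.issubset(current_atoms):
            return bonus  # the bridge policy re-joined the plan
    return 0.0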
@@ -1034,8 +1059,8 @@ def _policy(s: State) -> Action:
                 print("current state", s)
                 print(f"Option execution failed: {e}")
                 print(f"Current control: {self._current_control}")
-                print(f"Bridge done: {self.bridge_done}")
-                if self.bridge_done or self._current_control == "planner":
+                print(f"Failed option: {e.info['last_failed_option']}")
+                if self._current_control == "planner":
                     # Get the next option from the option policy
                     try:
                         action = self._current_policy(s)
@@ -1049,26 +1074,26 @@ def _policy(s: State) -> Action:
                         memory = e.info.get("memory", {})
                         action = current_option.policy(s)
 
-                    else:
-                        action = self.last_action
-                        print("planner execution error", action)
+                    # else:
+                    #     action = self.last_action
+                    #     print("planner execution error", action)
 
             # Switch control from planner to bridge.
             # assert self._current_control == "planner"
-            if not self.bridge_done:
-                print("switching to bridge", self.bridge_done)
-                self._current_control = "bridge"
-                self.num_bridge_steps=0
-                self.mapleq._q_function._last_planner_state = s
-
-                self._bridge_called_state = s
-                self._current_policy = self.mapleq._solve(  # pylint: disable=protected-access
-                    task, timeout, train_or_test)
-                action = self._current_policy(s)
-                if self._current_control == "bridge":
-                    self.num_bridge_steps += 1
+
+            print("switching to bridge")
+            self._current_control = "bridge"
+            self.num_bridge_steps = 0
+            self.mapleq._q_function._last_planner_state = s
+
+            self._bridge_called_state = s
+            self._current_policy = self.mapleq._solve(  # pylint: disable=protected-access
+                task, timeout, train_or_test)
+            action = self._current_policy(s)
+            if self._current_control == "bridge":
+                self.num_bridge_steps += 1
 
             if train_or_test == "train":
@@ -1078,3 +1103,110 @@ def _policy(s: State) -> Action:
 
         return _policy
+
+
+    def learn_from_interaction_results(
+            self, results: Sequence[InteractionResult]) -> None:
+        # Turn state action pairs from results into trajectories
+        # If we haven't collected any new results on this cycle, skip learning
+        # for efficiency.
+        if not results:
+            return None
+        all_states = []
+        all_actions = []
+        policy_logs = self._policy_logs
+        plan_logs = self._plan_logs
+        count = -1
+        mapleq_states = []
+        mapleq_actions = []
+        reward_bonuses = []
+        for i in range(len(results)):
+            reward_bonus = []
+            result = results[i]
+            policy_log = policy_logs[:len(result.states[:-1])]
+            plan_log = plan_logs[:len(result.states[:-1])]
+            # mapleq_states = [
+            #     state for j, state in enumerate(result.states[:-1])
+            #     if policy_log[j] == "bridge"
+            #     or policy_log[max(j - 1, 0)] == "bridge"
+            # ]
+            # mapleq_actions = [
+            #     action for j, action in enumerate(result.actions)
+            #     if policy_log[j] == "bridge"
+            #     or policy_log[max(j - 1, 0)] == "bridge"
+            # ]
+
+            add_to_traj = False
+
+            num_rewards = 0
+            for j in range(len(result.states)):
+                if j < ...
+            # if num_rewards >= 1:
+            #     import ipdb;ipdb.set_trace()
+            all_states.extend(mapleq_states)
+            all_actions.extend(mapleq_actions)
+            policy_logs = policy_logs[len(result.states) - 1:]
+            plan_logs = plan_logs[len(result.states) - 1:]
+        self.mapleq.get_interaction_requests()
+        self.mapleq._learn_nsrts(self._trajs, 0, [] * len(self._trajs), reward_bonuses)  # pylint: disable=protected-access
+        self._policy_logs = []
+        self._plan_logs = []
+        return None
diff --git a/predicators/approaches/maple_q_approach.py b/predicators/approaches/maple_q_approach.py
index e3a25b627d..3a74847f31 100644
--- a/predicators/approaches/maple_q_approach.py
+++ b/predicators/approaches/maple_q_approach.py
@@ -253,40 +253,73 @@ def _update_maple_data(self, reward_bonuses) -> None:
         start_idx = self._last_seen_segment_traj_idx + 1
         new_trajs = self._segmented_trajs[start_idx:]
 
-        goal_offset = CFG.max_initial_demos
-        assert len(self._segmented_trajs) == goal_offset + \
-            len(self._interaction_goals)
-        new_traj_goals = self._interaction_goals[goal_offset + start_idx:]
-
+        if CFG.rl_rwd_shape:
+            for traj_i, segmented_traj in enumerate(new_trajs):
+                self._last_seen_segment_traj_idx += 1
+                reward_bonus = reward_bonuses[traj_i]
+                # if sum(reward_bonus) > 0:
+                #     import ipdb;ipdb.set_trace()
+                for seg_i, segment in enumerate(segmented_traj):
+                    s = segment.states[0]
+                    goal = None
+                    o = segment.get_option()
+                    ns = segment.states[-1]
+                    # reward = 1.0 if goal.issubset(segment.final_atoms) else 0.0
+                    reward = 0
+                    # if CFG.use_callplanner and o.parent == self.CallPlanner and reward == 1:
+                    #     reward += 0.5
 
-        for traj_i, segmented_traj in enumerate(new_trajs):
-            self._last_seen_segment_traj_idx += 1
-            reward_bonus = reward_bonuses[traj_i]
-            # if sum(reward_bonus) > 0:
-            #     import ipdb;ipdb.set_trace()
-            for seg_i, segment in enumerate(segmented_traj):
-                s = segment.states[0]
-                goal = new_traj_goals[traj_i]
-                o = segment.get_option()
-                ns = segment.states[-1]
-                reward = 1.0 if goal.issubset(segment.final_atoms) else 0.0
-                if CFG.use_callplanner and o.parent == self.CallPlanner and reward == 1:
-                    reward += 0.5
-                if CFG.rl_rwd_shape:
                     reward += reward_bonus[0]
                     reward_bonus.pop(0)
+
+
 
-
-                # if reward > 0:
-                #     print(s, o, reward)
-                terminal = reward > 0 or seg_i == len(segmented_traj) - 1
+                    # if reward > 0:
+                    #     print(s, o, reward)
+
+                    terminal = reward > 0 or seg_i == len(segmented_traj) - 1
+
+
+                    # if reward >= 1:
+                    #     import ipdb;ipdb.set_trace()
+                    self._q_function.add_datum_to_replay_buffer(
+                        (s, goal, o, ns, reward, terminal))
+
+
+        else:
+            goal_offset = CFG.max_initial_demos
+            assert len(self._segmented_trajs) == goal_offset + \
+                len(self._interaction_goals)
+            new_traj_goals = self._interaction_goals[goal_offset + start_idx:]
-
+
+
+
for traj_i, segmented_traj in enumerate(new_trajs): + self._last_seen_segment_traj_idx += 1 + reward_bonus = reward_bonuses[traj_i] + # if sum(reward_bonus) > 0: # import ipdb;ipdb.set_trace() + for seg_i, segment in enumerate(segmented_traj): + s = segment.states[0] + goal = new_traj_goals[traj_i] + o = segment.get_option() + ns = segment.states[-1] + reward = 1.0 if goal.issubset(segment.final_atoms) else 0.0 + if CFG.use_callplanner and o.parent == self.CallPlanner and reward == 1: + reward += 0.5 + + # if reward > 0: + # print(s, o, reward) + + terminal = reward > 0 or seg_i == len(segmented_traj) - 1 - self._q_function.add_datum_to_replay_buffer( - (s, goal, o, ns, reward, terminal)) \ No newline at end of file + + # if reward >= 1: + # import ipdb;ipdb.set_trace() + + self._q_function.add_datum_to_replay_buffer( + (s, goal, o, ns, reward, terminal)) \ No newline at end of file diff --git a/predicators/envs/coffee.py b/predicators/envs/coffee.py index 0437f9f91a..c8d5e07a92 100644 --- a/predicators/envs/coffee.py +++ b/predicators/envs/coffee.py @@ -709,10 +709,10 @@ class CoffeeLidEnv(CoffeeEnv): def __init__(self, use_gui: bool = True) -> None: super().__init__(use_gui) - #to remove lid you need a low level action that gets the stickiness exactly right - self._lid_type = Type( - "lid", - ["x", "y", "stickiness", "on_cup"]) + # #to remove lid you need a low level action that gets the stickiness exactly right + # self._lid_type = Type( + # "lid", + # ["x", "y", "stickiness", "on_cup"]) @classmethod def get_name(cls) -> str: @@ -722,7 +722,8 @@ def get_name(cls) -> str: def types(self) -> Set[Type]: return { self._cup_type, self._jug_type, self._machine_type, - self._robot_type, self._lid_type + self._robot_type, + # self._lid_type } def simulate(self, state: State, action: Action) -> State: @@ -826,10 +827,10 @@ def simulate(self, state: State, action: Action) -> State: elif abs(fingers - self.closed_fingers) < self.grasp_finger_tol and \ sq_dist_to_handle < self.grasp_position_tol and \ abs(jug_rot) < self.pick_jug_rot_tol and norm_dwrist!=-1: - print("ig we r grasping") - print("sq_dist_to_handle < self.grasp_position_tol", sq_dist_to_handle < self.grasp_position_tol) - print("abs(jug_rot) < self.pick_jug_rot_tol", abs(jug_rot) < self.pick_jug_rot_tol) - print("abs(fingers - self.closed_fingers) < self.grasp_finger_tol") + # print("ig we r grasping") + # print("sq_dist_to_handle < self.grasp_position_tol", sq_dist_to_handle < self.grasp_position_tol) + # print("abs(jug_rot) < self.pick_jug_rot_tol", abs(jug_rot) < self.pick_jug_rot_tol) + # print("abs(fingers - self.closed_fingers) < self.grasp_finger_tol") # Snap to the handle. handle_x, handle_y, handle_z = handle_pos next_state.set(self._robot, "x", handle_x) @@ -841,12 +842,14 @@ def simulate(self, state: State, action: Action) -> State: next_state.set(self._jug, "is_held", 1.0) # Check if the jug should be rotated. elif self._Twisting_holds(state, [self._robot, self._jug]): - print("WE ROTATE THE JUG with dwrist", dwrist, "and rot", state.get(self._jug, "rot")) + # print("WE ROTATE THE JUG with dwrist", dwrist, "and rot", state.get(self._jug, "rot")) # Rotate the jug. rot = state.get(self._jug, "rot") next_state.set(self._jug, "rot", rot + dwrist) # If the jug is close enough to the dispense area and the machine is # on, the jug should get filled. 
+        # print("so uhhhh we did not grasp the jug")
diff --git a/predicators/envs/grid_row.py b/predicators/envs/grid_row.py
--- a/predicators/envs/grid_row.py
+++ b/predicators/envs/grid_row.py
@@ ... @@ def ...(...) -> List[EnvironmentTask]:
         new_doors = 2*CFG.num_doors+1
         num_doors = rng.integers(CFG.num_doors, new_doors)
         print("NUM DOORS", num_doors)
-        num_doors = CFG.test_num_doors
+        # num_doors = CFG.test_num_doors
         for door_num in range(num_doors):
             door = Object(f"door{door_num}", self._door_type)
             door_list.append(door)
diff --git a/predicators/ground_truth_models/coffee/options.py b/predicators/ground_truth_models/coffee/options.py
index 0eab6127b9..af796f0656 100644
--- a/predicators/ground_truth_models/coffee/options.py
+++ b/predicators/ground_truth_models/coffee/options.py
@@ -101,7 +101,8 @@ def _PickJug_terminal(state: State, memory: Dict,
         del memory, params  # unused
         robot, jug = objects
         # print("ARE WE HOLDING THE JUG", Holding.holds(state, [robot, jug]) )
-        return Holding.holds(state, [robot, jug]) or cls.repeat_state
+        return Holding.holds(state, [robot, jug])
+        # or cls.repeat_state
 
     PickJug = ParameterizedOption(
         "PickJug",
@@ -207,40 +209,54 @@ def _create_twist_jug_policy(cls) -> ParameterizedPolicy:
         previous_state = None
         cls.repeat_state = False
         def policy(state: State, memory: Dict, objects: Sequence[Object],
-                   params: Array) -> Action:
-            # This policy twists until the jug is in the desired rotation, and
-            # then moves up to break contact with the jug.
-            del memory  # unused
+                   params: Array) -> Action:
             nonlocal previous_state
-            if previous_state is not None and previous_state.pretty_str() == state.pretty_str():
                 cls.repeat_state = True
             previous_state = state
+
             robot, jug = objects
             current_rot = state.get(jug, "rot")
-            norm_desired_rot, = params
-            desired_rot = norm_desired_rot * CFG.coffee_jug_init_rot_amt
-            delta_rot = np.clip(desired_rot - current_rot,
+            fingers = state.get(robot, "fingers")
+
+            # Step 1: If fingers are open, close them to grasp.
+            if abs(fingers - CoffeeEnv.open_fingers) < cls.twist_policy_tol:
+                print("TwistJug: Closing fingers to grasp")
+                return Action(np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32))
+
+            # Step 2: If we're at the right rotation and still grasping, release.
+            if abs(current_rot) < np.pi / 3 and abs(fingers - CoffeeEnv.closed_fingers) < cls.twist_policy_tol:
+                print("TwistJug: Opening fingers to release")
+                return Action(np.array([0.0, 0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32))
+
+            # Step 3: If we're still holding (fingers closed), do the twist.
+            if abs(fingers - CoffeeEnv.closed_fingers) < cls.twist_policy_tol:
+                print(f"TwistJug: Twisting, current_rot={current_rot}")
+                norm_desired_rot, = params
+                desired_rot = norm_desired_rot * CFG.coffee_jug_init_rot_amt
+                # Sanity check:
+                # desired_rot = np.pi / 4
+                print("desired rot", norm_desired_rot)
+                delta_rot = np.clip(desired_rot - current_rot,
                                 -CoffeeEnv.max_angular_vel,
                                 CoffeeEnv.max_angular_vel)
-            # print("desired rot", desired_rot)
-            # print("current rot", current_rot)
-            # print("delta rot", delta_rot)
-
-            if abs(delta_rot) < cls.twist_policy_tol:
-                # Move up to stop twisting.
- x = state.get(robot, "x") - y = state.get(robot, "y") - z = state.get(robot, "z") - robot_pos = (x, y, z) - return cls._get_move_action((x, y, CoffeeEnv.robot_init_z), - robot_pos) - dtwist = delta_rot / CoffeeEnv.max_angular_vel - return Action( - np.array([0.0, 0.0, 0.0, 0.0, dtwist, 0.0], dtype=np.float32)) + + # If twist is complete, move up (original logic) + if abs(delta_rot) < cls.twist_policy_tol: + x = state.get(robot, "x") + y = state.get(robot, "y") + z = state.get(robot, "z") + robot_pos = (x, y, z) + return cls._get_move_action((x, y, CoffeeEnv.robot_init_z), + robot_pos) + + # Otherwise keep twisting + dtwist = delta_rot / CoffeeEnv.max_angular_vel + return Action(np.array([0.0, 0.0, 0.0, 0.0, dtwist, 0.0], dtype=np.float32)) return policy + @classmethod def _create_pick_jug_policy(cls) -> ParameterizedPolicy: previous_state = None @@ -268,6 +284,7 @@ def policy(state: State, memory: Dict, objects: Sequence[Object], sq_dist_to_handle = np.sum(np.subtract(handle_pos, robot_pos)**2) # print("CAN WE PICK", sq_dist_to_handle < cls.pick_policy_tol) if sq_dist_to_handle < cls.pick_policy_tol: + # print("PICKING") return Action( np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32)) @@ -282,18 +299,22 @@ def policy(state: State, memory: Dict, objects: Sequence[Object], # If at the correct x and z position and behind in the y direction, # move directly toward the target. if target_y > y and xz_handle_sq_dist < cls.pick_policy_tol: + # print("going toward the target") return cls._get_move_action(handle_pos, robot_pos) # If close enough to the penultimate waypoint in the x/y plane, # move to the waypoint (in the z direction). if xy_waypoint_sq_dist < cls.pick_policy_tol: + # print("going toward the waypoint") return cls._get_move_action((target_x, waypoint_y, target_z), robot_pos) # If at a safe height, move to the position above the penultimate # waypoint, still at a safe height. if safe_z_sq_dist < CoffeeEnv.safe_z_tol: + # print("going toward the position above the penultimate waypoint") return cls._get_move_action( (target_x, waypoint_y, CoffeeEnv.robot_init_z), robot_pos) # Move up to a safe height. 
+ # print("going toward the safe height") return cls._get_move_action((x, y, CoffeeEnv.robot_init_z), robot_pos) diff --git a/predicators/ml_models.py b/predicators/ml_models.py index 5286d79bb6..ad3bcfdf3b 100644 --- a/predicators/ml_models.py +++ b/predicators/ml_models.py @@ -1231,6 +1231,9 @@ def _get_torch_device(use_torch_gpu: bool) -> torch.device: def _normalize_data(data: Array, scale_clip: float = 1) -> Tuple[Array, Array, Array]: + if data.size == 0: + # Return appropriate values for empty data + return data, np.array([]), np.array([]) shift = np.min(data, axis=0) scale = np.max(data - shift, axis=0) scale = np.clip(scale, scale_clip, None) @@ -1906,7 +1909,7 @@ def __init__(self, self._qfunc_init = False self._last_planner_state = None - self._ep_reduction = (self._epsilon-self._min_epsilon) \ + self._ep_reduction = CFG.epsilon_reduction*(self._epsilon-self._min_epsilon) \ /(CFG.num_online_learning_cycles*CFG.max_num_steps_interaction_request \ *CFG.interactive_num_requests_per_cycle) self._counter = 0 diff --git a/predicators/settings.py b/predicators/settings.py index 212d2dfe90..fae96e5822 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -352,9 +352,9 @@ class GlobalSettings: # grid row env parameters grid_row_num_cells = 5 - test_grid_row_num_cells = 5 + test_grid_row_num_cells = 10 num_doors = 1 - test_num_doors = 1 + # test_num_doors = 1 # burger env parameters gridworld_num_rows = 4 @@ -605,6 +605,8 @@ class GlobalSettings: # shivam vats inspo rl_rwd_shape = False max_num_bridge_steps = 90 + # default epsilon reduction factor + epsilon_reduction = 2 # skill competence model parameters skill_competence_model = "optimistic" skill_competence_model_num_em_iters = 3 diff --git a/tests/envs/test_coffee.py b/tests/envs/test_coffee.py index 1e23a60bd1..40d63f4a03 100644 --- a/tests/envs/test_coffee.py +++ b/tests/envs/test_coffee.py @@ -1,3 +1,408 @@ +"""Test cases for the coffee environment.""" +import numpy as np + +from predicators import utils +from predicators.envs.coffee import CoffeeEnv, CoffeeLidEnv +from predicators.ground_truth_models import get_gt_options +from predicators.structs import Action, GroundAtom + + +# def test_coffee(): +# """Tests for CoffeeEnv().""" +# utils.reset_config({ +# "env": "coffee", +# "coffee_num_cups_test": [4], # used to assure 4 cups in custom state +# "video_fps": 10, # for faster debugging videos +# }) +# env = CoffeeEnv() +# for task in env.get_train_tasks(): +# for obj in task.init: +# assert len(obj.type.feature_names) == len(task.init[obj]) +# for task in env.get_test_tasks(): +# for obj in task.init: +# assert len(obj.type.feature_names) == len(task.init[obj]) +# assert len(env.predicates) == 13 +# assert len(env.goal_predicates) == 1 +# pred_name_to_pred = {p.name: p for p in env.predicates} +# CupFilled = pred_name_to_pred["CupFilled"] +# JugInMachine = pred_name_to_pred["JugInMachine"] +# OnTable = pred_name_to_pred["OnTable"] +# NotAboveCup = pred_name_to_pred["NotAboveCup"] +# assert len(get_gt_options(env.get_name())) == 6 +# option_name_to_option = {o.name: o for o in get_gt_options(env.get_name())} +# assert len(env.types) == 4 +# type_name_to_type = {t.name: t for t in env.types} +# cup_type = type_name_to_type["cup"] +# jug_type = type_name_to_type["jug"] +# machine_type = type_name_to_type["machine"] +# robot_type = type_name_to_type["robot"] +# assert env.action_space.shape == (6, ) +# # Create a custom initial state, with cups positions at the extremes of +# # their possible initial positions. 
+# state = env.get_test_tasks()[0].init.copy() +# robot, = state.get_objects(robot_type) +# jug, = state.get_objects(jug_type) +# machine, = state.get_objects(machine_type) +# cups = state.get_objects(cup_type) +# assert len(cups) == 4 +# # Reposition the cups. +# cup_corners = [ +# (env.cup_init_x_lb, env.cup_init_y_lb), +# (env.cup_init_x_lb, env.cup_init_y_ub), +# (env.cup_init_x_ub, env.cup_init_y_ub), +# (env.cup_init_x_ub, env.cup_init_y_lb), +# ] +# for cup, (x, y) in zip(cups, cup_corners): +# state.set(cup, "x", x) +# state.set(cup, "y", y) +# # Reposition the jug just so we know exactly where it is. +# state.set(jug, "x", env.jug_init_x_ub) +# state.set(jug, "y", env.jug_init_y_ub) +# state.set(jug, "rot", 0.0) +# env.render_state(state, task) + +# ## Test simulate ## + +# # Helper function to compute a sequence of actions that moves the robot +# # from an initial position to a target position in a straight line. +# def _get_position_action_arrs(init_x, init_y, init_z, final_x, final_y, +# final_z): +# action_arrs = [] +# current_pos = np.array([init_x, init_y, init_z]) +# delta = np.subtract((final_x, final_y, final_z), current_pos) +# pos_norm = float(np.linalg.norm(delta)) +# if pos_norm > env.max_position_vel: +# delta = env.max_position_vel * (delta / pos_norm) +# num_max_steps = int(np.floor(pos_norm / env.max_position_vel)) +# for _ in range(num_max_steps): +# dx, dy, dz = delta / env.max_position_vel +# action_arrs.append( +# np.array([dx, dy, dz, 0.0, 0.0, 0.0], dtype=np.float32)) +# current_pos = current_pos + delta +# delta = np.subtract((final_x, final_y, final_z), current_pos) +# pos_norm = float(np.linalg.norm(delta)) +# if pos_norm > 0: +# delta = delta / env.max_position_vel +# dx, dy, dz = delta +# action_arrs.append( +# np.array([dx, dy, dz, 0.0, 0.0, 0.0], dtype=np.float32)) +# return action_arrs + +# # Test twisting the jug. +# target_x = state.get(jug, "x") +# target_y = state.get(jug, "y") +# target_z = env.jug_height +# action_arrs = _get_position_action_arrs(state.get(robot, "x"), +# state.get(robot, "y"), +# state.get(robot, "z"), target_x, +# target_y, target_z) +# num_twists = 2 +# twist_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0], dtype=np.float32) +# action_arrs.extend([twist_act_arr for _ in range(num_twists)]) +# policy = utils.action_arrs_to_policy(action_arrs) +# traj = utils.run_policy_with_simulator(policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=len(action_arrs)) +# twist_amt = num_twists * env.max_angular_vel +# assert abs(traj.states[-1].get(jug, "rot") - twist_amt) < 1e-6 +# s = traj.states[-1] + +# # The jug is too twisted now, so picking it up should fail. +# target_x, target_y, target_z = env._get_jug_handle_grasp(s, jug) # pylint: disable=protected-access +# move_action_arrs = _get_position_action_arrs(s.get(robot, "x"), +# s.get(robot, "y"), +# s.get(robot, "z"), target_x, +# target_y, target_z) +# action_arrs.extend(move_action_arrs) +# pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) +# action_arrs.append(pick_act_arr) + +# policy = utils.action_arrs_to_policy(action_arrs) +# traj = utils.run_policy_with_simulator(policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=len(action_arrs)) +# assert traj.states[-1].get(jug, "is_held") < 0.5 + +# # Test picking up the jug. 
+# target_x, target_y, target_z = env._get_jug_handle_grasp(state, jug) # pylint: disable=protected-access +# action_arrs = _get_position_action_arrs(state.get(robot, "x"), +# state.get(robot, "y"), +# state.get(robot, "z"), target_x, +# target_y, target_z) +# pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) +# action_arrs.append(pick_act_arr) + +# policy = utils.action_arrs_to_policy(action_arrs) +# traj = utils.run_policy_with_simulator(policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=len(action_arrs)) +# assert traj.states[-2].get(jug, "is_held") < 0.5 +# assert traj.states[-1].get(jug, "is_held") > 0.5 +# assert GroundAtom(OnTable, [jug]).holds(traj.states[-2]) +# assert not GroundAtom(OnTable, [jug]).holds(traj.states[-1]) +# s = traj.states[-1] + +# # Test moving and placing the jug at the machine. +# # Offset based on the grasp. +# target_x = env.dispense_area_x - (s.get(jug, "x") - s.get(robot, "x")) +# target_y = env.dispense_area_y - (s.get(jug, "y") - s.get(robot, "y")) +# target_z = s.get(robot, "z") +# move_jug_act_arrs = _get_position_action_arrs(s.get(robot, "x"), +# s.get(robot, "y"), +# s.get(robot, "z"), target_x, +# target_y, target_z) +# action_arrs.extend(move_jug_act_arrs) +# # Drop the jug. +# place_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32) +# action_arrs.append(place_act_arr) + +# policy = utils.action_arrs_to_policy(action_arrs) +# traj = utils.run_policy_with_simulator(policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=len(action_arrs)) +# assert traj.states[-2].get(jug, "is_held") > 0.5 +# assert traj.states[-1].get(jug, "is_held") < 0.5 +# s = traj.states[-1] + +# # Test pressing the machine button. +# # First move to above the button. +# move_to_above_button_act_arrs = _get_position_action_arrs( +# s.get(robot, "x"), s.get(robot, "y"), s.get(robot, "z"), env.button_x, +# env.button_y + 1.0, env.button_z) +# action_arrs.extend(move_to_above_button_act_arrs) +# # Move forward to press the button. +# move_to_press_button_act_arrs = _get_position_action_arrs( +# env.button_x, +# env.button_y + 1.0, +# env.button_z, +# env.button_x, +# env.button_y, +# env.button_z, +# ) +# action_arrs.extend(move_to_press_button_act_arrs) + +# policy = utils.action_arrs_to_policy(action_arrs) +# traj = utils.run_policy_with_simulator(policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=len(action_arrs)) +# assert traj.states[-2].get(machine, "is_on") < 0.5 +# assert traj.states[-1].get(machine, "is_on") > 0.5 +# # The jug should also now be filled. +# assert traj.states[-2].get(jug, "is_filled") < 0.5 +# assert traj.states[-1].get(jug, "is_filled") > 0.5 +# s = traj.states[-1] + +# # Test picking up the filled jug. 
+# target_x, target_y, target_z = env._get_jug_handle_grasp(s, jug) # pylint: disable=protected-access +# move_to_pick_act_arrs = _get_position_action_arrs(s.get(robot, "x"), +# s.get(robot, "y"), +# s.get(robot, +# "z"), target_x, +# target_y, target_z) +# action_arrs.extend(move_to_pick_act_arrs) +# pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) +# action_arrs.append(pick_act_arr) + +# policy = utils.action_arrs_to_policy(action_arrs) +# traj = utils.run_policy_with_simulator(policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=len(action_arrs)) +# assert traj.states[-2].get(jug, "is_held") < 0.5 +# assert traj.states[-1].get(jug, "is_held") > 0.5 +# assert GroundAtom(NotAboveCup, [robot, jug]).holds(traj.states[-1]) +# s = traj.states[-1] + +# # Check for noop when pouring into nothing. +# pour_act_lst = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0] +# pour_act_arr = np.array(pour_act_lst, dtype=np.float32) +# next_s = env.simulate(s, Action(pour_act_arr)) +# assert s.allclose(next_s) + +# # Test pouring in each of the cups. +# for cup in cups: +# jug_target_x, jug_target_y, jug_target_z = env._get_pour_position( # pylint: disable=protected-access +# s, cup) +# target_x = jug_target_x - (s.get(jug, "x") - s.get(robot, "x")) +# target_y = jug_target_y - (s.get(jug, "y") - s.get(robot, "y")) +# target_z = jug_target_z + env.jug_handle_height +# move_to_pour_act_arrs = _get_position_action_arrs( +# s.get(robot, "x"), s.get(robot, "y"), s.get(robot, "z"), target_x, +# target_y, target_z) +# action_arrs.extend(move_to_pour_act_arrs) +# target_liquid = state.get(cup, "target_liquid") +# num_pour_steps = int(np.ceil(target_liquid / env.pour_velocity)) +# # Start pouring. +# action_arrs.append(pour_act_arr) +# # Keep pouring. +# action_arrs.extend([pour_act_arr for _ in range(num_pour_steps - 1)]) +# # Stop pouring. +# action_arrs.append(-1 * pour_act_arr) + +# policy = utils.action_arrs_to_policy(action_arrs) +# traj = utils.run_policy_with_simulator(policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=len(action_arrs)) +# assert not GroundAtom(CupFilled, [cup]).holds(traj.states[-3]) +# assert GroundAtom(CupFilled, [cup]).holds(traj.states[-1]) +# assert not GroundAtom(NotAboveCup, [robot, jug]).holds(traj.states[-1]) +# s = traj.states[-1] +# # Render a state where we are in the process of pouring. +# env.render_state(traj.states[-2], task) + +# # Check for noop when overfilling a cup. +# next_s = env.simulate(s, Action(pour_act_arr)) +# assert s.allclose(next_s) + +# # Uncomment for debugging. +# # policy = utils.action_arrs_to_policy(action_arrs) +# # monitor = utils.SimulateVideoMonitor(task, env.render_state) +# # traj = utils.run_policy_with_simulator(policy, +# # env.simulate, +# # state, +# # lambda _: False, +# # max_num_steps=len(action_arrs), +# # monitor=monitor) +# # video = monitor.get_video() +# # outfile = "hardcoded_actions_coffee.mp4" +# # utils.save_video(outfile, video) + +# ## Test options ## + +# # Test MoveToTwistJug and TwistJug. 
+# MoveToTwistJug = option_name_to_option["MoveToTwistJug"] +# TwistJug = option_name_to_option["TwistJug"] +# option1 = MoveToTwistJug.ground([robot, jug], []) +# option2 = TwistJug.ground([robot, jug], np.array([1.0], dtype=np.float32)) +# option_plan = [option1, option2] +# policy = utils.option_plan_to_policy(option_plan) +# traj = utils.run_policy_with_simulator( +# policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=1000, +# exceptions_to_break_on={utils.OptionExecutionFailure}) +# expected_rot = env.jug_init_rot_ub +# assert abs(traj.states[-1].get(jug, "rot") - expected_rot) < 1e-6 + +# # Test PickJug. +# PickJug = option_name_to_option["PickJug"] +# option = PickJug.ground([robot, jug], []) +# option_plan = [option] + +# policy = utils.option_plan_to_policy(option_plan) +# traj = utils.run_policy_with_simulator( +# policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=1000, +# exceptions_to_break_on={utils.OptionExecutionFailure}) +# assert traj.states[-2].get(jug, "is_held") < 0.5 +# assert traj.states[-1].get(jug, "is_held") > 0.5 + +# # Test PlaceJugInMachine. +# PlaceJugInMachine = option_name_to_option["PlaceJugInMachine"] +# option = PlaceJugInMachine.ground([robot, jug, machine], []) +# option_plan.append(option) + +# policy = utils.option_plan_to_policy(option_plan) +# traj = utils.run_policy_with_simulator( +# policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=1000, +# exceptions_to_break_on={utils.OptionExecutionFailure}) +# assert traj.states[-2].get(jug, "is_held") > 0.5 +# assert traj.states[-1].get(jug, "is_held") < 0.5 +# assert GroundAtom(JugInMachine, [jug, machine]).holds(traj.states[-1]) + +# # Test TurnMachineOn. +# TurnMachineOn = option_name_to_option["TurnMachineOn"] +# option = TurnMachineOn.ground([robot, machine], []) +# option_plan.append(option) + +# policy = utils.option_plan_to_policy(option_plan) +# traj = utils.run_policy_with_simulator( +# policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=1000, +# exceptions_to_break_on={utils.OptionExecutionFailure}) +# assert traj.states[-2].get(machine, "is_on") < 0.5 +# assert traj.states[-1].get(machine, "is_on") > 0.5 + +# # Test PickJug from the dispense area. +# option = PickJug.ground([robot, jug], []) +# option_plan.append(option) + +# policy = utils.option_plan_to_policy(option_plan) +# traj = utils.run_policy_with_simulator( +# policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=1000, +# exceptions_to_break_on={utils.OptionExecutionFailure}) +# assert traj.states[-2].get(jug, "is_held") < 0.5 +# assert traj.states[-1].get(jug, "is_held") > 0.5 + +# # Test Pour into each of the cups. +# Pour = option_name_to_option["Pour"] +# for cup in cups: +# option = Pour.ground([robot, jug, cup], []) +# option_plan.append(option) +# policy = utils.option_plan_to_policy(option_plan) +# traj = utils.run_policy_with_simulator( +# policy, +# env.simulate, +# state, +# lambda _: False, +# max_num_steps=1000, +# exceptions_to_break_on={utils.OptionExecutionFailure}) +# assert not GroundAtom(CupFilled, [cup]).holds(traj.states[-3]) +# assert GroundAtom(CupFilled, [cup]).holds(traj.states[-1]) +# s = traj.states[-1] + +# # Uncomment for debugging. 
+# # policy = utils.option_plan_to_policy(option_plan) +# # monitor = utils.SimulateVideoMonitor(task, env.render_state) +# # traj = utils.run_policy_with_simulator( +# # policy, +# # env.simulate, +# # state, +# # lambda _: False, +# # max_num_steps=1000, +# # exceptions_to_break_on={utils.OptionExecutionFailure}, +# # monitor=monitor) +# # video = monitor.get_video() +# # outfile = "hardcoded_options_coffee.mp4" +# # utils.save_video(outfile, video) + + + + + + + + + """Test cases for the coffee environment.""" import numpy as np @@ -7,14 +412,14 @@ from predicators.structs import Action, GroundAtom -def test_coffee(): - """Tests for CoffeeEnv().""" +def test_coffeelid(): + """Tests for CoffeeLidEnv().""" utils.reset_config({ "env": "coffee", "coffee_num_cups_test": [4], # used to assure 4 cups in custom state "video_fps": 10, # for faster debugging videos }) - env = CoffeeEnv() + env = CoffeeLidEnv() for task in env.get_train_tasks(): for obj in task.init: assert len(obj.type.feature_names) == len(task.init[obj]) @@ -58,7 +463,7 @@ def test_coffee(): # Reposition the jug just so we know exactly where it is. state.set(jug, "x", env.jug_init_x_ub) state.set(jug, "y", env.jug_init_y_ub) - state.set(jug, "rot", 0.0) + state.set(jug, "rot", 2.0) env.render_state(state, task) ## Test simulate ## @@ -89,183 +494,186 @@ def _get_position_action_arrs(init_x, init_y, init_z, final_x, final_y, return action_arrs # Test twisting the jug. - target_x = state.get(jug, "x") - target_y = state.get(jug, "y") - target_z = env.jug_height - action_arrs = _get_position_action_arrs(state.get(robot, "x"), - state.get(robot, "y"), - state.get(robot, "z"), target_x, - target_y, target_z) - num_twists = 2 - twist_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0], dtype=np.float32) - action_arrs.extend([twist_act_arr for _ in range(num_twists)]) - policy = utils.action_arrs_to_policy(action_arrs) - traj = utils.run_policy_with_simulator(policy, - env.simulate, - state, - lambda _: False, - max_num_steps=len(action_arrs)) - twist_amt = num_twists * env.max_angular_vel - assert abs(traj.states[-1].get(jug, "rot") - twist_amt) < 1e-6 - s = traj.states[-1] - - # The jug is too twisted now, so picking it up should fail. 
- target_x, target_y, target_z = env._get_jug_handle_grasp(s, jug) # pylint: disable=protected-access - move_action_arrs = _get_position_action_arrs(s.get(robot, "x"), - s.get(robot, "y"), - s.get(robot, "z"), target_x, - target_y, target_z) - action_arrs.extend(move_action_arrs) - pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) - action_arrs.append(pick_act_arr) - - policy = utils.action_arrs_to_policy(action_arrs) - traj = utils.run_policy_with_simulator(policy, - env.simulate, - state, - lambda _: False, - max_num_steps=len(action_arrs)) - assert traj.states[-1].get(jug, "is_held") < 0.5 + # target_x = state.get(jug, "x") + # target_y = state.get(jug, "y") + # target_z = env.jug_height + # action_arrs = _get_position_action_arrs(state.get(robot, "x"), + # state.get(robot, "y"), + # state.get(robot, "z"), target_x, + # target_y, target_z) + # num_twists = 2 + # twist_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0], dtype=np.float32) + # action_arrs.extend([twist_act_arr for _ in range(num_twists)]) + # policy = utils.action_arrs_to_policy(action_arrs) + # traj = utils.run_policy_with_simulator(policy, + # env.simulate, + # state, + # lambda _: False, + # max_num_steps=len(action_arrs)) + # twist_amt = num_twists * env.max_angular_vel + # try: + # assert abs(traj.states[-1].get(jug, "rot") - twist_amt) < 1e-6 + # except: + # import ipdb;ipdb.set_trace() + # s = traj.states[-1] + + # # The jug is too twisted now, so picking it up should fail. + # target_x, target_y, target_z = env._get_jug_handle_grasp(s, jug) # pylint: disable=protected-access + # move_action_arrs = _get_position_action_arrs(s.get(robot, "x"), + # s.get(robot, "y"), + # s.get(robot, "z"), target_x, + # target_y, target_z) + # action_arrs.extend(move_action_arrs) + # pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) + # action_arrs.append(pick_act_arr) - # Test picking up the jug. - target_x, target_y, target_z = env._get_jug_handle_grasp(state, jug) # pylint: disable=protected-access - action_arrs = _get_position_action_arrs(state.get(robot, "x"), - state.get(robot, "y"), - state.get(robot, "z"), target_x, - target_y, target_z) - pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) - action_arrs.append(pick_act_arr) - - policy = utils.action_arrs_to_policy(action_arrs) - traj = utils.run_policy_with_simulator(policy, - env.simulate, - state, - lambda _: False, - max_num_steps=len(action_arrs)) - assert traj.states[-2].get(jug, "is_held") < 0.5 - assert traj.states[-1].get(jug, "is_held") > 0.5 - assert GroundAtom(OnTable, [jug]).holds(traj.states[-2]) - assert not GroundAtom(OnTable, [jug]).holds(traj.states[-1]) - s = traj.states[-1] - - # Test moving and placing the jug at the machine. - # Offset based on the grasp. - target_x = env.dispense_area_x - (s.get(jug, "x") - s.get(robot, "x")) - target_y = env.dispense_area_y - (s.get(jug, "y") - s.get(robot, "y")) - target_z = s.get(robot, "z") - move_jug_act_arrs = _get_position_action_arrs(s.get(robot, "x"), - s.get(robot, "y"), - s.get(robot, "z"), target_x, - target_y, target_z) - action_arrs.extend(move_jug_act_arrs) - # Drop the jug. 
- place_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32) - action_arrs.append(place_act_arr) - - policy = utils.action_arrs_to_policy(action_arrs) - traj = utils.run_policy_with_simulator(policy, - env.simulate, - state, - lambda _: False, - max_num_steps=len(action_arrs)) - assert traj.states[-2].get(jug, "is_held") > 0.5 - assert traj.states[-1].get(jug, "is_held") < 0.5 - s = traj.states[-1] - - # Test pressing the machine button. - # First move to above the button. - move_to_above_button_act_arrs = _get_position_action_arrs( - s.get(robot, "x"), s.get(robot, "y"), s.get(robot, "z"), env.button_x, - env.button_y + 1.0, env.button_z) - action_arrs.extend(move_to_above_button_act_arrs) - # Move forward to press the button. - move_to_press_button_act_arrs = _get_position_action_arrs( - env.button_x, - env.button_y + 1.0, - env.button_z, - env.button_x, - env.button_y, - env.button_z, - ) - action_arrs.extend(move_to_press_button_act_arrs) - - policy = utils.action_arrs_to_policy(action_arrs) - traj = utils.run_policy_with_simulator(policy, - env.simulate, - state, - lambda _: False, - max_num_steps=len(action_arrs)) - assert traj.states[-2].get(machine, "is_on") < 0.5 - assert traj.states[-1].get(machine, "is_on") > 0.5 - # The jug should also now be filled. - assert traj.states[-2].get(jug, "is_filled") < 0.5 - assert traj.states[-1].get(jug, "is_filled") > 0.5 - s = traj.states[-1] - - # Test picking up the filled jug. - target_x, target_y, target_z = env._get_jug_handle_grasp(s, jug) # pylint: disable=protected-access - move_to_pick_act_arrs = _get_position_action_arrs(s.get(robot, "x"), - s.get(robot, "y"), - s.get(robot, - "z"), target_x, - target_y, target_z) - action_arrs.extend(move_to_pick_act_arrs) - pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) - action_arrs.append(pick_act_arr) - - policy = utils.action_arrs_to_policy(action_arrs) - traj = utils.run_policy_with_simulator(policy, - env.simulate, - state, - lambda _: False, - max_num_steps=len(action_arrs)) - assert traj.states[-2].get(jug, "is_held") < 0.5 - assert traj.states[-1].get(jug, "is_held") > 0.5 - assert GroundAtom(NotAboveCup, [robot, jug]).holds(traj.states[-1]) - s = traj.states[-1] + # policy = utils.action_arrs_to_policy(action_arrs) + # traj = utils.run_policy_with_simulator(policy, + # env.simulate, + # state, + # lambda _: False, + # max_num_steps=len(action_arrs)) + # assert traj.states[-1].get(jug, "is_held") < 0.5 + + # # Test picking up the jug. + # target_x, target_y, target_z = env._get_jug_handle_grasp(state, jug) # pylint: disable=protected-access + # action_arrs = _get_position_action_arrs(state.get(robot, "x"), + # state.get(robot, "y"), + # state.get(robot, "z"), target_x, + # target_y, target_z) + # pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) + # action_arrs.append(pick_act_arr) + + # policy = utils.action_arrs_to_policy(action_arrs) + # traj = utils.run_policy_with_simulator(policy, + # env.simulate, + # state, + # lambda _: False, + # max_num_steps=len(action_arrs)) + # assert traj.states[-2].get(jug, "is_held") < 0.5 + # assert traj.states[-1].get(jug, "is_held") > 0.5 + # assert GroundAtom(OnTable, [jug]).holds(traj.states[-2]) + # assert not GroundAtom(OnTable, [jug]).holds(traj.states[-1]) + # s = traj.states[-1] + + # # Test moving and placing the jug at the machine. + # # Offset based on the grasp. 
+ # target_x = env.dispense_area_x - (s.get(jug, "x") - s.get(robot, "x")) + # target_y = env.dispense_area_y - (s.get(jug, "y") - s.get(robot, "y")) + # target_z = s.get(robot, "z") + # move_jug_act_arrs = _get_position_action_arrs(s.get(robot, "x"), + # s.get(robot, "y"), + # s.get(robot, "z"), target_x, + # target_y, target_z) + # action_arrs.extend(move_jug_act_arrs) + # # Drop the jug. + # place_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 1.0], dtype=np.float32) + # action_arrs.append(place_act_arr) - # Check for noop when pouring into nothing. - pour_act_lst = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0] - pour_act_arr = np.array(pour_act_lst, dtype=np.float32) - next_s = env.simulate(s, Action(pour_act_arr)) - assert s.allclose(next_s) + # policy = utils.action_arrs_to_policy(action_arrs) + # traj = utils.run_policy_with_simulator(policy, + # env.simulate, + # state, + # lambda _: False, + # max_num_steps=len(action_arrs)) + # assert traj.states[-2].get(jug, "is_held") > 0.5 + # assert traj.states[-1].get(jug, "is_held") < 0.5 + # s = traj.states[-1] + + # # Test pressing the machine button. + # # First move to above the button. + # move_to_above_button_act_arrs = _get_position_action_arrs( + # s.get(robot, "x"), s.get(robot, "y"), s.get(robot, "z"), env.button_x, + # env.button_y + 1.0, env.button_z) + # action_arrs.extend(move_to_above_button_act_arrs) + # # Move forward to press the button. + # move_to_press_button_act_arrs = _get_position_action_arrs( + # env.button_x, + # env.button_y + 1.0, + # env.button_z, + # env.button_x, + # env.button_y, + # env.button_z, + # ) + # action_arrs.extend(move_to_press_button_act_arrs) - # Test pouring in each of the cups. - for cup in cups: - jug_target_x, jug_target_y, jug_target_z = env._get_pour_position( # pylint: disable=protected-access - s, cup) - target_x = jug_target_x - (s.get(jug, "x") - s.get(robot, "x")) - target_y = jug_target_y - (s.get(jug, "y") - s.get(robot, "y")) - target_z = jug_target_z + env.jug_handle_height - move_to_pour_act_arrs = _get_position_action_arrs( - s.get(robot, "x"), s.get(robot, "y"), s.get(robot, "z"), target_x, - target_y, target_z) - action_arrs.extend(move_to_pour_act_arrs) - target_liquid = state.get(cup, "target_liquid") - num_pour_steps = int(np.ceil(target_liquid / env.pour_velocity)) - # Start pouring. - action_arrs.append(pour_act_arr) - # Keep pouring. - action_arrs.extend([pour_act_arr for _ in range(num_pour_steps - 1)]) - # Stop pouring. - action_arrs.append(-1 * pour_act_arr) - - policy = utils.action_arrs_to_policy(action_arrs) - traj = utils.run_policy_with_simulator(policy, - env.simulate, - state, - lambda _: False, - max_num_steps=len(action_arrs)) - assert not GroundAtom(CupFilled, [cup]).holds(traj.states[-3]) - assert GroundAtom(CupFilled, [cup]).holds(traj.states[-1]) - assert not GroundAtom(NotAboveCup, [robot, jug]).holds(traj.states[-1]) - s = traj.states[-1] - # Render a state where we are in the process of pouring. - env.render_state(traj.states[-2], task) + # policy = utils.action_arrs_to_policy(action_arrs) + # traj = utils.run_policy_with_simulator(policy, + # env.simulate, + # state, + # lambda _: False, + # max_num_steps=len(action_arrs)) + # assert traj.states[-2].get(machine, "is_on") < 0.5 + # assert traj.states[-1].get(machine, "is_on") > 0.5 + # # The jug should also now be filled. + # assert traj.states[-2].get(jug, "is_filled") < 0.5 + # assert traj.states[-1].get(jug, "is_filled") > 0.5 + # s = traj.states[-1] + + # # Test picking up the filled jug. 
+ # target_x, target_y, target_z = env._get_jug_handle_grasp(s, jug) # pylint: disable=protected-access + # move_to_pick_act_arrs = _get_position_action_arrs(s.get(robot, "x"), + # s.get(robot, "y"), + # s.get(robot, + # "z"), target_x, + # target_y, target_z) + # action_arrs.extend(move_to_pick_act_arrs) + # pick_act_arr = np.array([0.0, 0.0, 0.0, 0.0, 0.0, -1.0], dtype=np.float32) + # action_arrs.append(pick_act_arr) - # Check for noop when overfilling a cup. - next_s = env.simulate(s, Action(pour_act_arr)) - assert s.allclose(next_s) + # policy = utils.action_arrs_to_policy(action_arrs) + # traj = utils.run_policy_with_simulator(policy, + # env.simulate, + # state, + # lambda _: False, + # max_num_steps=len(action_arrs)) + # assert traj.states[-2].get(jug, "is_held") < 0.5 + # assert traj.states[-1].get(jug, "is_held") > 0.5 + # assert GroundAtom(NotAboveCup, [robot, jug]).holds(traj.states[-1]) + # s = traj.states[-1] + + # # Check for noop when pouring into nothing. + # pour_act_lst = [0.0, 0.0, 0.0, 1.0, 0.0, 0.0] + # pour_act_arr = np.array(pour_act_lst, dtype=np.float32) + # next_s = env.simulate(s, Action(pour_act_arr)) + # assert s.allclose(next_s) + + # # Test pouring in each of the cups. + # for cup in cups: + # jug_target_x, jug_target_y, jug_target_z = env._get_pour_position( # pylint: disable=protected-access + # s, cup) + # target_x = jug_target_x - (s.get(jug, "x") - s.get(robot, "x")) + # target_y = jug_target_y - (s.get(jug, "y") - s.get(robot, "y")) + # target_z = jug_target_z + env.jug_handle_height + # move_to_pour_act_arrs = _get_position_action_arrs( + # s.get(robot, "x"), s.get(robot, "y"), s.get(robot, "z"), target_x, + # target_y, target_z) + # action_arrs.extend(move_to_pour_act_arrs) + # target_liquid = state.get(cup, "target_liquid") + # num_pour_steps = int(np.ceil(target_liquid / env.pour_velocity)) + # # Start pouring. + # action_arrs.append(pour_act_arr) + # # Keep pouring. + # action_arrs.extend([pour_act_arr for _ in range(num_pour_steps - 1)]) + # # Stop pouring. + # action_arrs.append(-1 * pour_act_arr) + + # policy = utils.action_arrs_to_policy(action_arrs) + # traj = utils.run_policy_with_simulator(policy, + # env.simulate, + # state, + # lambda _: False, + # max_num_steps=len(action_arrs)) + # assert not GroundAtom(CupFilled, [cup]).holds(traj.states[-3]) + # assert GroundAtom(CupFilled, [cup]).holds(traj.states[-1]) + # assert not GroundAtom(NotAboveCup, [robot, jug]).holds(traj.states[-1]) + # s = traj.states[-1] + # # Render a state where we are in the process of pouring. + # env.render_state(traj.states[-2], task) + + # # Check for noop when overfilling a cup. + # next_s = env.simulate(s, Action(pour_act_arr)) + # assert s.allclose(next_s) # Uncomment for debugging. # policy = utils.action_arrs_to_policy(action_arrs) @@ -284,6 +692,7 @@ def _get_position_action_arrs(init_x, init_y, init_z, final_x, final_y, # Test MoveToTwistJug and TwistJug. 
     MoveToTwistJug = option_name_to_option["MoveToTwistJug"]
+    state.set(jug, "rot", 2.0)
     TwistJug = option_name_to_option["TwistJug"]
     option1 = MoveToTwistJug.ground([robot, jug], [])
     option2 = TwistJug.ground([robot, jug], np.array([1.0], dtype=np.float32))
     option_plan = [option1, option2]
     policy = utils.option_plan_to_policy(option_plan)
@@ -297,8 +706,10 @@ def _get_position_action_arrs(init_x, init_y, init_z, final_x, final_y,
         max_num_steps=1000,
         exceptions_to_break_on={utils.OptionExecutionFailure})
-    expected_rot = env.jug_init_rot_ub
-    assert abs(traj.states[-1].get(jug, "rot") - expected_rot) < 1e-6
+    assert abs(traj.states[-1].get(jug, "rot")) < np.pi / 3
 
+    state.set(jug, "rot", 0.0)
     # Test PickJug.
     PickJug = option_name_to_option["PickJug"]
     option = PickJug.ground([robot, jug], [])
@@ -381,16 +798,16 @@ def _get_position_action_arrs(init_x, init_y, init_z, final_x, final_y,
     s = traj.states[-1]
 
-    # Uncomment for debugging.
-    # policy = utils.option_plan_to_policy(option_plan)
-    # monitor = utils.SimulateVideoMonitor(task, env.render_state)
-    # traj = utils.run_policy_with_simulator(
-    #     policy,
-    #     env.simulate,
-    #     state,
-    #     lambda _: False,
-    #     max_num_steps=1000,
-    #     exceptions_to_break_on={utils.OptionExecutionFailure},
-    #     monitor=monitor)
-    # video = monitor.get_video()
-    # outfile = "hardcoded_options_coffee.mp4"
-    # utils.save_video(outfile, video)
+    # Save a video of the executed option plan for debugging.
+    policy = utils.option_plan_to_policy(option_plan)
+    monitor = utils.SimulateVideoMonitor(task, env.render_state)
+    traj = utils.run_policy_with_simulator(
+        policy,
+        env.simulate,
+        state,
+        lambda _: False,
+        max_num_steps=1000,
+        exceptions_to_break_on={utils.OptionExecutionFailure},
+        monitor=monitor)
+    video = monitor.get_video()
+    outfile = "hardcoded_options_coffee.mp4"
+    utils.save_video(outfile, video)
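A standalone sketch of the epsilon schedule implied by the ml_models.py change above: the per-step reduction is scaled by the new CFG.epsilon_reduction setting, so with the default of 2 the exploration rate reaches its floor after half of the total online-learning steps. The numeric constants here are stand-ins for the CFG values, and clipping at the floor is assumed from the _min_epsilon name.

# Stand-ins for _epsilon, _min_epsilon and the CFG.* values in the patch.
epsilon, min_epsilon = 1.0, 0.1
num_cycles, steps_per_request, requests_per_cycle = 10, 100, 1
epsilon_reduction = 2  # CFG.epsilon_reduction (new default in settings.py)
total_steps = num_cycles * steps_per_request * requests_per_cycle
ep_reduction = epsilon_reduction * (epsilon - min_epsilon) / total_steps
for _ in range(total_steps):
    # Assumed: decay is applied once per interaction step, clipped at the floor.
    epsilon = max(min_epsilon, epsilon - ep_reduction)
# epsilon hits min_epsilon after total_steps / epsilon_reduction steps.
assert abs(epsilon - min_epsilon) < 1e-9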