From 9ac61885955c26dd7d53352eaf96fb7095159ffd Mon Sep 17 00:00:00 2001
From: NishanthJKumar <nishanth.kumar20@gmail.com>
Date: Wed, 20 Nov 2024 21:39:44 -0500
Subject: [PATCH 01/12] start hacking on interpret

---
 scripts/cluster_utils.py                |   1 +
 scripts/configs/pred_invention_vlm.yaml | 270 ++++++++++++------------
 2 files changed, 136 insertions(+), 135 deletions(-)

diff --git a/scripts/cluster_utils.py b/scripts/cluster_utils.py
index 94f1ece871..e765c9c8fa 100644
--- a/scripts/cluster_utils.py
+++ b/scripts/cluster_utils.py
@@ -145,6 +145,7 @@ def generate_run_configs(config_filename: str,
 def get_cmds_to_prep_repo(branch: str) -> List[str]:
     """Get the commands that should be run while already in the repository but
     before launching the experiments."""
+    return []
     old_dir_pattern = " ".join(f"{d}/" for d in SAVE_DIRS)
     return [
         "git stash",
diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index cae3e30911..da625a1063 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -1,148 +1,148 @@
 # Experiments to test predicate invention with VLMs
 ---
 APPROACHES:
-  ours:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
-      offline_data_method: geo_and_demo_with_vlm_imgs
-      grammar_search_invent_geo_predicates_only: False
-  ours-vlm-subselection:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_specific
-      offline_data_method: geo_and_demo_with_vlm_imgs
-      grammar_search_invent_geo_predicates_only: True
-  ours-no-subselection:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
-      offline_data_method: geo_and_demo_with_vlm_imgs
-      grammar_search_pred_selection_approach: no_select
-      grammar_search_invent_geo_predicates_only: False
-  ours-no-invent:
-    NAME: "nsrt_learning"
-    FLAGS: {}
-  ours-no-visual:
-    NAME: "grammar_search_invention"
-    FLAGS: 
-      offline_data_method: demo
-  ours-no-geo:
-    NAME: "grammar_search_invention"
-    FLAGS:
-      grammar_search_vlm_atom_proposal_prompt_type: demo_with_vlm_imgs
-      grammar_search_invent_geo_predicates_only: False
+  # ours:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
+  #     offline_data_method: geo_and_demo_with_vlm_imgs
+  #     grammar_search_invent_geo_predicates_only: False
+  # ours-vlm-subselection:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_specific
+  #     offline_data_method: geo_and_demo_with_vlm_imgs
+  #     grammar_search_invent_geo_predicates_only: True
+  # ours-no-subselection:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
+  #     offline_data_method: geo_and_demo_with_vlm_imgs
+  #     grammar_search_pred_selection_approach: no_select
+  #     grammar_search_invent_geo_predicates_only: False
+  # ours-no-invent:
+  #   NAME: "nsrt_learning"
+  #   FLAGS: {}
+  # ours-no-visual:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS: 
+  #     offline_data_method: demo
+  # ours-no-geo:
+  #   NAME: "grammar_search_invention"
+  #   FLAGS:
+  #     grammar_search_vlm_atom_proposal_prompt_type: demo_with_vlm_imgs
+  #     grammar_search_invent_geo_predicates_only: False
   interpret:
     NAME: "grammar_search_invention"
     FLAGS:
       offline_data_method: demo_with_vlm_imgs
       vlm_predicate_vision_api_generate_ground_atoms: True
-  vila-with-fewshot:
-    NAME: "vlm_open_loop"
-    FLAGS:
-      vlm_open_loop_use_training_demos: True
-  vila-pure:
-    NAME: "vlm_open_loop"
-    FLAGS:
-      vlm_open_loop_use_training_demos: False
+  # vila-with-fewshot:
+  #   NAME: "vlm_open_loop"
+  #   FLAGS:
+  #     vlm_open_loop_use_training_demos: True
+  # vila-pure:
+  #   NAME: "vlm_open_loop"
+  #   FLAGS:
+  #     vlm_open_loop_use_training_demos: False
 
 ENVS:
-  burger_no_move_more_stacks:
-    NAME: "burger_no_move"
-    FLAGS:
-      burger_no_move_task_type: "more_stacks"
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-      grammar_search_task_planning_timeout: 10.0
-      sesame_max_skeletons_optimized: 200
-      disable_harmlessness_check: True
-      sesame_task_planner: fdopt
-      excluded_predicates: all
-      option_model_terminate_on_repeat: False
-      grammar_search_vlm_atom_proposal_use_debug: False
-      allow_exclude_goal_predicates: True
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      vlm_include_cropped_images: True
-      timeout: 80
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12 # 8 for VILA
-      precondition_soft_intersection_threshold_percent: 0.8
-      grammar_search_early_termination_heuristic_thresh: 2000
-      vlm_double_check_output: True
-  burger_no_move_fatter_burger:
-    NAME: "burger_no_move"
-    FLAGS:
-      burger_no_move_task_type: "fatter_burger"
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-      grammar_search_task_planning_timeout: 10.0
-      sesame_max_skeletons_optimized: 200
-      disable_harmlessness_check: True
-      sesame_task_planner: fdopt
-      excluded_predicates: all
-      option_model_terminate_on_repeat: False
-      grammar_search_vlm_atom_proposal_use_debug: False
-      allow_exclude_goal_predicates: True
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      vlm_include_cropped_images: True
-      timeout: 80
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12
-      precondition_soft_intersection_threshold_percent: 0.8
-      grammar_search_early_termination_heuristic_thresh: 2000
-      vlm_double_check_output: True
-  burger_no_move_combo_burger:
-    NAME: "burger_no_move"
-    FLAGS:
-      burger_no_move_task_type: "combo_burger"
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-      grammar_search_task_planning_timeout: 10.0
-      sesame_max_skeletons_optimized: 200
-      disable_harmlessness_check: True
-      sesame_task_planner: fdopt
-      excluded_predicates: all
-      option_model_terminate_on_repeat: False
-      grammar_search_vlm_atom_proposal_use_debug: False
-      allow_exclude_goal_predicates: True
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      vlm_include_cropped_images: True
-      timeout: 80
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12
-      precondition_soft_intersection_threshold_percent: 0.8
-      grammar_search_early_termination_heuristic_thresh: 2000
-      vlm_double_check_output: True
+  # burger_no_move_more_stacks:
+  #   NAME: "burger_no_move"
+  #   FLAGS:
+  #     burger_no_move_task_type: "more_stacks"
+  #     bilevel_plan_without_sim: True
+  #     segmenter: option_changes
+  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+  #     grammar_search_task_planning_timeout: 10.0
+  #     sesame_max_skeletons_optimized: 200
+  #     disable_harmlessness_check: True
+  #     sesame_task_planner: fdopt
+  #     excluded_predicates: all
+  #     option_model_terminate_on_repeat: False
+  #     grammar_search_vlm_atom_proposal_use_debug: False
+  #     allow_exclude_goal_predicates: True
+  #     grammar_search_prune_redundant_preds: True
+  #     grammar_search_predicate_cost_upper_bound: 13
+  #     allow_state_allclose_comparison_despite_simulator_state: True
+  #     grammar_search_max_predicates: 100
+  #     grammar_search_parallelize_vlm_labeling: True
+  #     grammar_search_use_handcoded_debug_grammar: False
+  #     grammar_search_select_all_debug: False
+  #     cluster_and_intersect_soft_intersection_for_preconditions: True
+  #     vlm_include_cropped_images: True
+  #     timeout: 80
+  #     grammar_search_grammar_includes_givens: False
+  #     cluster_and_intersect_prune_low_data_pnads: True
+  #     cluster_and_intersect_min_datastore_fraction: 0.05
+  #     num_train_tasks: 12 # 8 for VILA
+  #     precondition_soft_intersection_threshold_percent: 0.8
+  #     grammar_search_early_termination_heuristic_thresh: 2000
+  #     vlm_double_check_output: True
+  # burger_no_move_fatter_burger:
+  #   NAME: "burger_no_move"
+  #   FLAGS:
+  #     burger_no_move_task_type: "fatter_burger"
+  #     bilevel_plan_without_sim: True
+  #     segmenter: option_changes
+  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+  #     grammar_search_task_planning_timeout: 10.0
+  #     sesame_max_skeletons_optimized: 200
+  #     disable_harmlessness_check: True
+  #     sesame_task_planner: fdopt
+  #     excluded_predicates: all
+  #     option_model_terminate_on_repeat: False
+  #     grammar_search_vlm_atom_proposal_use_debug: False
+  #     allow_exclude_goal_predicates: True
+  #     grammar_search_prune_redundant_preds: True
+  #     grammar_search_predicate_cost_upper_bound: 13
+  #     allow_state_allclose_comparison_despite_simulator_state: True
+  #     grammar_search_max_predicates: 100
+  #     grammar_search_parallelize_vlm_labeling: True
+  #     grammar_search_use_handcoded_debug_grammar: False
+  #     grammar_search_select_all_debug: False
+  #     cluster_and_intersect_soft_intersection_for_preconditions: True
+  #     vlm_include_cropped_images: True
+  #     timeout: 80
+  #     grammar_search_grammar_includes_givens: False
+  #     cluster_and_intersect_prune_low_data_pnads: True
+  #     cluster_and_intersect_min_datastore_fraction: 0.05
+  #     num_train_tasks: 12
+  #     precondition_soft_intersection_threshold_percent: 0.8
+  #     grammar_search_early_termination_heuristic_thresh: 2000
+  #     vlm_double_check_output: True
+  # burger_no_move_combo_burger:
+  #   NAME: "burger_no_move"
+  #   FLAGS:
+  #     burger_no_move_task_type: "combo_burger"
+  #     bilevel_plan_without_sim: True
+  #     segmenter: option_changes
+  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+  #     grammar_search_task_planning_timeout: 10.0
+  #     sesame_max_skeletons_optimized: 200
+  #     disable_harmlessness_check: True
+  #     sesame_task_planner: fdopt
+  #     excluded_predicates: all
+  #     option_model_terminate_on_repeat: False
+  #     grammar_search_vlm_atom_proposal_use_debug: False
+  #     allow_exclude_goal_predicates: True
+  #     grammar_search_prune_redundant_preds: True
+  #     grammar_search_predicate_cost_upper_bound: 13
+  #     allow_state_allclose_comparison_despite_simulator_state: True
+  #     grammar_search_max_predicates: 100
+  #     grammar_search_parallelize_vlm_labeling: True
+  #     grammar_search_use_handcoded_debug_grammar: False
+  #     grammar_search_select_all_debug: False
+  #     cluster_and_intersect_soft_intersection_for_preconditions: True
+  #     vlm_include_cropped_images: True
+  #     timeout: 80
+  #     grammar_search_grammar_includes_givens: False
+  #     cluster_and_intersect_prune_low_data_pnads: True
+  #     cluster_and_intersect_min_datastore_fraction: 0.05
+  #     num_train_tasks: 12
+  #     precondition_soft_intersection_threshold_percent: 0.8
+  #     grammar_search_early_termination_heuristic_thresh: 2000
+  #     vlm_double_check_output: True
   kitchen_boil_kettle:
     NAME: "kitchen"
     FLAGS:

From 0d929747fd587ad581bc6058e5748d0c5eceba13 Mon Sep 17 00:00:00 2001
From: NishanthJKumar <nishanth.kumar20@gmail.com>
Date: Wed, 20 Nov 2024 21:40:19 -0500
Subject: [PATCH 02/12] change yaml

---
 scripts/configs/pred_invention_vlm.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index da625a1063..f0b84ee832 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -182,5 +182,5 @@ FLAGS:
   num_test_tasks: 10
   save_eval_trajs: False
 START_SEED: 0
-NUM_SEEDS: 5
+NUM_SEEDS: 1
 ...

From aef0db0978d3863dc2c662ae55766b1d29661ecf Mon Sep 17 00:00:00 2001
From: NishanthJKumar <nishanth.kumar20@gmail.com>
Date: Wed, 20 Nov 2024 22:03:51 -0500
Subject: [PATCH 03/12] hmmmmm - really not sure why interpret is failing...

---
 predicators/datasets/generate_atom_trajs_with_vlm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/predicators/datasets/generate_atom_trajs_with_vlm.py b/predicators/datasets/generate_atom_trajs_with_vlm.py
index f7e62d5b12..4056efd7eb 100644
--- a/predicators/datasets/generate_atom_trajs_with_vlm.py
+++ b/predicators/datasets/generate_atom_trajs_with_vlm.py
@@ -831,6 +831,7 @@ def _generate_ground_atoms_with_vlm_oo_code_gen(
             ground_atoms = utils.abstract(state, candidates | known_predicates)
             ground_atoms_traj.append(ground_atoms)
         ground_atoms_trajs.append(ground_atoms_traj)
+    import ipdb; ipdb.set_trace()
     return ground_atoms_trajs
 
 

From 360594a1df2ac8e2f946f62351c34e64c465fd1c Mon Sep 17 00:00:00 2001
From: NishanthJKumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 10:30:24 -0500
Subject: [PATCH 04/12] found bug with interpret and fixed it!

---
 predicators/datasets/generate_atom_trajs_with_vlm.py | 11 ++++++++---
 predicators/structs.py                               |  4 ++--
 setup.py                                             |  2 ++
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/predicators/datasets/generate_atom_trajs_with_vlm.py b/predicators/datasets/generate_atom_trajs_with_vlm.py
index 4056efd7eb..ee9e31b1fc 100644
--- a/predicators/datasets/generate_atom_trajs_with_vlm.py
+++ b/predicators/datasets/generate_atom_trajs_with_vlm.py
@@ -831,7 +831,6 @@ def _generate_ground_atoms_with_vlm_oo_code_gen(
             ground_atoms = utils.abstract(state, candidates | known_predicates)
             ground_atoms_traj.append(ground_atoms)
         ground_atoms_trajs.append(ground_atoms_traj)
-    import ipdb; ipdb.set_trace()
     return ground_atoms_trajs
 
 
@@ -870,12 +869,18 @@ def _create_prompt_from_image_option_traj(
     for i, a in enumerate(image_option_traj.actions):
         state = image_option_traj.states[i]
         demo_str.append(f"state {i}:")
-        demo_str.append(state.dict_str(indent=2, object_features=True))
+        # NOTE: it's important to set the round_feat_vals argument to False
+        # here. If we set it to True, then the VLM might mistakenly propose
+        # predicates that work given rounding, but fail otherwise.
+        # So for instance, a predicate classifier that does `== 0` would
+        # work for a value 0.00123 rounded to a single decimal place,
+        # but wouldn't actually work when deployed on the number 0.00123!
+        demo_str.append(state.dict_str(indent=2, object_features=True, round_feat_vals=False))
         demo_str.append(f"action {i}: {a.name}")
     num_states = len(image_option_traj.states)
     state = image_option_traj.states[-1]
     demo_str.append(f"state {num_states}:")
-    demo_str.append(state.dict_str(indent=2, object_features=True))
+    demo_str.append(state.dict_str(indent=2, object_features=True, round_feat_vals=False))
     demo_str_ = '\n'.join(demo_str)
     template = template.replace("[DEMO_TRAJECTORY]", demo_str_)
 
diff --git a/predicators/structs.py b/predicators/structs.py
index 18abbe0834..ad55b6c3a1 100644
--- a/predicators/structs.py
+++ b/predicators/structs.py
@@ -215,14 +215,14 @@ def pretty_str(self) -> str:
         suffix = "\n" + "#" * ll + "\n"
         return prefix + "\n\n".join(table_strs) + suffix
 
-    def dict_str(self, indent: int = 0, object_features: bool = True) -> str:
+    def dict_str(self, indent: int = 0, object_features: bool = True, round_feat_vals: bool = True) -> str:
         """Return a dictionary representation of the state."""
         state_dict = {}
         for obj in self:
             obj_dict = {}
             if obj.type.name == "robot" or object_features:
                 for attribute, value in zip(obj.type.feature_names, self[obj]):
-                    if isinstance(value, (float, int, np.float32)):
+                    if isinstance(value, (float, int, np.float32)) and round_feat_vals:
                         value = round(float(value), 1)
                     obj_dict[attribute] = value
             obj_name = obj.name
diff --git a/setup.py b/setup.py
index fd83c01e8c..39af59aa2b 100644
--- a/setup.py
+++ b/setup.py
@@ -38,6 +38,8 @@
         "ImageHash",
         "google-generativeai",
         "tenacity",
+        "opencv-python",
+        "torchvision"
     ],
     include_package_data=True,
     extras_require={

From 0d4630e435e79a888a5bc38629c9b941363ca5ef Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 11:19:31 -0500
Subject: [PATCH 05/12] update to run kitchen first

---
 scripts/configs/pred_invention_vlm.yaml | 66 ++++++++++++-------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index f0b84ee832..bb43a528dc 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -47,6 +47,38 @@ APPROACHES:
   #     vlm_open_loop_use_training_demos: False
 
 ENVS:
+  kitchen_boil_kettle:
+    NAME: "kitchen"
+    FLAGS:
+      perceiver: "kitchen"
+      kitchen_goals: "boil_kettle"
+      kitchen_use_perfect_samplers: True
+      kitchen_render_set_of_marks: True
+      kitchen_use_combo_move_nsrts: True
+      kitchen_randomize_init_state: True
+      bilevel_plan_without_sim: True
+      segmenter: option_changes
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history
+      grammar_search_task_planning_timeout: 3.0
+      sesame_max_skeletons_optimized: 5
+      disable_harmlessness_check: True
+      excluded_predicates: all
+      grammar_search_vlm_atom_proposal_use_debug: False
+      grammar_search_prune_redundant_preds: True
+      grammar_search_predicate_cost_upper_bound: 13
+      allow_state_allclose_comparison_despite_simulator_state: True
+      grammar_search_max_predicates: 100
+      grammar_search_parallelize_vlm_labeling: True
+      grammar_search_use_handcoded_debug_grammar: False
+      grammar_search_select_all_debug: False
+      cluster_and_intersect_soft_intersection_for_preconditions: True
+      grammar_search_grammar_includes_givens: False
+      cluster_and_intersect_prune_low_data_pnads: True
+      cluster_and_intersect_min_datastore_fraction: 0.05
+      num_train_tasks: 3
+      precondition_soft_intersection_threshold_percent: 0.8
+      vlm_double_check_output: True
+      grammar_search_early_termination_heuristic_thresh: 100
   # burger_no_move_more_stacks:
   #   NAME: "burger_no_move"
   #   FLAGS:
@@ -143,38 +175,6 @@ ENVS:
   #     precondition_soft_intersection_threshold_percent: 0.8
   #     grammar_search_early_termination_heuristic_thresh: 2000
   #     vlm_double_check_output: True
-  kitchen_boil_kettle:
-    NAME: "kitchen"
-    FLAGS:
-      perceiver: "kitchen"
-      kitchen_goals: "boil_kettle"
-      kitchen_use_perfect_samplers: True
-      kitchen_render_set_of_marks: True
-      kitchen_use_combo_move_nsrts: True
-      kitchen_randomize_init_state: True
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history
-      grammar_search_task_planning_timeout: 3.0
-      sesame_max_skeletons_optimized: 5
-      disable_harmlessness_check: True
-      excluded_predicates: all
-      grammar_search_vlm_atom_proposal_use_debug: False
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 3
-      precondition_soft_intersection_threshold_percent: 0.8
-      vlm_double_check_output: True
-      grammar_search_early_termination_heuristic_thresh: 100
 
 ARGS: []
 FLAGS: 
@@ -182,5 +182,5 @@ FLAGS:
   num_test_tasks: 10
   save_eval_trajs: False
 START_SEED: 0
-NUM_SEEDS: 1
+NUM_SEEDS: 5
 ...

From 550c51bfb46ba5cec48b5586edf43c0bc87729b9 Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 11:23:37 -0500
Subject: [PATCH 06/12] more stacks

---
 scripts/configs/pred_invention_vlm.yaml | 90 ++++++++++++-------------
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index bb43a528dc..3993c971f4 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -47,53 +47,23 @@ APPROACHES:
   #     vlm_open_loop_use_training_demos: False
 
 ENVS:
-  kitchen_boil_kettle:
-    NAME: "kitchen"
-    FLAGS:
-      perceiver: "kitchen"
-      kitchen_goals: "boil_kettle"
-      kitchen_use_perfect_samplers: True
-      kitchen_render_set_of_marks: True
-      kitchen_use_combo_move_nsrts: True
-      kitchen_randomize_init_state: True
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history
-      grammar_search_task_planning_timeout: 3.0
-      sesame_max_skeletons_optimized: 5
-      disable_harmlessness_check: True
-      excluded_predicates: all
-      grammar_search_vlm_atom_proposal_use_debug: False
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 3
-      precondition_soft_intersection_threshold_percent: 0.8
-      vlm_double_check_output: True
-      grammar_search_early_termination_heuristic_thresh: 100
-  # burger_no_move_more_stacks:
-  #   NAME: "burger_no_move"
+  # kitchen_boil_kettle:
+  #   NAME: "kitchen"
   #   FLAGS:
-  #     burger_no_move_task_type: "more_stacks"
+  #     perceiver: "kitchen"
+  #     kitchen_goals: "boil_kettle"
+  #     kitchen_use_perfect_samplers: True
+  #     kitchen_render_set_of_marks: True
+  #     kitchen_use_combo_move_nsrts: True
+  #     kitchen_randomize_init_state: True
   #     bilevel_plan_without_sim: True
   #     segmenter: option_changes
-  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-  #     grammar_search_task_planning_timeout: 10.0
-  #     sesame_max_skeletons_optimized: 200
+  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history
+  #     grammar_search_task_planning_timeout: 3.0
+  #     sesame_max_skeletons_optimized: 5
   #     disable_harmlessness_check: True
-  #     sesame_task_planner: fdopt
   #     excluded_predicates: all
-  #     option_model_terminate_on_repeat: False
   #     grammar_search_vlm_atom_proposal_use_debug: False
-  #     allow_exclude_goal_predicates: True
   #     grammar_search_prune_redundant_preds: True
   #     grammar_search_predicate_cost_upper_bound: 13
   #     allow_state_allclose_comparison_despite_simulator_state: True
@@ -102,15 +72,45 @@ ENVS:
   #     grammar_search_use_handcoded_debug_grammar: False
   #     grammar_search_select_all_debug: False
   #     cluster_and_intersect_soft_intersection_for_preconditions: True
-  #     vlm_include_cropped_images: True
-  #     timeout: 80
   #     grammar_search_grammar_includes_givens: False
   #     cluster_and_intersect_prune_low_data_pnads: True
   #     cluster_and_intersect_min_datastore_fraction: 0.05
-  #     num_train_tasks: 12 # 8 for VILA
+  #     num_train_tasks: 3
   #     precondition_soft_intersection_threshold_percent: 0.8
-  #     grammar_search_early_termination_heuristic_thresh: 2000
   #     vlm_double_check_output: True
+  #     grammar_search_early_termination_heuristic_thresh: 100
+  burger_no_move_more_stacks:
+    NAME: "burger_no_move"
+    FLAGS:
+      burger_no_move_task_type: "more_stacks"
+      bilevel_plan_without_sim: True
+      segmenter: option_changes
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+      grammar_search_task_planning_timeout: 10.0
+      sesame_max_skeletons_optimized: 200
+      disable_harmlessness_check: True
+      sesame_task_planner: fdopt
+      excluded_predicates: all
+      option_model_terminate_on_repeat: False
+      grammar_search_vlm_atom_proposal_use_debug: False
+      allow_exclude_goal_predicates: True
+      grammar_search_prune_redundant_preds: True
+      grammar_search_predicate_cost_upper_bound: 13
+      allow_state_allclose_comparison_despite_simulator_state: True
+      grammar_search_max_predicates: 100
+      grammar_search_parallelize_vlm_labeling: True
+      grammar_search_use_handcoded_debug_grammar: False
+      grammar_search_select_all_debug: False
+      cluster_and_intersect_soft_intersection_for_preconditions: True
+      vlm_include_cropped_images: True
+      timeout: 80
+      grammar_search_grammar_includes_givens: False
+      cluster_and_intersect_prune_low_data_pnads: True
+      cluster_and_intersect_min_datastore_fraction: 0.05
+      num_train_tasks: 12 # 8 for VILA
+      precondition_soft_intersection_threshold_percent: 0.8
+      grammar_search_early_termination_heuristic_thresh: 2000
+      vlm_double_check_output: True
   # burger_no_move_fatter_burger:
   #   NAME: "burger_no_move"
   #   FLAGS:

From ed2464150dd76e8830aee4abc5d492b07b9bc885 Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 11:24:07 -0500
Subject: [PATCH 07/12] fatter-burger

---
 scripts/configs/pred_invention_vlm.yaml | 70 ++++++++++++-------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index 3993c971f4..e1ebe3aaf3 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -79,42 +79,10 @@ ENVS:
   #     precondition_soft_intersection_threshold_percent: 0.8
   #     vlm_double_check_output: True
   #     grammar_search_early_termination_heuristic_thresh: 100
-  burger_no_move_more_stacks:
-    NAME: "burger_no_move"
-    FLAGS:
-      burger_no_move_task_type: "more_stacks"
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-      grammar_search_task_planning_timeout: 10.0
-      sesame_max_skeletons_optimized: 200
-      disable_harmlessness_check: True
-      sesame_task_planner: fdopt
-      excluded_predicates: all
-      option_model_terminate_on_repeat: False
-      grammar_search_vlm_atom_proposal_use_debug: False
-      allow_exclude_goal_predicates: True
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      vlm_include_cropped_images: True
-      timeout: 80
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12 # 8 for VILA
-      precondition_soft_intersection_threshold_percent: 0.8
-      grammar_search_early_termination_heuristic_thresh: 2000
-      vlm_double_check_output: True
-  # burger_no_move_fatter_burger:
+  # burger_no_move_more_stacks:
   #   NAME: "burger_no_move"
   #   FLAGS:
-  #     burger_no_move_task_type: "fatter_burger"
+  #     burger_no_move_task_type: "more_stacks"
   #     bilevel_plan_without_sim: True
   #     segmenter: option_changes
   #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
@@ -139,10 +107,42 @@ ENVS:
   #     grammar_search_grammar_includes_givens: False
   #     cluster_and_intersect_prune_low_data_pnads: True
   #     cluster_and_intersect_min_datastore_fraction: 0.05
-  #     num_train_tasks: 12
+  #     num_train_tasks: 12 # 8 for VILA
   #     precondition_soft_intersection_threshold_percent: 0.8
   #     grammar_search_early_termination_heuristic_thresh: 2000
   #     vlm_double_check_output: True
+  burger_no_move_fatter_burger:
+    NAME: "burger_no_move"
+    FLAGS:
+      burger_no_move_task_type: "fatter_burger"
+      bilevel_plan_without_sim: True
+      segmenter: option_changes
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+      grammar_search_task_planning_timeout: 10.0
+      sesame_max_skeletons_optimized: 200
+      disable_harmlessness_check: True
+      sesame_task_planner: fdopt
+      excluded_predicates: all
+      option_model_terminate_on_repeat: False
+      grammar_search_vlm_atom_proposal_use_debug: False
+      allow_exclude_goal_predicates: True
+      grammar_search_prune_redundant_preds: True
+      grammar_search_predicate_cost_upper_bound: 13
+      allow_state_allclose_comparison_despite_simulator_state: True
+      grammar_search_max_predicates: 100
+      grammar_search_parallelize_vlm_labeling: True
+      grammar_search_use_handcoded_debug_grammar: False
+      grammar_search_select_all_debug: False
+      cluster_and_intersect_soft_intersection_for_preconditions: True
+      vlm_include_cropped_images: True
+      timeout: 80
+      grammar_search_grammar_includes_givens: False
+      cluster_and_intersect_prune_low_data_pnads: True
+      cluster_and_intersect_min_datastore_fraction: 0.05
+      num_train_tasks: 12
+      precondition_soft_intersection_threshold_percent: 0.8
+      grammar_search_early_termination_heuristic_thresh: 2000
+      vlm_double_check_output: True
   # burger_no_move_combo_burger:
   #   NAME: "burger_no_move"
   #   FLAGS:

From 592c7e87763ae184fc7571962505aa99b4598cfe Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 11:24:46 -0500
Subject: [PATCH 08/12] combo burger

---
 scripts/configs/pred_invention_vlm.yaml | 68 ++++++++++++-------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index e1ebe3aaf3..321203c04d 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -111,42 +111,10 @@ ENVS:
   #     precondition_soft_intersection_threshold_percent: 0.8
   #     grammar_search_early_termination_heuristic_thresh: 2000
   #     vlm_double_check_output: True
-  burger_no_move_fatter_burger:
-    NAME: "burger_no_move"
-    FLAGS:
-      burger_no_move_task_type: "fatter_burger"
-      bilevel_plan_without_sim: True
-      segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-      grammar_search_task_planning_timeout: 10.0
-      sesame_max_skeletons_optimized: 200
-      disable_harmlessness_check: True
-      sesame_task_planner: fdopt
-      excluded_predicates: all
-      option_model_terminate_on_repeat: False
-      grammar_search_vlm_atom_proposal_use_debug: False
-      allow_exclude_goal_predicates: True
-      grammar_search_prune_redundant_preds: True
-      grammar_search_predicate_cost_upper_bound: 13
-      allow_state_allclose_comparison_despite_simulator_state: True
-      grammar_search_max_predicates: 100
-      grammar_search_parallelize_vlm_labeling: True
-      grammar_search_use_handcoded_debug_grammar: False
-      grammar_search_select_all_debug: False
-      cluster_and_intersect_soft_intersection_for_preconditions: True
-      vlm_include_cropped_images: True
-      timeout: 80
-      grammar_search_grammar_includes_givens: False
-      cluster_and_intersect_prune_low_data_pnads: True
-      cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12
-      precondition_soft_intersection_threshold_percent: 0.8
-      grammar_search_early_termination_heuristic_thresh: 2000
-      vlm_double_check_output: True
-  # burger_no_move_combo_burger:
+  # burger_no_move_fatter_burger:
   #   NAME: "burger_no_move"
   #   FLAGS:
-  #     burger_no_move_task_type: "combo_burger"
+  #     burger_no_move_task_type: "fatter_burger"
   #     bilevel_plan_without_sim: True
   #     segmenter: option_changes
   #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
@@ -175,6 +143,38 @@ ENVS:
   #     precondition_soft_intersection_threshold_percent: 0.8
   #     grammar_search_early_termination_heuristic_thresh: 2000
   #     vlm_double_check_output: True
+  burger_no_move_combo_burger:
+    NAME: "burger_no_move"
+    FLAGS:
+      burger_no_move_task_type: "combo_burger"
+      bilevel_plan_without_sim: True
+      segmenter: option_changes
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+      grammar_search_task_planning_timeout: 10.0
+      sesame_max_skeletons_optimized: 200
+      disable_harmlessness_check: True
+      sesame_task_planner: fdopt
+      excluded_predicates: all
+      option_model_terminate_on_repeat: False
+      grammar_search_vlm_atom_proposal_use_debug: False
+      allow_exclude_goal_predicates: True
+      grammar_search_prune_redundant_preds: True
+      grammar_search_predicate_cost_upper_bound: 13
+      allow_state_allclose_comparison_despite_simulator_state: True
+      grammar_search_max_predicates: 100
+      grammar_search_parallelize_vlm_labeling: True
+      grammar_search_use_handcoded_debug_grammar: False
+      grammar_search_select_all_debug: False
+      cluster_and_intersect_soft_intersection_for_preconditions: True
+      vlm_include_cropped_images: True
+      timeout: 80
+      grammar_search_grammar_includes_givens: False
+      cluster_and_intersect_prune_low_data_pnads: True
+      cluster_and_intersect_min_datastore_fraction: 0.05
+      num_train_tasks: 12
+      precondition_soft_intersection_threshold_percent: 0.8
+      grammar_search_early_termination_heuristic_thresh: 2000
+      vlm_double_check_output: True
 
 ARGS: []
 FLAGS: 

From dbf8a970ef59afdef05542497ce333bafd3e92af Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 15:18:27 -0500
Subject: [PATCH 09/12] good to go

---
 .../datasets/generate_atom_trajs_with_vlm.py  |   8 +-
 predicators/structs.py                        |   9 +-
 scripts/cluster_utils.py                      |   1 -
 scripts/configs/pred_invention_vlm.yaml       | 270 +++++++++---------
 setup.py                                      |   2 -
 5 files changed, 148 insertions(+), 142 deletions(-)

diff --git a/predicators/datasets/generate_atom_trajs_with_vlm.py b/predicators/datasets/generate_atom_trajs_with_vlm.py
index ee9e31b1fc..9371fb4b81 100644
--- a/predicators/datasets/generate_atom_trajs_with_vlm.py
+++ b/predicators/datasets/generate_atom_trajs_with_vlm.py
@@ -875,12 +875,16 @@ def _create_prompt_from_image_option_traj(
         # So for instance, a predicate classifier that does `== 0` would
         # work for a value 0.00123 rounded to a single decimal place,
         # but wouldn't actually work when deployed on the number 0.00123!
-        demo_str.append(state.dict_str(indent=2, object_features=True, round_feat_vals=False))
+        demo_str.append(
+            state.dict_str(indent=2,
+                           object_features=True,
+                           round_feat_vals=False))
         demo_str.append(f"action {i}: {a.name}")
     num_states = len(image_option_traj.states)
     state = image_option_traj.states[-1]
     demo_str.append(f"state {num_states}:")
-    demo_str.append(state.dict_str(indent=2, object_features=True, round_feat_vals=False))
+    demo_str.append(
+        state.dict_str(indent=2, object_features=True, round_feat_vals=False))
     demo_str_ = '\n'.join(demo_str)
     template = template.replace("[DEMO_TRAJECTORY]", demo_str_)
 
diff --git a/predicators/structs.py b/predicators/structs.py
index ad55b6c3a1..7da68b6ef1 100644
--- a/predicators/structs.py
+++ b/predicators/structs.py
@@ -215,14 +215,19 @@ def pretty_str(self) -> str:
         suffix = "\n" + "#" * ll + "\n"
         return prefix + "\n\n".join(table_strs) + suffix
 
-    def dict_str(self, indent: int = 0, object_features: bool = True, round_feat_vals: bool = True) -> str:
+    def dict_str(self,
+                 indent: int = 0,
+                 object_features: bool = True,
+                 round_feat_vals: bool = True) -> str:
         """Return a dictionary representation of the state."""
         state_dict = {}
         for obj in self:
             obj_dict = {}
             if obj.type.name == "robot" or object_features:
                 for attribute, value in zip(obj.type.feature_names, self[obj]):
-                    if isinstance(value, (float, int, np.float32)) and round_feat_vals:
+                    if isinstance(
+                            value,
+                        (float, int, np.float32)) and round_feat_vals:
                         value = round(float(value), 1)
                     obj_dict[attribute] = value
             obj_name = obj.name
diff --git a/scripts/cluster_utils.py b/scripts/cluster_utils.py
index e765c9c8fa..94f1ece871 100644
--- a/scripts/cluster_utils.py
+++ b/scripts/cluster_utils.py
@@ -145,7 +145,6 @@ def generate_run_configs(config_filename: str,
 def get_cmds_to_prep_repo(branch: str) -> List[str]:
     """Get the commands that should be run while already in the repository but
     before launching the experiments."""
-    return []
     old_dir_pattern = " ".join(f"{d}/" for d in SAVE_DIRS)
     return [
         "git stash",
diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index 321203c04d..da4850fc5f 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -1,148 +1,148 @@
 # Experiments to test predicate invention with VLMs
 ---
 APPROACHES:
-  # ours:
-  #   NAME: "grammar_search_invention"
-  #   FLAGS:
-  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
-  #     offline_data_method: geo_and_demo_with_vlm_imgs
-  #     grammar_search_invent_geo_predicates_only: False
-  # ours-vlm-subselection:
-  #   NAME: "grammar_search_invention"
-  #   FLAGS:
-  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_specific
-  #     offline_data_method: geo_and_demo_with_vlm_imgs
-  #     grammar_search_invent_geo_predicates_only: True
-  # ours-no-subselection:
-  #   NAME: "grammar_search_invention"
-  #   FLAGS:
-  #     grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
-  #     offline_data_method: geo_and_demo_with_vlm_imgs
-  #     grammar_search_pred_selection_approach: no_select
-  #     grammar_search_invent_geo_predicates_only: False
-  # ours-no-invent:
-  #   NAME: "nsrt_learning"
-  #   FLAGS: {}
-  # ours-no-visual:
-  #   NAME: "grammar_search_invention"
-  #   FLAGS: 
-  #     offline_data_method: demo
-  # ours-no-geo:
-  #   NAME: "grammar_search_invention"
-  #   FLAGS:
-  #     grammar_search_vlm_atom_proposal_prompt_type: demo_with_vlm_imgs
-  #     grammar_search_invent_geo_predicates_only: False
+  ours:
+    NAME: "grammar_search_invention"
+    FLAGS:
+      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
+      offline_data_method: geo_and_demo_with_vlm_imgs
+      grammar_search_invent_geo_predicates_only: False
+  ours-vlm-subselection:
+    NAME: "grammar_search_invention"
+    FLAGS:
+      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_specific
+      offline_data_method: geo_and_demo_with_vlm_imgs
+      grammar_search_invent_geo_predicates_only: True
+  ours-no-subselection:
+    NAME: "grammar_search_invention"
+    FLAGS:
+      grammar_search_vlm_atom_proposal_prompt_type: options_labels_whole_traj_diverse
+      offline_data_method: geo_and_demo_with_vlm_imgs
+      grammar_search_pred_selection_approach: no_select
+      grammar_search_invent_geo_predicates_only: False
+  ours-no-invent:
+    NAME: "nsrt_learning"
+    FLAGS: {}
+  ours-no-visual:
+    NAME: "grammar_search_invention"
+    FLAGS: 
+      offline_data_method: demo
+  ours-no-geo:
+    NAME: "grammar_search_invention"
+    FLAGS:
+      grammar_search_vlm_atom_proposal_prompt_type: demo_with_vlm_imgs
+      grammar_search_invent_geo_predicates_only: False
   interpret:
     NAME: "grammar_search_invention"
     FLAGS:
       offline_data_method: demo_with_vlm_imgs
       vlm_predicate_vision_api_generate_ground_atoms: True
-  # vila-with-fewshot:
-  #   NAME: "vlm_open_loop"
-  #   FLAGS:
-  #     vlm_open_loop_use_training_demos: True
-  # vila-pure:
-  #   NAME: "vlm_open_loop"
-  #   FLAGS:
-  #     vlm_open_loop_use_training_demos: False
+  vila-with-fewshot:
+    NAME: "vlm_open_loop"
+    FLAGS:
+      vlm_open_loop_use_training_demos: True
+  vila-pure:
+    NAME: "vlm_open_loop"
+    FLAGS:
+      vlm_open_loop_use_training_demos: False
 
 ENVS:
-  # kitchen_boil_kettle:
-  #   NAME: "kitchen"
-  #   FLAGS:
-  #     perceiver: "kitchen"
-  #     kitchen_goals: "boil_kettle"
-  #     kitchen_use_perfect_samplers: True
-  #     kitchen_render_set_of_marks: True
-  #     kitchen_use_combo_move_nsrts: True
-  #     kitchen_randomize_init_state: True
-  #     bilevel_plan_without_sim: True
-  #     segmenter: option_changes
-  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history
-  #     grammar_search_task_planning_timeout: 3.0
-  #     sesame_max_skeletons_optimized: 5
-  #     disable_harmlessness_check: True
-  #     excluded_predicates: all
-  #     grammar_search_vlm_atom_proposal_use_debug: False
-  #     grammar_search_prune_redundant_preds: True
-  #     grammar_search_predicate_cost_upper_bound: 13
-  #     allow_state_allclose_comparison_despite_simulator_state: True
-  #     grammar_search_max_predicates: 100
-  #     grammar_search_parallelize_vlm_labeling: True
-  #     grammar_search_use_handcoded_debug_grammar: False
-  #     grammar_search_select_all_debug: False
-  #     cluster_and_intersect_soft_intersection_for_preconditions: True
-  #     grammar_search_grammar_includes_givens: False
-  #     cluster_and_intersect_prune_low_data_pnads: True
-  #     cluster_and_intersect_min_datastore_fraction: 0.05
-  #     num_train_tasks: 3
-  #     precondition_soft_intersection_threshold_percent: 0.8
-  #     vlm_double_check_output: True
-  #     grammar_search_early_termination_heuristic_thresh: 100
-  # burger_no_move_more_stacks:
-  #   NAME: "burger_no_move"
-  #   FLAGS:
-  #     burger_no_move_task_type: "more_stacks"
-  #     bilevel_plan_without_sim: True
-  #     segmenter: option_changes
-  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-  #     grammar_search_task_planning_timeout: 10.0
-  #     sesame_max_skeletons_optimized: 200
-  #     disable_harmlessness_check: True
-  #     sesame_task_planner: fdopt
-  #     excluded_predicates: all
-  #     option_model_terminate_on_repeat: False
-  #     grammar_search_vlm_atom_proposal_use_debug: False
-  #     allow_exclude_goal_predicates: True
-  #     grammar_search_prune_redundant_preds: True
-  #     grammar_search_predicate_cost_upper_bound: 13
-  #     allow_state_allclose_comparison_despite_simulator_state: True
-  #     grammar_search_max_predicates: 100
-  #     grammar_search_parallelize_vlm_labeling: True
-  #     grammar_search_use_handcoded_debug_grammar: False
-  #     grammar_search_select_all_debug: False
-  #     cluster_and_intersect_soft_intersection_for_preconditions: True
-  #     vlm_include_cropped_images: True
-  #     timeout: 80
-  #     grammar_search_grammar_includes_givens: False
-  #     cluster_and_intersect_prune_low_data_pnads: True
-  #     cluster_and_intersect_min_datastore_fraction: 0.05
-  #     num_train_tasks: 12 # 8 for VILA
-  #     precondition_soft_intersection_threshold_percent: 0.8
-  #     grammar_search_early_termination_heuristic_thresh: 2000
-  #     vlm_double_check_output: True
-  # burger_no_move_fatter_burger:
-  #   NAME: "burger_no_move"
-  #   FLAGS:
-  #     burger_no_move_task_type: "fatter_burger"
-  #     bilevel_plan_without_sim: True
-  #     segmenter: option_changes
-  #     grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-  #     grammar_search_task_planning_timeout: 10.0
-  #     sesame_max_skeletons_optimized: 200
-  #     disable_harmlessness_check: True
-  #     sesame_task_planner: fdopt
-  #     excluded_predicates: all
-  #     option_model_terminate_on_repeat: False
-  #     grammar_search_vlm_atom_proposal_use_debug: False
-  #     allow_exclude_goal_predicates: True
-  #     grammar_search_prune_redundant_preds: True
-  #     grammar_search_predicate_cost_upper_bound: 13
-  #     allow_state_allclose_comparison_despite_simulator_state: True
-  #     grammar_search_max_predicates: 100
-  #     grammar_search_parallelize_vlm_labeling: True
-  #     grammar_search_use_handcoded_debug_grammar: False
-  #     grammar_search_select_all_debug: False
-  #     cluster_and_intersect_soft_intersection_for_preconditions: True
-  #     vlm_include_cropped_images: True
-  #     timeout: 80
-  #     grammar_search_grammar_includes_givens: False
-  #     cluster_and_intersect_prune_low_data_pnads: True
-  #     cluster_and_intersect_min_datastore_fraction: 0.05
-  #     num_train_tasks: 12
-  #     precondition_soft_intersection_threshold_percent: 0.8
-  #     grammar_search_early_termination_heuristic_thresh: 2000
-  #     vlm_double_check_output: True
+  kitchen_boil_kettle:
+    NAME: "kitchen"
+    FLAGS:
+      perceiver: "kitchen"
+      kitchen_goals: "boil_kettle"
+      kitchen_use_perfect_samplers: True
+      kitchen_render_set_of_marks: True
+      kitchen_use_combo_move_nsrts: True
+      kitchen_randomize_init_state: True
+      bilevel_plan_without_sim: True
+      segmenter: option_changes
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history
+      grammar_search_task_planning_timeout: 3.0
+      sesame_max_skeletons_optimized: 5
+      disable_harmlessness_check: True
+      excluded_predicates: all
+      grammar_search_vlm_atom_proposal_use_debug: False
+      grammar_search_prune_redundant_preds: True
+      grammar_search_predicate_cost_upper_bound: 13
+      allow_state_allclose_comparison_despite_simulator_state: True
+      grammar_search_max_predicates: 100
+      grammar_search_parallelize_vlm_labeling: True
+      grammar_search_use_handcoded_debug_grammar: False
+      grammar_search_select_all_debug: False
+      cluster_and_intersect_soft_intersection_for_preconditions: True
+      grammar_search_grammar_includes_givens: False
+      cluster_and_intersect_prune_low_data_pnads: True
+      cluster_and_intersect_min_datastore_fraction: 0.05
+      num_train_tasks: 3
+      precondition_soft_intersection_threshold_percent: 0.8
+      vlm_double_check_output: True
+      grammar_search_early_termination_heuristic_thresh: 100
+  burger_no_move_more_stacks:
+    NAME: "burger_no_move"
+    FLAGS:
+      burger_no_move_task_type: "more_stacks"
+      bilevel_plan_without_sim: True
+      segmenter: option_changes
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+      grammar_search_task_planning_timeout: 10.0
+      sesame_max_skeletons_optimized: 200
+      disable_harmlessness_check: True
+      sesame_task_planner: fdopt
+      excluded_predicates: all
+      option_model_terminate_on_repeat: False
+      grammar_search_vlm_atom_proposal_use_debug: False
+      allow_exclude_goal_predicates: True
+      grammar_search_prune_redundant_preds: True
+      grammar_search_predicate_cost_upper_bound: 13
+      allow_state_allclose_comparison_despite_simulator_state: True
+      grammar_search_max_predicates: 100
+      grammar_search_parallelize_vlm_labeling: True
+      grammar_search_use_handcoded_debug_grammar: False
+      grammar_search_select_all_debug: False
+      cluster_and_intersect_soft_intersection_for_preconditions: True
+      vlm_include_cropped_images: True
+      timeout: 80
+      grammar_search_grammar_includes_givens: False
+      cluster_and_intersect_prune_low_data_pnads: True
+      cluster_and_intersect_min_datastore_fraction: 0.05
+      num_train_tasks: 12 # 8 for VILA
+      precondition_soft_intersection_threshold_percent: 0.8
+      grammar_search_early_termination_heuristic_thresh: 2000
+      vlm_double_check_output: True
+  burger_no_move_fatter_burger:
+    NAME: "burger_no_move"
+    FLAGS:
+      burger_no_move_task_type: "fatter_burger"
+      bilevel_plan_without_sim: True
+      segmenter: option_changes
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+      grammar_search_task_planning_timeout: 10.0
+      sesame_max_skeletons_optimized: 200
+      disable_harmlessness_check: True
+      sesame_task_planner: fdopt
+      excluded_predicates: all
+      option_model_terminate_on_repeat: False
+      grammar_search_vlm_atom_proposal_use_debug: False
+      allow_exclude_goal_predicates: True
+      grammar_search_prune_redundant_preds: True
+      grammar_search_predicate_cost_upper_bound: 13
+      allow_state_allclose_comparison_despite_simulator_state: True
+      grammar_search_max_predicates: 100
+      grammar_search_parallelize_vlm_labeling: True
+      grammar_search_use_handcoded_debug_grammar: False
+      grammar_search_select_all_debug: False
+      cluster_and_intersect_soft_intersection_for_preconditions: True
+      vlm_include_cropped_images: True
+      timeout: 80
+      grammar_search_grammar_includes_givens: False
+      cluster_and_intersect_prune_low_data_pnads: True
+      cluster_and_intersect_min_datastore_fraction: 0.05
+      num_train_tasks: 12
+      precondition_soft_intersection_threshold_percent: 0.8
+      grammar_search_early_termination_heuristic_thresh: 2000
+      vlm_double_check_output: True
   burger_no_move_combo_burger:
     NAME: "burger_no_move"
     FLAGS:
diff --git a/setup.py b/setup.py
index 39af59aa2b..fd83c01e8c 100644
--- a/setup.py
+++ b/setup.py
@@ -38,8 +38,6 @@
         "ImageHash",
         "google-generativeai",
         "tenacity",
-        "opencv-python",
-        "torchvision"
     ],
     include_package_data=True,
     extras_require={

From b035b108e02bc9ba5bab6db4b2ca473dab57a4cd Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 15:28:07 -0500
Subject: [PATCH 10/12] try this

---
 scripts/configs/pred_invention_vlm.yaml | 63 ++++++++++++-------------
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index da4850fc5f..1eea06cbe4 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -47,23 +47,21 @@ APPROACHES:
       vlm_open_loop_use_training_demos: False
 
 ENVS:
-  kitchen_boil_kettle:
-    NAME: "kitchen"
+  burger_no_move_more_stacks:
+    NAME: "burger_no_move"
     FLAGS:
-      perceiver: "kitchen"
-      kitchen_goals: "boil_kettle"
-      kitchen_use_perfect_samplers: True
-      kitchen_render_set_of_marks: True
-      kitchen_use_combo_move_nsrts: True
-      kitchen_randomize_init_state: True
+      burger_no_move_task_type: "more_stacks"
       bilevel_plan_without_sim: True
       segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history
-      grammar_search_task_planning_timeout: 3.0
-      sesame_max_skeletons_optimized: 5
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
+      grammar_search_task_planning_timeout: 10.0
+      sesame_max_skeletons_optimized: 200
       disable_harmlessness_check: True
+      sesame_task_planner: fdopt
       excluded_predicates: all
+      option_model_terminate_on_repeat: False
       grammar_search_vlm_atom_proposal_use_debug: False
+      allow_exclude_goal_predicates: True
       grammar_search_prune_redundant_preds: True
       grammar_search_predicate_cost_upper_bound: 13
       allow_state_allclose_comparison_despite_simulator_state: True
@@ -72,17 +70,19 @@ ENVS:
       grammar_search_use_handcoded_debug_grammar: False
       grammar_search_select_all_debug: False
       cluster_and_intersect_soft_intersection_for_preconditions: True
+      vlm_include_cropped_images: True
+      timeout: 80
       grammar_search_grammar_includes_givens: False
       cluster_and_intersect_prune_low_data_pnads: True
       cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 3
+      num_train_tasks: 12 # 8 for VILA
       precondition_soft_intersection_threshold_percent: 0.8
+      grammar_search_early_termination_heuristic_thresh: 2000
       vlm_double_check_output: True
-      grammar_search_early_termination_heuristic_thresh: 100
-  burger_no_move_more_stacks:
+  burger_no_move_fatter_burger:
     NAME: "burger_no_move"
     FLAGS:
-      burger_no_move_task_type: "more_stacks"
+      burger_no_move_task_type: "fatter_burger"
       bilevel_plan_without_sim: True
       segmenter: option_changes
       grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
@@ -107,14 +107,14 @@ ENVS:
       grammar_search_grammar_includes_givens: False
       cluster_and_intersect_prune_low_data_pnads: True
       cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12 # 8 for VILA
+      num_train_tasks: 12
       precondition_soft_intersection_threshold_percent: 0.8
       grammar_search_early_termination_heuristic_thresh: 2000
       vlm_double_check_output: True
-  burger_no_move_fatter_burger:
+  burger_no_move_combo_burger:
     NAME: "burger_no_move"
     FLAGS:
-      burger_no_move_task_type: "fatter_burger"
+      burger_no_move_task_type: "combo_burger"
       bilevel_plan_without_sim: True
       segmenter: option_changes
       grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
@@ -143,21 +143,23 @@ ENVS:
       precondition_soft_intersection_threshold_percent: 0.8
       grammar_search_early_termination_heuristic_thresh: 2000
       vlm_double_check_output: True
-  burger_no_move_combo_burger:
-    NAME: "burger_no_move"
+  kitchen_boil_kettle:
+    NAME: "kitchen"
     FLAGS:
-      burger_no_move_task_type: "combo_burger"
+      perceiver: "kitchen"
+      kitchen_goals: "boil_kettle"
+      kitchen_use_perfect_samplers: True
+      kitchen_render_set_of_marks: True
+      kitchen_use_combo_move_nsrts: True
+      kitchen_randomize_init_state: True
       bilevel_plan_without_sim: True
       segmenter: option_changes
-      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history_burger
-      grammar_search_task_planning_timeout: 10.0
-      sesame_max_skeletons_optimized: 200
+      grammar_search_vlm_atom_label_prompt_type: img_option_diffs_label_history
+      grammar_search_task_planning_timeout: 3.0
+      sesame_max_skeletons_optimized: 5
       disable_harmlessness_check: True
-      sesame_task_planner: fdopt
       excluded_predicates: all
-      option_model_terminate_on_repeat: False
       grammar_search_vlm_atom_proposal_use_debug: False
-      allow_exclude_goal_predicates: True
       grammar_search_prune_redundant_preds: True
       grammar_search_predicate_cost_upper_bound: 13
       allow_state_allclose_comparison_despite_simulator_state: True
@@ -166,16 +168,13 @@ ENVS:
       grammar_search_use_handcoded_debug_grammar: False
       grammar_search_select_all_debug: False
       cluster_and_intersect_soft_intersection_for_preconditions: True
-      vlm_include_cropped_images: True
-      timeout: 80
       grammar_search_grammar_includes_givens: False
       cluster_and_intersect_prune_low_data_pnads: True
       cluster_and_intersect_min_datastore_fraction: 0.05
-      num_train_tasks: 12
+      num_train_tasks: 3
       precondition_soft_intersection_threshold_percent: 0.8
-      grammar_search_early_termination_heuristic_thresh: 2000
       vlm_double_check_output: True
-
+      grammar_search_early_termination_heuristic_thresh: 100
 ARGS: []
 FLAGS: 
   vlm_model_name: gpt-4o

From e97dd480f52df9870c355b7d8602703d638b22cb Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 15:28:29 -0500
Subject: [PATCH 11/12] remove unnec change

---
 scripts/configs/pred_invention_vlm.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/configs/pred_invention_vlm.yaml b/scripts/configs/pred_invention_vlm.yaml
index 1eea06cbe4..cae3e30911 100644
--- a/scripts/configs/pred_invention_vlm.yaml
+++ b/scripts/configs/pred_invention_vlm.yaml
@@ -175,6 +175,7 @@ ENVS:
       precondition_soft_intersection_threshold_percent: 0.8
       vlm_double_check_output: True
       grammar_search_early_termination_heuristic_thresh: 100
+
 ARGS: []
 FLAGS: 
   vlm_model_name: gpt-4o

From a3b71a140139a91be308d3faff0db8a85a1a513e Mon Sep 17 00:00:00 2001
From: Nishanth Kumar <nishanth.kumar20@gmail.com>
Date: Thu, 21 Nov 2024 15:31:54 -0500
Subject: [PATCH 12/12] simpler fix to interpret issue

---
 .../datasets/generate_atom_trajs_with_vlm.py       | 14 ++------------
 predicators/structs.py                             |  9 +--------
 2 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/predicators/datasets/generate_atom_trajs_with_vlm.py b/predicators/datasets/generate_atom_trajs_with_vlm.py
index 9371fb4b81..f7e62d5b12 100644
--- a/predicators/datasets/generate_atom_trajs_with_vlm.py
+++ b/predicators/datasets/generate_atom_trajs_with_vlm.py
@@ -869,22 +869,12 @@ def _create_prompt_from_image_option_traj(
     for i, a in enumerate(image_option_traj.actions):
         state = image_option_traj.states[i]
         demo_str.append(f"state {i}:")
-        # NOTE: it's important to set the round_feat_vals argument to False
-        # here. If we set it to True, then the VLM might mistakenly propose
-        # predicates that work given rounding, but fail otherwise.
-        # So for instance, a predicate classifier that does `== 0` would
-        # work for a value 0.00123 rounded to a single decimal place,
-        # but wouldn't actually work when deployed on the number 0.00123!
-        demo_str.append(
-            state.dict_str(indent=2,
-                           object_features=True,
-                           round_feat_vals=False))
+        demo_str.append(state.dict_str(indent=2, object_features=True))
         demo_str.append(f"action {i}: {a.name}")
     num_states = len(image_option_traj.states)
     state = image_option_traj.states[-1]
     demo_str.append(f"state {num_states}:")
-    demo_str.append(
-        state.dict_str(indent=2, object_features=True, round_feat_vals=False))
+    demo_str.append(state.dict_str(indent=2, object_features=True))
     demo_str_ = '\n'.join(demo_str)
     template = template.replace("[DEMO_TRAJECTORY]", demo_str_)
 
diff --git a/predicators/structs.py b/predicators/structs.py
index 7da68b6ef1..d7a107e501 100644
--- a/predicators/structs.py
+++ b/predicators/structs.py
@@ -215,20 +215,13 @@ def pretty_str(self) -> str:
         suffix = "\n" + "#" * ll + "\n"
         return prefix + "\n\n".join(table_strs) + suffix
 
-    def dict_str(self,
-                 indent: int = 0,
-                 object_features: bool = True,
-                 round_feat_vals: bool = True) -> str:
+    def dict_str(self, indent: int = 0, object_features: bool = True) -> str:
         """Return a dictionary representation of the state."""
         state_dict = {}
         for obj in self:
             obj_dict = {}
             if obj.type.name == "robot" or object_features:
                 for attribute, value in zip(obj.type.feature_names, self[obj]):
-                    if isinstance(
-                            value,
-                        (float, int, np.float32)) and round_feat_vals:
-                        value = round(float(value), 1)
                     obj_dict[attribute] = value
             obj_name = obj.name
             state_dict[f"{obj_name}:{obj.type.name}"] = obj_dict