diff --git a/tmp.ipynb b/tmp.ipynb index 8bf2692c2..28e622999 100644 --- a/tmp.ipynb +++ b/tmp.ipynb @@ -556,6 +556,113 @@ "image" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from agential.benchmarks.computer_use.osworld.data_manager import OSWorldDataManager\n", + "\n", + "manager = OSWorldDataManager(\n", + " mode=\"benchmark\", test_type=\"test_small\",\n", + " path_to_google_settings=\"C:/Users/tuvin/OneDrive/Desktop/agential/settings/google\",\n", + " path_to_googledrive_settings=\"C:/Users/tuvin/OneDrive/Desktop/agential/settings/googledrive\"\n", + ")\n", + "\n", + "tasks = manager.get_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting VMware VM...\n" + ] + } + ], + "source": [ + "from agential.benchmarks.computer_use.osworld.osworld import OSWorld\n", + "\n", + "env = OSWorld(action_space=\"pyautogui\", path_to_vm=\"C:/Users/tuvin/OneDrive/Desktop/agential/vmware_vm_data/Ubuntu0/Ubuntu0.vmx\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting VMware VM...\n" + ] + } + ], + "source": [ + "example = {\n", + " \"id\": \"9656a811-9b5b-4ddf-99c7-5117bcef0626\",\n", + " \"snapshot\": \"chrome\",\n", + " \"instruction\": \"I want Chrome to warn me whenever I visit a potentially harmful or unsafe website. Can you enable this safety feature?\",\n", + " \"source\": \"https://www.quora.com/How-do-I-set-the-security-settings-for-the-Google-Chrome-browser-for-the-best-security#:~:text=Enable%20Safe%20Browsing:%20Chrome%20has%20a%20built%2Din,Security%20%3E%20Security%20%3E%20Enable%20Safe%20Browsing.\",\n", + " \"config\": [\n", + " {\n", + " \"type\": \"launch\",\n", + " \"parameters\": {\n", + " \"command\": [\n", + " \"google-chrome\",\n", + " \"--remote-debugging-port=1337\"\n", + " ]\n", + " }\n", + " },\n", + " {\n", + " \"type\": \"launch\",\n", + " \"parameters\": {\n", + " \"command\": [\n", + " \"socat\",\n", + " \"tcp-listen:9222,fork\",\n", + " \"tcp:localhost:1337\"\n", + " ]\n", + " }\n", + " }\n", + " ],\n", + " \"trajectory\": \"trajectories/\",\n", + " \"related_apps\": [\n", + " \"chrome\"\n", + " ],\n", + " \"evaluator\": {\n", + " \"postconfig\": [\n", + " {\n", + " \"type\": \"execute\",\n", + " \"parameters\": {\n", + " \"command\": \"pkill chrome\",\n", + " \"shell\": \"true\"\n", + " }\n", + " }\n", + " ],\n", + " \"func\": \"exact_match\",\n", + " \"result\": {\n", + " \"type\": \"enable_enhanced_safety_browsing\"\n", + " },\n", + " \"expected\": {\n", + " \"type\": \"rule\",\n", + " \"rules\": {\n", + " \"expected\": \"true\"\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "\n", + "obs = env.reset(example)" + ] + }, { "cell_type": "code", "execution_count": 6,