diff --git a/notebooks/vertex_genai/solutions/vertex_llm_evaluation.ipynb b/notebooks/vertex_genai/solutions/vertex_llm_evaluation.ipynb index 5bf65703..7c1ae5aa 100644 --- a/notebooks/vertex_genai/solutions/vertex_llm_evaluation.ipynb +++ b/notebooks/vertex_genai/solutions/vertex_llm_evaluation.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "id": "PyQmSRbKA8r-", "tags": [] @@ -59,20 +59,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "oM1iC_MfAts1", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Your project ID is set to dherin-dev\n" - ] - } - ], + "outputs": [], "source": [ "project_id_list = !gcloud config get-value project 2> /dev/null\n", "PROJECT_ID = project_id_list[0]\n", @@ -102,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "id": "NIq7R4HZCfIc", "tags": [] @@ -125,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "id": "j4KEcQEWROby", "tags": [] @@ -182,107 +174,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "id": "R-_ettKRxfxT", "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
iddocumentresponse_aresponse_b
040159674The 33-year-old, capped 81 times by the Republ...Whelan joined Aston Villa on a free transfer f...- Glenn Whelan, 33, has joined Aston Villa fro...
136925620Paul Frew, MLA for North Antrim, appeared in c...- A 15-year-old girl is suing DUP MLA Paul Fre...- Paul Frew, MLA for North Antrim, appeared in...
239729595Party leader Jeremy Corbyn said half of the ho...Labour says it will build 100,000 new homes a ...- Labour leader Jeremy Corbyn announced a hous...
316788700No clear winner emerged between incumbent Jean...The African Union summit in Addis Ababa ended ...The African Union (AU) summit in Addis Ababa, ...
419389625The ban has been called by opposition coalitio...Opposition coalition in Togo has called for a ...Togo's opposition coalition, Let's Save Togo, ...
\n", - "
" - ], - "text/plain": [ - " id document \\\n", - "0 40159674 The 33-year-old, capped 81 times by the Republ... \n", - "1 36925620 Paul Frew, MLA for North Antrim, appeared in c... \n", - "2 39729595 Party leader Jeremy Corbyn said half of the ho... \n", - "3 16788700 No clear winner emerged between incumbent Jean... \n", - "4 19389625 The ban has been called by opposition coalitio... \n", - "\n", - " response_a \\\n", - "0 Whelan joined Aston Villa on a free transfer f... \n", - "1 - A 15-year-old girl is suing DUP MLA Paul Fre... \n", - "2 Labour says it will build 100,000 new homes a ... \n", - "3 The African Union summit in Addis Ababa ended ... \n", - "4 Opposition coalition in Togo has called for a ... \n", - "\n", - " response_b \n", - "0 - Glenn Whelan, 33, has joined Aston Villa fro... \n", - "1 - Paul Frew, MLA for North Antrim, appeared in... \n", - "2 - Labour leader Jeremy Corbyn announced a hous... \n", - "3 The African Union (AU) summit in Addis Ababa, ... \n", - "4 Togo's opposition coalition, Let's Save Togo, ... " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "evaluation_gemini_df = pd.read_json(EVALUATION_FILE_URI, lines=True)\n", "evaluation_gemini_df.head()" @@ -300,7 +197,7 @@ "\n", "* `evaluation_dataset` to indicate where the evaluation dataset location. In this case, it is the JSONL Cloud bucket URI.\n", "\n", - "* `id_colums` to distinguish evaluation examples that are unique. Here, as you can imagine, your have `id` and `document` fields.\n", + "* `id_colums` to distinguish evaluation examples that are unique. Here, as you can imagine, your have `id` and `document` fields. These fields will be added in the judgment table generated by AutoSxS.\n", "\n", "* `task` to indicate the task type you want to evaluate. It can be `summarization` or `question_answer`. In this case you have `summarization`.\n", "\n", @@ -318,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "id": "Cp7e-hOmNMhA", "tags": [] @@ -331,7 +228,7 @@ "\n", "parameters = {\n", " \"evaluation_dataset\": EVALUATION_FILE_URI,\n", - " \"id_columns\": [\"id\"],\n", + " \"id_columns\": [\"id\", \"document\"],\n", " \"task\": \"summarization\",\n", " \"autorater_prompt_parameters\": {\n", " \"inference_context\": {\"column\": \"document\"},\n", @@ -353,37 +250,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "id": "AjFHT5ze9m4L", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating PipelineJob\n", - "PipelineJob created. Resource name: projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384\n", - "To use this PipelineJob in another session:\n", - "pipeline_job = aiplatform.PipelineJob.get('projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384')\n", - "View Pipeline Job:\n", - "https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/autosxs-1713383655691384?project=115851500182\n", - "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n", - "PipelineState.PIPELINE_STATE_RUNNING\n", - "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n", - "PipelineState.PIPELINE_STATE_RUNNING\n", - "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n", - "PipelineState.PIPELINE_STATE_RUNNING\n", - "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n", - "PipelineState.PIPELINE_STATE_RUNNING\n", - "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n", - "PipelineState.PIPELINE_STATE_RUNNING\n", - "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n", - "PipelineState.PIPELINE_STATE_RUNNING\n" - ] - } - ], + "outputs": [], "source": [ "job = aiplatform.PipelineJob(\n", " job_id=display_name,\n", @@ -525,7 +397,8 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "w2RISjQSJk9R" + "id": "w2RISjQSJk9R", + "tags": [] }, "outputs": [], "source": [ @@ -548,7 +421,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def print_aggregated_metrics(scores):\n", @@ -597,7 +472,8 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "mbfsO2uw9-i5" + "id": "mbfsO2uw9-i5", + "tags": [] }, "outputs": [], "source": [ @@ -633,7 +509,7 @@ "\n", "parameters = {\n", " \"evaluation_dataset\": HUMAN_EVALUATION_FILE_URI,\n", - " \"id_columns\": [\"id\"],\n", + " \"id_columns\": [\"id\", \"document\"],\n", " \"task\": \"summarization\",\n", " \"autorater_prompt_parameters\": {\n", " \"inference_context\": {\"column\": \"document\"},\n", @@ -649,7 +525,8 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "KbhIPY-_3SSB" + "id": "KbhIPY-_3SSB", + "tags": [] }, "outputs": [], "source": [ @@ -681,7 +558,8 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "JLUOJFjA38ja" + "id": "JLUOJFjA38ja", + "tags": [] }, "outputs": [], "source": [ @@ -710,7 +588,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def print_human_preference_metrics(metrics):\n", @@ -725,20 +605,6 @@ "pprint.pprint(human_aligned_metrics)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "TpV-iwP9qw9c" - }, - "source": [ - "## Cleaning up\n", - "\n", - "To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud\n", - "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial. Clicking the __End Lab__ button in your lab instructions will take care of this for you.\n", - "\n", - "Otherwise, you can delete the individual resources you created in this tutorial." - ] - }, { "cell_type": "markdown", "metadata": {},