diff --git a/notebooks/vertex_genai/solutions/vertex_llm_evaluation.ipynb b/notebooks/vertex_genai/solutions/vertex_llm_evaluation.ipynb
index 5bf65703..7c1ae5aa 100644
--- a/notebooks/vertex_genai/solutions/vertex_llm_evaluation.ipynb
+++ b/notebooks/vertex_genai/solutions/vertex_llm_evaluation.ipynb
@@ -40,7 +40,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {
"id": "PyQmSRbKA8r-",
"tags": []
@@ -59,20 +59,12 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {
"id": "oM1iC_MfAts1",
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Your project ID is set to dherin-dev\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"project_id_list = !gcloud config get-value project 2> /dev/null\n",
"PROJECT_ID = project_id_list[0]\n",
@@ -102,7 +94,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {
"id": "NIq7R4HZCfIc",
"tags": []
@@ -125,7 +117,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {
"id": "j4KEcQEWROby",
"tags": []
@@ -182,107 +174,12 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {
"id": "R-_ettKRxfxT",
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " document | \n",
- " response_a | \n",
- " response_b | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 40159674 | \n",
- " The 33-year-old, capped 81 times by the Republ... | \n",
- " Whelan joined Aston Villa on a free transfer f... | \n",
- " - Glenn Whelan, 33, has joined Aston Villa fro... | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 36925620 | \n",
- " Paul Frew, MLA for North Antrim, appeared in c... | \n",
- " - A 15-year-old girl is suing DUP MLA Paul Fre... | \n",
- " - Paul Frew, MLA for North Antrim, appeared in... | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 39729595 | \n",
- " Party leader Jeremy Corbyn said half of the ho... | \n",
- " Labour says it will build 100,000 new homes a ... | \n",
- " - Labour leader Jeremy Corbyn announced a hous... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 16788700 | \n",
- " No clear winner emerged between incumbent Jean... | \n",
- " The African Union summit in Addis Ababa ended ... | \n",
- " The African Union (AU) summit in Addis Ababa, ... | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 19389625 | \n",
- " The ban has been called by opposition coalitio... | \n",
- " Opposition coalition in Togo has called for a ... | \n",
- " Togo's opposition coalition, Let's Save Togo, ... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id document \\\n",
- "0 40159674 The 33-year-old, capped 81 times by the Republ... \n",
- "1 36925620 Paul Frew, MLA for North Antrim, appeared in c... \n",
- "2 39729595 Party leader Jeremy Corbyn said half of the ho... \n",
- "3 16788700 No clear winner emerged between incumbent Jean... \n",
- "4 19389625 The ban has been called by opposition coalitio... \n",
- "\n",
- " response_a \\\n",
- "0 Whelan joined Aston Villa on a free transfer f... \n",
- "1 - A 15-year-old girl is suing DUP MLA Paul Fre... \n",
- "2 Labour says it will build 100,000 new homes a ... \n",
- "3 The African Union summit in Addis Ababa ended ... \n",
- "4 Opposition coalition in Togo has called for a ... \n",
- "\n",
- " response_b \n",
- "0 - Glenn Whelan, 33, has joined Aston Villa fro... \n",
- "1 - Paul Frew, MLA for North Antrim, appeared in... \n",
- "2 - Labour leader Jeremy Corbyn announced a hous... \n",
- "3 The African Union (AU) summit in Addis Ababa, ... \n",
- "4 Togo's opposition coalition, Let's Save Togo, ... "
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"evaluation_gemini_df = pd.read_json(EVALUATION_FILE_URI, lines=True)\n",
"evaluation_gemini_df.head()"
@@ -300,7 +197,7 @@
"\n",
"* `evaluation_dataset` to indicate where the evaluation dataset location. In this case, it is the JSONL Cloud bucket URI.\n",
"\n",
- "* `id_colums` to distinguish evaluation examples that are unique. Here, as you can imagine, your have `id` and `document` fields.\n",
+ "* `id_columns` to distinguish unique evaluation examples. Here, as you can imagine, you have the `id` and `document` fields. These fields will be added to the judgment table generated by AutoSxS.\n",
"\n",
"* `task` to indicate the task type you want to evaluate. It can be `summarization` or `question_answer`. In this case you have `summarization`.\n",
"\n",
@@ -318,7 +215,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {
"id": "Cp7e-hOmNMhA",
"tags": []
@@ -331,7 +228,7 @@
"\n",
"parameters = {\n",
" \"evaluation_dataset\": EVALUATION_FILE_URI,\n",
- " \"id_columns\": [\"id\"],\n",
+ " \"id_columns\": [\"id\", \"document\"],\n",
" \"task\": \"summarization\",\n",
" \"autorater_prompt_parameters\": {\n",
" \"inference_context\": {\"column\": \"document\"},\n",
@@ -353,37 +250,12 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {
"id": "AjFHT5ze9m4L",
"tags": []
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Creating PipelineJob\n",
- "PipelineJob created. Resource name: projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384\n",
- "To use this PipelineJob in another session:\n",
- "pipeline_job = aiplatform.PipelineJob.get('projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384')\n",
- "View Pipeline Job:\n",
- "https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/autosxs-1713383655691384?project=115851500182\n",
- "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n",
- "PipelineJob projects/115851500182/locations/us-central1/pipelineJobs/autosxs-1713383655691384 current state:\n",
- "PipelineState.PIPELINE_STATE_RUNNING\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"job = aiplatform.PipelineJob(\n",
" job_id=display_name,\n",
@@ -525,7 +397,8 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "w2RISjQSJk9R"
+ "id": "w2RISjQSJk9R",
+ "tags": []
},
"outputs": [],
"source": [
@@ -548,7 +421,9 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def print_aggregated_metrics(scores):\n",
@@ -597,7 +472,8 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "mbfsO2uw9-i5"
+ "id": "mbfsO2uw9-i5",
+ "tags": []
},
"outputs": [],
"source": [
@@ -633,7 +509,7 @@
"\n",
"parameters = {\n",
" \"evaluation_dataset\": HUMAN_EVALUATION_FILE_URI,\n",
- " \"id_columns\": [\"id\"],\n",
+ " \"id_columns\": [\"id\", \"document\"],\n",
" \"task\": \"summarization\",\n",
" \"autorater_prompt_parameters\": {\n",
" \"inference_context\": {\"column\": \"document\"},\n",
@@ -649,7 +525,8 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "KbhIPY-_3SSB"
+ "id": "KbhIPY-_3SSB",
+ "tags": []
},
"outputs": [],
"source": [
@@ -681,7 +558,8 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "id": "JLUOJFjA38ja"
+ "id": "JLUOJFjA38ja",
+ "tags": []
},
"outputs": [],
"source": [
@@ -710,7 +588,9 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {},
+ "metadata": {
+ "tags": []
+ },
"outputs": [],
"source": [
"def print_human_preference_metrics(metrics):\n",
@@ -725,20 +605,6 @@
"pprint.pprint(human_aligned_metrics)"
]
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "TpV-iwP9qw9c"
- },
- "source": [
- "## Cleaning up\n",
- "\n",
- "To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud\n",
- "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial. Clicking the __End Lab__ button in your lab instructions will take care of this for you.\n",
- "\n",
- "Otherwise, you can delete the individual resources you created in this tutorial."
- ]
- },
{
"cell_type": "markdown",
"metadata": {},