push wxa test automation to public github

ibm-ecosystem-engineering · Jun 25, 2024 · 4db3a09 · 4db3a09
commit 4db3a09
Show file tree

Hide file tree

Showing 5 changed files with 259 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,89 @@
+<!-- ABOUT THE PROJECT -->
+
+# watsonx Assistant Testing Automation Tool
+
+The watsonx Assistant Testing Automation Tool is used to batch test question groups against a watsonx Assistant instance. The script ingests an excel file with questions, uses the watsonx Assistant API to query the assistant, and outputs an excel file with the results. The goal of the tool is to reduce execution time for running tests and identifying potential recurring errors.
+
+<!-- GETTING STARTED -->
+
+## Getting Started
+
+### Prerequisites
+
+The following prerequisites are required to run the tester:
+
+1. Python3
+2. IBM Cloud api key (this must be for the same cloud account that hosts the assistant instance)
+3. watsonx Assistant service instance url
+4. watsonx Assistant environment id
+
+### Installation
+
+1. Clone the repo
+
+   ```bash
+   git clone git@github.com:EE-WW-BuildLab/wxa-test-automation-tool.git
+   ```
+
+2. Change directory into wxa-test-automation-tool
+
+   ```bash
+   cd wxa-test-automation-tool
+   ```
+
+3. Create a python virtual environment
+
+   ```bash
+   python3 -m venv virtual-env
+   source virtual-env/bin/activate
+   pip3 install -r requirements.txt
+   ```
+
+4. Copy env file to .env
+
+   ```bash
+   cp env .env
+   ```
+
+5. Configure parameters in .env and set the values of your environment
+   1. For your "input_data_file", see [Configuring Your Input Excel File](#configuring-your-input-excel-file)
+   2. You can name your "output_data_file" anything
+   3. Ensure both the input and output file names are of type .xlsx
+
+6. Run the following to start the script
+
+   ```bash
+   python3 main.py
+   ```
+
+7. Run the following command to exit the python virtual environment:
+
+   ```bash
+   deactivate
+   ```
+
+## Configuring your Input Excel File
+
+The repository contains a sample input file that you can copy, edit, and use to test.
+
+The input Excel file must have the following two columns (**note:** the column names are spelling- and case-sensitive):
+
+1. "Question Groups"
+   1. This column acts as a label for questions that should be asked to the assistant in one session.
+   2. Example values:
+      1. "Group 1"
+      2. "Group 2"
+      3. "Group 3"
+2. "Question"
+   1. This column contains the question to be asked to the assistant.
+
+## Understanding the Results
+
+You can observe real-time results in the terminal. Each time a question is asked, you can view the input, output, and response time.
+
+When all tests are completed, an output excel file with your results is created using the name specified in your env file. The output file contains the question groups, questions, assistant results, error flags, and response times.
+
+### Error Flags
+
+1. Processing error: occurs when the assistant returns a general processing error and the response took 30 seconds or less
+2. Timeout error: occurs when the assistant returns a general processing error and the response took longer than 30 seconds
diff --git a/env b/env
@@ -0,0 +1,5 @@
+input_data_file="sample_input.xlsx"
+output_data_file="sample_output.xlsx"
+api_key= "IBM_Cloud_API_KEY"
+assistant_url = "service_instance_url"
+assistant_environment_id = "environment_id"
diff --git a/main.py b/main.py
@@ -0,0 +1,156 @@
+import os
+import time
+from dotenv import load_dotenv
+import numpy as np
+import pandas as pd
+import requests
+from requests.auth import HTTPBasicAuth
+
+load_dotenv()
+
+testing_data = pd.read_excel(os.getenv("input_data_file"))
+
+# replaces NaN with None for future string comparison logic
+testing_data = testing_data.replace({pd.NaT: None, np.nan: None})
+
+# filter our dataset to only include rows with groups
+testing_data = testing_data[testing_data['Question Groups'] != 'None']
+
+testing_data['Assistant Output'] = 'None'
+testing_data['Error Flags'] = 'None'
+testing_data['Response Times'] = 'None'
+
+question_groups_list = []
+for value in testing_data['Question Groups'].unique():
+    grouped_df = testing_data[testing_data['Question Groups'] == value].copy()
+    grouped_df = grouped_df[['Question Groups', 'Question',  'Assistant Output', 'Error Flags', 'Response Times']]
+    question_groups_list.append(grouped_df)
+
+
+apikey = os.getenv("api_key")
+assistant_url = os.getenv("assistant_url")
+assistant_environment_id = os.getenv("assistant_environment_id")
+
+# authenticating to wxa instance
+auth = HTTPBasicAuth("apikey", apikey)
+
+wxa_params = {
+    "version": "2023-07-15"
+}
+
+# watsonx assistant api functions for a batch test
+
+# creates an assistant session, 1 per question group
+def create_assistant_session():
+    url = f'{assistant_url}/v2/assistants/{assistant_environment_id}/sessions'
+
+    try:
+        response = requests.post(url, params=wxa_params, auth=auth).json()
+        session_id = response['session_id']
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+    return session_id
+
+# queries the assistant
+def query_assistant(session_id, query):
+    print('-' * 41)
+    print(f'1) Input:\n\n{query}\n')
+    start = time.time()
+
+    # wxa api message request
+    url = f'{assistant_url}/v2/assistants/{assistant_environment_id}/sessions/{session_id}/message'
+    headers = {
+        "Content-Type": "application/json"
+    }
+    data = {
+        "input": {
+            "text": query
+        }
+    }
+
+    try:
+        response = requests.post(url, params=wxa_params, headers=headers, json=data, auth=auth).json()
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+    end = time.time()
+    query_results = ""
+    print("2) Output:\n")
+    for index, item in enumerate(response["output"]["generic"], 1):
+        if item["response_type"] == "text":
+            print(item['text'] + "\n")
+            query_results += item['text'] + "\n\n"
+    query_results += "\n"
+
+    api_response_time = round(end - start,2)
+    print(f'3) API response time:\n\n{str(api_response_time)}\n')
+
+    return query_results, api_response_time
+
+# deletes an assistant session
+def delete_assistant_session(session_id):
+    url = f'{assistant_url}/v2/assistants/{assistant_environment_id}/sessions/{session_id}'
+
+    try:
+        response = requests.delete(url, params=wxa_params, auth=auth)
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+# query all groups
+def batch_assistant_query(question_group_df):
+    time.sleep(3)
+    session_id = create_assistant_session()
+
+    for index, row in question_group_df.iterrows():
+        time.sleep(3)
+        flags = ""
+        query_results, api_response_time = query_assistant(session_id, row['Question'])
+        row['Assistant Output'] = query_results
+
+        # error handling
+        if ("I'm sorry, I've run into an error processing your request. Please try the same question again.  Thank you!" in query_results):
+            if (api_response_time > 30):
+                flags += "Timeout Error\n"
+            else:
+                flags += "Processing Error\n"
+
+
+        row['Response Times'] = api_response_time
+        row['Flags'] = flags
+
+
+
+    delete_assistant_session(session_id)
+
+
+
+count = 1
+for df in question_groups_list:
+    print(f'-------------Testing group {count}-------------\n')
+    batch_assistant_query(df)
+    count += 1
+
+# create the final dataframe to be exported to an excel sheet
+concatenated_rows = []
+
+for df in question_groups_list:
+    for index, row in df.iterrows():
+        concatenated_rows.append(row)
+
+question_groups_df_combined = pd.DataFrame(concatenated_rows)
+
+
+
+# write the dataframe to an excel file
+writer = pd.ExcelWriter(os.getenv("output_data_file"), engine='xlsxwriter')
+question_groups_df_combined.to_excel(writer, index=False, sheet_name='Sheet1')
+workbook = writer.book
+worksheet = writer.sheets['Sheet1']
+cell_format = workbook.add_format({'text_wrap': True, 'valign': 'top', 'align': 'left'})
+for i, column in enumerate(question_groups_df_combined.columns):
+    worksheet.set_column(i, i, 40, cell_format)
+worksheet.set_column(3, 3, 70, cell_format)
+writer.close()
+
+
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,9 @@
+ibm-cloud-sdk-core==3.16.0
+ibm-watson==7.0.1
+numpy==1.26.1
+openpyxl==3.1.2
+pandas==1.5.3
+python-dotenv==1.0.0
+XlsxWriter==3.2.0
+certifi==2023.7.22
+requests==2.31.0
diff --git a/sample_input.xlsx b/sample_input.xlsx