Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modularize action analyzer contract change #2744

Merged
merged 8 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""__init__."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""__init__."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Action Detector Class."""

import pandas
from abc import ABC, abstractmethod
from action_analyzer.contracts.actions.action import Action
from action_analyzer.contracts.llm_client import LLMClient


class ActionDetector(ABC):
    """Abstract base class for action detectors.

    Concrete detectors must implement ``preprocess_data`` and ``detect``.
    """

    def __init__(self, query_intention_enabled: str) -> None:
        """Create an action detector.

        Args:
            query_intention_enabled(str): enable llm generated query intention.
                Accepted values: true or false.
        """
        self.query_intention_enabled = query_intention_enabled

    @abstractmethod
    def preprocess_data(self, df: pandas.DataFrame) -> pandas.DataFrame:
        """Preprocess the data for action detector.

        Args:
            df(pandas.DataFrame): input pandas dataframe.

        Returns:
            pandas.DataFrame: preprocessed pandas dataframe.
        """

    # The return annotation must be the subscripted generic ``list[Action]``.
    # ``list(Action)`` is a call that Python evaluates when the method is
    # defined, so it is not a type and fails if ``Action`` is not iterable.
    @abstractmethod
    def detect(self, df: pandas.DataFrame, llm_client: LLMClient) -> list[Action]:
        """Detect the action.

        Args:
            df(pandas.DataFrame): input pandas dataframe.
            llm_client(LLMClient): LLM client used to get some llm scores/info for action.

        Returns:
            list[Action]: list of actions.
        """
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Low retrieval score index action detector class."""

import pandas
from action_analyzer.contracts.detectors.action_detector import ActionDetector
from action_analyzer.contracts.actions.action import Action
from action_analyzer.contracts.llm_client import LLMClient


class LowRetreivalScoreIndexActionDetector(ActionDetector):
    """Low retrieval score index action detector class."""

    def __init__(self,
                 index_id: str,
                 violated_metrics: list[str],
                 llm_summary_enabled: str) -> None:
        """Create a low retrieval score index action detector.

        Args:
            index_id(str): the index asset id.
            violated_metrics(list[str]): violated e2e metrics
            llm_summary_enabled(str): enable llm generated summary. Accepted values: true or false.
        """
        self.index_id = index_id
        self.violated_metrics = violated_metrics
        # The flag is forwarded positionally to the ActionDetector constructor.
        super().__init__(llm_summary_enabled)

    def preprocess_data(self, df: pandas.DataFrame) -> pandas.DataFrame:
        """Preprocess the data for action detector.

        Not implemented yet: the body is a placeholder and returns None.

        Args:
            df(pandas.DataFrame): input pandas dataframe.

        Returns:
            pandas.DataFrame: preprocessed pandas dataframe.
        """
        pass

    # ``list[Action]`` (subscript) is the type; ``list(Action)`` would be a
    # call evaluated when the method is defined.
    def detect(self, df: pandas.DataFrame, llm_client: LLMClient) -> list[Action]:
        """Detect the action.

        Not implemented yet: the body is a placeholder and returns None.

        Args:
            df(pandas.DataFrame): input pandas dataframe.
            llm_client(LLMClient): LLM client used to get some llm scores/info for action.

        Returns:
            list[Action]: list of actions.
        """
        pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Metrics violation index action detector class."""

from action_analyzer.contracts.detectors.action_detector import ActionDetector
from action_analyzer.contracts.actions.action import Action
from action_analyzer.contracts.llm_client import LLMClient
import pandas

# Names of the metrics this module declares as supported.
SUPPORTED_METRICS = [
    "Fluency",
    "Coherence",
    "Relevance",
    "Groundedness",
    "RetrievalRelevance",
]


class MetricsViolationIndexActionDetector(ActionDetector):
    """Metrics violation index action detector class."""

    def __init__(self,
                 index_id: str,
                 violated_metrics: list[str],
                 correlation_test_method: str,
                 correlation_test_pvalue_threshold: float,
                 llm_summary_enabled: str,
                 positive_metric_threshold=5,
                 negative_metric_threshold=3) -> None:
        """Create a metrics violation index action detector.

        Args:
            index_id(str): the index asset id.
            violated_metrics(list[str]): violated e2e metrics
            correlation_test_method(str): test method for correlation test. e.g. ttest.
            correlation_test_pvalue_threshold(float): p-value threshold for correlation test to generate action.
            llm_summary_enabled(str): enable llm generated summary. Accepted values: true or false.
            positive_metric_threshold(int): (Optional) e2e metric threshold to mark the query as positive.
            negative_metric_threshold(int): (Optional) e2e metric threshold to mark the query as negative.
        """
        # Keep the index id and violated metrics on the instance so that
        # preprocess_data/detect can use them; previously both arguments were
        # accepted but silently dropped.
        self.index_id = index_id
        self.violated_metrics = violated_metrics
        self.correlation_test_method = correlation_test_method
        self.correlation_test_pvalue_threshold = correlation_test_pvalue_threshold
        self.positive_metric_threshold = positive_metric_threshold
        self.negative_metric_threshold = negative_metric_threshold
        # The flag is forwarded positionally to the ActionDetector constructor.
        super().__init__(llm_summary_enabled)

    def preprocess_data(self, df: pandas.DataFrame) -> pandas.DataFrame:
        """Preprocess the data for action detector.

        Not implemented yet: the body is a placeholder and returns None.

        Args:
            df(pandas.DataFrame): input pandas dataframe.

        Returns:
            pandas.DataFrame: preprocessed pandas dataframe.
        """
        pass

    # ``list[Action]`` (subscript) is the type; ``list(Action)`` would be a
    # call evaluated when the method is defined.
    def detect(self, df: pandas.DataFrame, llm_client: LLMClient) -> list[Action]:
        """Detect the action.

        Not implemented yet: the body is a placeholder and returns None.

        Args:
            df(pandas.DataFrame): input pandas dataframe.
            llm_client(LLMClient): LLM client used to get some llm scores/info for action.

        Returns:
            list[Action]: list of actions.
        """
        pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""__init__."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Action Class."""

import datetime
import uuid
import json
from enum import Enum
from action_analyzer.utils.utils import convert_to_camel_case


class ActionType(Enum):
    """Enumeration of the kinds of action.

    ``Action.to_json_str`` writes the member *name* (not the numeric value)
    into its output.
    """

    # Action type used by MetricsViolationIndexAction.
    METRICS_VIOLATION_INDEX_ACTION = 1
    # Action type used by LowRetreivalScoreIndexAction.
    LOW_RETRIEVAL_SCORE_INDEX_ACTION = 2


class ActionSample:
    """A single question/answer sample attached to an action."""

    def __init__(self,
                 question: str,
                 answer: str,
                 debugging_info: str,
                 prompt_flow_input: str) -> None:
        """Create an action sample.

        Args:
            question(str): the input question of the flow.
            answer(str): the output answer of the flow.
            debugging_info(str): the json string of debugging info in a span tree structure.
            prompt_flow_input(str): the json str of prompt flow input.
        """
        # Assignment order is the key order of the serialized JSON object.
        self.question = question
        self.answer = answer
        self.debugging_info = debugging_info
        self.prompt_flow_input = prompt_flow_input

    def to_json_str(self) -> str:
        """Serialize this sample to a JSON object string.

        Every instance attribute is written out, with its name passed through
        ``convert_to_camel_case`` to form the JSON key.
        """
        return json.dumps({
            convert_to_camel_case(attr_name): attr_value
            for attr_name, attr_value in vars(self).items()
        })


class Action:
    """Base class describing one generated action."""

    def __init__(self,
                 action_type: ActionType,
                 description: str,
                 confidence_score: float,
                 query_intention: str,
                 deployment_id: str,
                 run_id: str,
                 positive_samples: list[ActionSample],
                 negative_samples: list[ActionSample]) -> None:
        """Create an action.

        A fresh UUID4 string is generated as ``action_id`` and the current
        local time (``datetime.datetime.now()``) is stored, as a string, in
        ``creation_time``.

        Args:
            action_type(ActionType): the action type.
            description(str): the description of the action.
            confidence_score(float): the confidence score of the action.
            query_intention(str): the query intention of the action.
            deployment_id(str): the azureml deployment id of the action.
            run_id(str): the azureml run id which generates the action.
            positive_samples(list[ActionSample]): list of positive samples of the action.
            negative_samples(list[ActionSample]): list of negative samples of the action.
        """
        # Assignment order is the key order of the serialized JSON object.
        self.action_id = str(uuid.uuid4())
        self.action_type = action_type
        self.description = description
        self.confidence_score = confidence_score
        self.query_intention = query_intention
        self.creation_time = str(datetime.datetime.now())
        self.deployment_id = deployment_id
        self.run_id = run_id
        self.positive_samples = positive_samples
        self.negative_samples = negative_samples

    def to_json_str(self) -> str:
        """Serialize this action to a JSON object string.

        Keys are the attribute names passed through ``convert_to_camel_case``.
        ``action_type`` is written as the enum member name. Attributes whose
        name ends in ``_samples`` are written as a single string.
        """
        serialized = {}
        for attr_name, attr_value in vars(self).items():
            if attr_name == "action_type":
                attr_value = attr_value.name
            elif attr_name.endswith("_samples"):
                # NOTE(review): str() of a list yields a Python list repr of
                # JSON strings, not a JSON array -- confirm consumers expect
                # this format before changing it.
                attr_value = str([sample.to_json_str() for sample in attr_value])
            serialized[convert_to_camel_case(attr_name)] = attr_value
        return json.dumps(serialized)
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""LowRetreivalScoreIndexAction Class."""

from action_analyzer.contracts.action import ActionType, Action
from action_analyzer.contracts.index_action_contracts import IndexActionSample
from shared_utilities.constants import (
ACTION_DESCRIPTION
)


class LowRetreivalScoreIndexAction(Action):
    """Low retrieval score index action class."""

    def __init__(self,
                 index_id: str,
                 index_content: str,
                 confidence_score: float,
                 query_intention: str,
                 deployment_id: str,
                 run_id: str,
                 positive_samples: list[IndexActionSample],
                 negative_samples: list[IndexActionSample],
                 index_name=None) -> None:
        """Create a low retrieval score index action.

        Args:
            index_id(str): the index asset id.
            index_content(str): the index content.
            confidence_score(float): the confidence score of the action.
            query_intention(str): the query intention of the action.
            deployment_id(str): the azureml deployment id of the action.
            run_id(str): the azureml run id which generates the action.
            positive_samples(list[IndexActionSample]): list of positive samples of the action.
            negative_samples(list[IndexActionSample]): list of negative samples of the action.
            index_name(str): (optional) index name if index asset id does not exist.
        """
        self.index_id = index_id
        self.index_name = index_name
        self.index_content = index_content
        # str.replace() raises TypeError when given None, so fall back to the
        # index name (and finally to an empty string) when no asset id exists.
        index_reference = index_id or index_name or ""
        description = ACTION_DESCRIPTION.replace("{index_id}", index_reference)
        super().__init__(ActionType.LOW_RETRIEVAL_SCORE_INDEX_ACTION,
                         description,
                         confidence_score,
                         query_intention,
                         deployment_id,
                         run_id,
                         positive_samples,
                         negative_samples)
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""MetricsViolationIndexAction Class."""

from action_analyzer.contracts.action import ActionType, Action
from action_analyzer.contracts.index_action_contracts import IndexActionSample
from shared_utilities.constants import (
ACTION_DESCRIPTION
)


class MetricsViolationIndexAction(Action):
    """Metrics violated index action class."""

    def __init__(self,
                 index_id: str,
                 index_content: str,
                 violated_metrics: str,
                 confidence_score: float,
                 query_intention: str,
                 deployment_id: str,
                 run_id: str,
                 positive_samples: list[IndexActionSample],
                 negative_samples: list[IndexActionSample],
                 index_name=None) -> None:
        """Create a metrics violated index action.

        Args:
            index_id(str): the index asset id.
            index_content(str): the index content.
            violated_metrics(str): violated metrics in comma-separated string format.
            confidence_score(float): the confidence score of the action.
            query_intention(str): the query intention of the action.
            deployment_id(str): the azureml deployment id of the action.
            run_id(str): the azureml run id which generates the action.
            positive_samples(list[IndexActionSample]): list of positive samples of the action.
            negative_samples(list[IndexActionSample]): list of negative samples of the action.
            index_name(str): (optional) index name if index asset id does not exist.
        """
        self.index_id = index_id
        self.index_name = index_name
        self.index_content = index_content
        self.violated_metrics = violated_metrics
        # str.replace() raises TypeError when given None, so fall back to the
        # index name (and finally to an empty string) when no asset id exists.
        index_reference = index_id or index_name or ""
        description = ACTION_DESCRIPTION.replace("{index_id}", index_reference)
        super().__init__(ActionType.METRICS_VIOLATION_INDEX_ACTION,
                         description,
                         confidence_score,
                         query_intention,
                         deployment_id,
                         run_id,
                         positive_samples,
                         negative_samples)
Loading
Loading