Commit 2048ee1
fix groundedness display (note: highlight doesn't work in patch)
Josh Reini committed Oct 4, 2024
1 parent d407abc commit 2048ee1
Showing 3 changed files with 5 additions and 64 deletions.
1 change: 1 addition & 0 deletions src/dashboard/trulens/dashboard/display.py
@@ -150,6 +150,7 @@ def expand_groundedness_df(df: pd.DataFrame) -> pd.DataFrame:
     reasons_df = pd.DataFrame(data)

     reasons_df.rename(columns={"Groundedness Score": "score"}, inplace=True)
+    print(reasons_df)

     # Return only the expanded reasons DataFrame
     return reasons_df.reset_index(drop=True)
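For context, `expand_groundedness_df` centralizes the inline parsing that this commit deletes from Evaluations.py (see the large hunk below). A minimal sketch of that parsing, reconstructed from the removed lines: the `STATEMENT` / `Criteria:` / `Supporting Evidence:` / `Score:` markers come straight from the diff, while the function name `parse_groundedness_reasons` is made up for illustration.

import re

import pandas as pd


def parse_groundedness_reasons(reasons: str) -> pd.DataFrame:
    """Split a raw chain-of-thought 'reasons' string into one row per statement."""
    data = []
    for statement in reasons.split("STATEMENT")[1:]:
        try:
            criteria = statement.split("Criteria: ")[1].split(
                "Supporting Evidence: "
            )[0]
            supporting_evidence = statement.split("Supporting Evidence: ")[
                1
            ].split("Score: ")[0]
            # Capture the last run of digits before any trailing non-digits,
            # e.g. the "8" in "Score: 8".
            score_match = re.search(
                r"([0-9]+)(?=\D*$)", statement.split("Score: ")[1]
            )
            if score_match:
                data.append({
                    "Statement": criteria,
                    "Supporting Evidence from Source": supporting_evidence,
                    "score": float(score_match.group(1)) / 10,
                })
        except IndexError:
            continue  # skip statements missing one of the expected markers
    return pd.DataFrame(data).reset_index(drop=True)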
66 changes: 4 additions & 62 deletions src/dashboard/trulens/dashboard/pages/Evaluations.py
@@ -1,7 +1,6 @@
 import asyncio
 import json
 import pprint as pp
-import re

 import matplotlib.pyplot as plt
 import numpy as np
@@ -24,6 +23,7 @@
 from trulens.core.utils.json import jsonify_for_ui
 from trulens.core.utils.serial import Lens
 from trulens.dashboard.components.record_viewer import record_viewer
+from trulens.dashboard.display import expand_groundedness_df
 from trulens.dashboard.streamlit_utils import init_from_args
 from trulens.dashboard.ux.components import draw_agent_info
 from trulens.dashboard.ux.components import draw_llm_info
@@ -481,67 +481,9 @@ def highlight_groundedness(s):

 # note: improve conditional to not rely on the feedback name
 if "groundedness" in feedback_name.lower():
-    try:
-        # Split the reasons value into separate rows and columns
-        reasons = df["reasons"].iloc[0]
-        # Split the reasons into separate statements
-        statements = reasons.split("STATEMENT")
-        data = []
-        # Each reason has three components: statement, supporting evidence, and score
-        # Parse each reason into these components and add them to the data list
-        for statement in statements[1:]:
-            try:
-                criteria = statement.split(
-                    "Criteria: "
-                )[1].split("Supporting Evidence: ")[0]
-                supporting_evidence = statement.split(
-                    "Supporting Evidence: "
-                )[1].split("Score: ")[0]
-                score_pattern = re.compile(
-                    r"([0-9]+)(?=\D*$)"
-                )
-                score_split = statement.split(
-                    "Score: "
-                )[1]
-                score_match = score_pattern.search(
-                    score_split
-                )
-                if score_match:
-                    score = (
-                        float(score_match.group(1)) / 10
-                    )
-            except Exception:
-                pass
-            data.append({
-                "Statement": criteria,
-                "Supporting Evidence from Source": supporting_evidence,
-                "Score": score,
-            })
-        reasons_df = pd.DataFrame(data)
-        # Combine the original feedback data with the expanded reasons
-        df_expanded = pd.concat(
-            [
-                df.reset_index(drop=True),
-                reasons_df.reset_index(drop=True),
-            ],
-            axis=1,
-        )
-        st.dataframe(
-            df_expanded.style.apply(
-                highlight_groundedness, axis=1
-            ).format("{:.2f}", subset=["Score"]),
-            hide_index=True,
-            column_order=[
-                "Statement",
-                "Supporting Evidence from Source",
-                "Score",
-            ],
-        )
-    except Exception:
-        st.dataframe(
-            df.style.apply(highlight, axis=1),
-            hide_index=True,
-        )
+    # note: improve conditional to not rely on the feedback name
+    df = expand_groundedness_df(df)
+    st.dataframe(df, hide_index=True)
 else:
     st.dataframe(
         df.style.apply(highlight, axis=1),
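The score regex in the removed block is easy to misread: `([0-9]+)(?=\D*$)` matches the final run of digits that is followed only by non-digits up to the end of the string, so trailing punctuation or words after the number are ignored. A quick check with hypothetical input strings:

import re

score_pattern = re.compile(r"([0-9]+)(?=\D*$)")

# The lookahead (?=\D*$) anchors the match to the last digit run.
assert score_pattern.search("Score: 8").group(1) == "8"
assert score_pattern.search("Score: 10 (fully grounded).").group(1) == "10"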
2 changes: 0 additions & 2 deletions src/feedback/trulens/feedback/llm_provider.py
@@ -1660,8 +1660,6 @@ def evaluate_hypothesis(index, hypothesis):
             np.mean(list(groundedness_scores.values()))
         )

-        print(reasons_str)
-
         return average_groundedness_score, {"reasons": reasons_str}

     @deprecation_utils.method_renamed("relevance")
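For reference, the aggregation visible in the context lines above averages the per-statement scores into a single 0-1 result. A toy check with made-up scores:

import numpy as np

# Hypothetical per-statement groundedness scores on a 0-1 scale.
groundedness_scores = {"statement_0": 1.0, "statement_1": 0.8, "statement_2": 0.3}
average_groundedness_score = float(np.mean(list(groundedness_scores.values())))
print(average_groundedness_score)  # 0.7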
