-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ef1b6f9
commit c23b163
Showing
5 changed files
with
225 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Ragas CLI | ||
|
||
A one-step Ragas cli tool to evaluate QCAG testsets generated by RAG apps. (Q = Question, C = Contexts, A = Answer, G = Ground_truth) | ||
|
||
## Install with pip | ||
|
||
```bash | ||
pip install ragacli | ||
``` | ||
|
||
## Arguments | ||
|
||
- `--model`: Specifies the model to use for evaluation. | ||
- Default value is "gpt-3.5-turbo". Langchain compatible. | ||
- `--api_base`: Specifies the base URL for the API. | ||
- Default value is "https://api.openai.com/v1". | ||
- `--api_key`: Specifies the API key to authenticate requests. | ||
- Not required if using a pseudo-OpenAI API server, e.g. vLLM, FastChat, etc. | ||
- `--embeddings`: Specifies the Huggingface embeddings model to use for evaluation. | ||
- Embeddings will run **locally**. | ||
- Will use OpenAI embeddings if not set. | ||
- Recommended when using a pseudo-OpenAI API server. | ||
- `--metrics`: Specifies the metrics to use for evaluation. | ||
- Will use Ragas default metrics if not set. | ||
- Default metrics: `["answer_relevancy", "context_precision", "faithfulness", "context_recall", "context_relevancy"]` | ||
- Other metrics: `"answer_similarity", "answer_correctness"` | ||
- `--dataset`: Specifies the path to the dataset for evaluation. | ||
- Dataset format must meet RAGAS requirements. | ||
- Will use fiqa dataset as demo if not set. | ||
|
||
## Usage | ||
|
||
### Fiqa dataset demo: | ||
|
||
```bash | ||
python3 -m ragacli --api_key "YOUR_OPENAI_API_KEY" | ||
``` | ||
|
||
### Evaluate with GPT-4 and `BAAI/bge-small-en` embeddings | ||
|
||
The Hugging Face embeddings run locally, so **make sure your machine can run them and that [sentence-transformers](https://pypi.org/project/sentence-transformers/) is installed:** | ||
|
||
```bash | ||
pip install sentence-transformers | ||
``` | ||
Then run: | ||
|
||
```bash | ||
python3 -m ragacli --model "gpt-4" --api_key "YOUR_OPENAI_API_KEY" --embeddings "BAAI/bge-small-en" --dataset "path/to/dataset.csv" | ||
``` | ||
|
||
### Prepare Dataset | ||
|
||
See [**Ragas documentation**](https://docs.ragas.io/en/stable/howtos/applications/data_preparation.html) |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import argparse | ||
import src.pkg as pkg | ||
from ragas import evaluate | ||
from datasets import load_dataset | ||
|
||
def _build_parser():
    """Create the argument parser describing the RAGAS CLI options."""
    parser = argparse.ArgumentParser(description='RAGAS CLI')
    parser.add_argument("--model", type=str, default="gpt-3.5-turbo",
                        help="Specifies the model to use for evaluation. Defaults to gpt-3.5-turbo.")
    parser.add_argument("--api_base", type=str, default="https://api.openai.com/v1",
                        help="Specifies the base URL for the API. Defaults to OpenAI.")
    parser.add_argument("--api_key", type=str,
                        help="Specifies the API key to authenticate requests.")
    parser.add_argument("--embeddings", type=str,
                        help="Specifies Huggingface embeddings model (or its path) to use for evaluation. Will use OpenAI embeddings if not set.")
    # `type=list` would split a single value into characters; collect whole
    # metric names instead (zero or more values after the flag).
    parser.add_argument("--metrics", type=str, nargs="*", default=[],
                        help="Specifies the metrics to use for evaluation.")
    parser.add_argument("--dataset", type=str,
                        help="Specifies the path to the dataset for evaluation. Will use fiqa dataset if not set.")
    return parser


def run_evaluation(argv=None):
    """Parse CLI arguments, build the judge LLM/embeddings and run ragas.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            ``argparse`` reads the process command line.

    Returns:
        Whatever ``ragas.evaluate`` returns for the selected test set and
        metrics.
    """
    args = _build_parser().parse_args(argv)

    # Accept both `--metrics a b` and `--metrics a,b`.
    metrics = [
        name.strip()
        for chunk in args.metrics
        for name in chunk.split(",")
        if name.strip()
    ]

    judge_model = pkg.wrap_langchain_llm(args.model, args.api_base, args.api_key)

    if args.embeddings:
        embeddings = pkg.wrap_embeddings('huggingface', args.embeddings, None)
    else:
        embeddings = pkg.wrap_embeddings('openai', None, args.api_key)

    if args.dataset:
        # Without `split`, load_dataset returns a DatasetDict keyed by split;
        # request the single "train" split so a Dataset is handed to evaluate().
        # NOTE(review): list-valued columns read from CSV arrive as strings -
        # confirm the file matches the column types ragas expects.
        test_set = load_dataset('csv', data_files=args.dataset, split='train')
    else:
        print('test_set not provided, using fiqa dataset')
        fiqa = load_dataset('explodinggradients/fiqa', 'ragas_eval')
        test_set = fiqa["baseline"].select(range(5))

    # set_metrics takes (metrics, llm, embeddings).
    ms = pkg.set_metrics(metrics, judge_model, embeddings)

    return evaluate(test_set, ms)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# make a setup.py for evaluation package | ||
|
||
from setuptools import setup, find_packages | ||
|
||
# The README doubles as the long description shown on the package index.
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

setup(
    name="ragacli",
    version="0.0.1",
    author="Kielo",
    author_email="[email protected]",
    description="A one-step cli tool for RAGAS",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # The package evaluates `X | None` annotations at import time, which
    # needs Python 3.10 or newer.
    python_requires=">=3.10",
    install_requires=[
        # The code imports RagasLLM / LangchainLLM / RagasEmbeddings, which
        # only exist in the pre-0.1 ragas API.
        'ragas<0.1',
        'langchain==0.0.354',
    ],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import os | ||
from langchain.chat_models import ChatOpenAI | ||
from ragas.llms import RagasLLM | ||
from ragas.llms import LangchainLLM | ||
from ragas.embeddings import RagasEmbeddings | ||
from ragas.embeddings import OpenAIEmbeddings | ||
from ragas.embeddings import HuggingfaceEmbeddings | ||
from ragas.metrics.base import Metric | ||
|
||
from ragas.metrics import ( | ||
context_precision, | ||
context_recall, | ||
context_relevancy, | ||
answer_relevancy, | ||
answer_correctness, | ||
answer_similarity, | ||
faithfulness | ||
) | ||
|
||
# Metric names used when the caller does not request any explicitly.
DEFAULT_METRICS = [
    "answer_relevancy",
    "context_precision",
    "faithfulness",
    "context_recall",
    "context_relevancy",
]
|
||
def wrap_langchain_llm(
    model: str,
    api_base: str | None,
    api_key: str | None
) -> LangchainLLM:
    """Build a ragas ``LangchainLLM`` around a ``ChatOpenAI`` chat model.

    Args:
        model: Model name passed to ``ChatOpenAI`` as ``model_name``.
        api_base: Base URL of the OpenAI-compatible API. ``None`` means the
            client's default endpoint is used.
        api_key: API key. Required when ``api_base`` is ``None``; optional
            when a custom ``api_base`` is given.

    Returns:
        The chat model wrapped in ``LangchainLLM``.

    Raises:
        ValueError: If ``api_base`` is ``None`` and no ``api_key`` is given.

    Side effects:
        Sets ``OPENAI_API_KEY`` (and ``OPENAI_API_BASE`` for a custom base)
        in ``os.environ``.
    """
    if api_base is None:
        print('api_base not provided, assuming OpenAI default')
        # Validate before touching os.environ: assigning None there raises
        # TypeError and would mask the intended error below.
        if api_key is None:
            raise ValueError("api_key must be provided")
        os.environ["OPENAI_API_KEY"] = api_key
        base = ChatOpenAI(model_name=model)
    else:
        if api_key is None:
            # Reuse a key already present in the environment; otherwise use a
            # placeholder. NOTE(review): "EMPTY" assumes the target server
            # ignores the key (as OpenAI-compatible servers such as vLLM
            # commonly do) - confirm for your deployment.
            api_key = os.environ.get("OPENAI_API_KEY", "EMPTY")
        os.environ["OPENAI_API_KEY"] = api_key
        os.environ["OPENAI_API_BASE"] = api_base
        base = ChatOpenAI(
            model_name=model,
            openai_api_key=api_key,
            openai_api_base=api_base
        )
    return LangchainLLM(llm=base)
|
||
|
||
def set_metrics(
    metrics: list[str],
    llm: RagasLLM | None,
    embeddings: RagasEmbeddings | None
) -> list[Metric]:
    """Configure the ragas metric objects and return the requested ones.

    Args:
        metrics: Metric names to select, in order. When empty,
            ``DEFAULT_METRICS`` is used.
        llm: Judge LLM assigned to every supported metric; skipped if falsy.
        embeddings: Embeddings assigned to ``answer_relevancy`` and
            ``answer_correctness``; skipped if falsy.

    Returns:
        The metric objects matching ``metrics``, in the same order.

    Raises:
        ValueError: If ``metrics`` contains a name that is not supported.
    """
    available = {
        'context_precision': context_precision,
        'context_recall': context_recall,
        'context_relevancy': context_relevancy,
        'answer_relevancy': answer_relevancy,
        'answer_correctness': answer_correctness,
        'answer_similarity': answer_similarity,
        'faithfulness': faithfulness,
    }

    if llm:
        # Every metric gets the judge LLM, including answer_relevancy, so none
        # of them falls back to its own default model.
        for metric in available.values():
            metric.llm = llm
    if embeddings:
        answer_relevancy.embeddings = embeddings
        answer_correctness.embeddings = embeddings

    if not metrics:
        metrics = DEFAULT_METRICS

    # Fail loudly on typos instead of silently evaluating fewer metrics.
    unknown = [name for name in metrics if name not in available]
    if unknown:
        raise ValueError(
            f"Invalid metrics: {unknown}. Valid metrics: {sorted(available)}"
        )

    return [available[name] for name in metrics]
|
||
def wrap_embeddings(
    model_type: str,
    model_name: str | None,
    api_key: str | None
) -> RagasEmbeddings:
    """Create the ragas embeddings wrapper for the given backend.

    Args:
        model_type: Either ``'openai'`` or ``'huggingface'``.
        model_name: Model name passed to ``HuggingfaceEmbeddings``; only used
            for the ``'huggingface'`` backend.
        api_key: Key passed to ``OpenAIEmbeddings``; only used for the
            ``'openai'`` backend.

    Returns:
        An ``OpenAIEmbeddings`` or ``HuggingfaceEmbeddings`` instance.

    Raises:
        ValueError: If ``model_type`` is neither supported value.
    """
    # Guard clause: reject unsupported backends up front.
    if model_type not in ('openai', 'huggingface'):
        raise ValueError(f"Invalid model type: {model_type}")

    if model_type == 'huggingface':
        return HuggingfaceEmbeddings(model_name=model_name)
    return OpenAIEmbeddings(api_key=api_key)