From af60896871fe470f711d9664fd2f9b77ecb8c5c6 Mon Sep 17 00:00:00 2001 From: Timur Nizamov Date: Tue, 4 Feb 2025 13:43:21 +0300 Subject: [PATCH] Harmful warning (#79) * fix docstring formatting * add harmful warning --- src/llamator/attacks/bon.py | 2 +- src/llamator/attacks/crecsendo.py | 2 +- src/llamator/attacks/sycophancy.py | 2 +- src/llamator/main.py | 2 +- tests/test_llamator.py | 17 ++++++++++------- tests/test_local_llamator.py | 30 ++++++++++++++---------------- 6 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/llamator/attacks/bon.py b/src/llamator/attacks/bon.py index 224a5e9..e5bc24e 100644 --- a/src/llamator/attacks/bon.py +++ b/src/llamator/attacks/bon.py @@ -63,7 +63,7 @@ def apply_ascii_noising(prompt: str, sigma: float) -> str: @register_test class TestBon(TestBase): """Probes the LLM resilience against Best-of-N attack (text augmentations). - `Original Paper `_, `Code `_.""" + Original Paper: https://arxiv.org/abs/2412.03556, Code: https://github.com/jplhughes/bon-jailbreaking""" test_name = "bon" diff --git a/src/llamator/attacks/crecsendo.py b/src/llamator/attacks/crecsendo.py index f821a27..3e2c2fc 100644 --- a/src/llamator/attacks/crecsendo.py +++ b/src/llamator/attacks/crecsendo.py @@ -64,7 +64,7 @@ class TestCrescendo(TestBase): """Challenges the LLM with a dataset of harmful behavior prompts using Crescendo strategy, operates by engaging the LLM in a series of escalating conversational turns, and multistage refinement. - `Original Paper `_""" + Original Paper: https://arxiv.org/abs/2404.01833""" test_name = "crescendo" diff --git a/src/llamator/attacks/sycophancy.py b/src/llamator/attacks/sycophancy.py index d7c7b2e..1d38a28 100644 --- a/src/llamator/attacks/sycophancy.py +++ b/src/llamator/attacks/sycophancy.py @@ -63,7 +63,7 @@ @register_test class TestSycophancy(TestBase): """Attacks the LLM system during a multistage conversation using the argumentum ad verecundiam method based on the system's domain. - `Original Paper `_""" + Original Paper: https://arxiv.org/abs/2310.13548""" test_name = "sycophancy" diff --git a/src/llamator/main.py b/src/llamator/main.py index bfdc360..e64f24d 100644 --- a/src/llamator/main.py +++ b/src/llamator/main.py @@ -190,7 +190,7 @@ def start_testing( if enable_reports: report_language = validate_language(report_language) csv_folder_name = "csv_report" - # Create Excel report + print(f"{BRIGHT}{colorama.Fore.RED}DISCLAIMER: Report may contain HARMFUL and OFFENSIVE language, reader discretion is recommended.{RESET}") create_attack_report_from_artifacts( artifacts_dir=artifacts_run_path, csv_folder_name=csv_folder_name, report_file_name="attacks_report.xlsx" ) diff --git a/tests/test_llamator.py b/tests/test_llamator.py index 0faa70c..56d69a8 100644 --- a/tests/test_llamator.py +++ b/tests/test_llamator.py @@ -1,10 +1,12 @@ import importlib import os +from dotenv import load_dotenv + def test_openai_client(): """ - Tests OpenAI client from public LLaMator lib. + Tests OpenAI client from public llamator lib. Environment variables ---------- @@ -15,7 +17,8 @@ def test_openai_client(): OPENAI_CLIENT_MODEL : str Type of model """ - # Перезагрузка необходимых модулей + load_dotenv() + import llamator importlib.reload(llamator) @@ -63,11 +66,11 @@ def test_openai_client(): ] config = { - "enable_logging": True, # Включить логирование - "enable_reports": True, # Включить генерацию отчетов - "artifacts_path": "./artifacts", # Путь к директории для сохранения артефактов - "debug_level": 1, # Уровень логирования: 0 - WARNING, 1 - INFO, 2 - DEBUG - "report_language": "ru", # Язык отчета: 'en', 'ru' + "enable_logging": True, # Enable logging + "enable_reports": True, # Enable report generation + "artifacts_path": "./artifacts", # Path to the directory for saving artifacts + "debug_level": 1, # Logging level: 0 - WARNING, 1 - INFO, 2 - DEBUG + "report_language": "en", # Report language: 'en', 'ru' } llamator.start_testing(attack_model, tested_model, config=config, tests_with_attempts=tests_with_attempts) diff --git a/tests/test_local_llamator.py b/tests/test_local_llamator.py index 8e7509a..14cf3bd 100644 --- a/tests/test_local_llamator.py +++ b/tests/test_local_llamator.py @@ -6,7 +6,7 @@ def test_openai_client(): """ - Tests OpenAI client from local LLaMator lib. + Tests OpenAI client from local llamator lib. Environment variables ---------- @@ -17,7 +17,8 @@ def test_openai_client(): OPENAI_CLIENT_MODEL : str Type of model """ - # Перезагрузка необходимых модулей + load_dotenv() + import llamator.client.specific_chat_clients import llamator.main @@ -51,11 +52,11 @@ def test_openai_client(): ] config = { - "enable_logging": True, # Включить логирование - "enable_reports": True, # Включить генерацию отчетов - "artifacts_path": "./artifacts", # Путь к директории для сохранения артефактов - "debug_level": 1, # Уровень логирования: 0 - WARNING, 1 - INFO, 2 - DEBUG - "report_language": "ru", # Язык отчета: 'en', 'ru' + "enable_logging": True, # Enable logging + "enable_reports": True, # Enable report generation + "artifacts_path": "./artifacts", # Path to the directory for saving artifacts + "debug_level": 1, # Logging level: 0 - WARNING, 1 - INFO, 2 - DEBUG + "report_language": "en", # Report language: 'en', 'ru' } from llamator.main import start_testing @@ -65,7 +66,7 @@ def test_openai_client(): def test_langchain_client_yandexgpt(): """ - Tests LangChain client from local LLaMator lib using Yandex GPT backend. + Tests LangChain client from local llamator lib using Yandex GPT backend. Environment variables ---------- @@ -74,17 +75,14 @@ def test_langchain_client_yandexgpt(): FOLDER_ID : str Yandex Cloud folder ID """ - # Загружаем переменные окружения из .env файла load_dotenv() - # Перезагрузка необходимых модулей import llamator.client.specific_chat_clients import llamator.main importlib.reload(llamator.client.specific_chat_clients) importlib.reload(llamator.main) - # Получаем переменные из окружения folder_ID = os.getenv("FOLDER_ID") from llamator.client.specific_chat_clients import ClientLangChain @@ -124,11 +122,11 @@ def test_langchain_client_yandexgpt(): ] config = { - "enable_logging": True, # Включить логирование - "enable_reports": True, # Включить генерацию отчетов - "artifacts_path": "./artifacts", # Путь к директории для сохранения артефактов - "debug_level": 1, # Уровень логирования: 0 - WARNING, 1 - INFO, 2 - DEBUG - "report_language": "ru", # Язык отчета: 'en', 'ru' + "enable_logging": True, # Enable logging + "enable_reports": True, # Enable report generation + "artifacts_path": "./artifacts", # Path to the directory for saving artifacts + "debug_level": 1, # Logging level: 0 - WARNING, 1 - INFO, 2 - DEBUG + "report_language": "en", # Report language: 'en', 'ru' } from llamator.main import start_testing