@dataclass
class UnicodeTagsConverter(BaseConverter):
    """Re-encode a prompt using characters offset into the Unicode Tags block.

    Every character of the prompt is replaced by the character whose code
    point is ``0xE0000 + ord(ch)``. For ASCII input this lands inside the
    Tags block (U+E0000..U+E007F).

    Attributes are documented inline below.
    """

    # Plain text placed in front of the encoded payload (not encoded itself).
    prefix: str = ""
    # Plain text placed after the encoded payload (not encoded itself).
    suffix: str = ""
    # When True, wrap the encoded payload in U+E0001 ... U+E007F.
    add_sequence_markers: bool = False

    def _convert(self, prompt: str) -> str:
        """Return ``prefix + encoded prompt + suffix``.

        Args:
            prompt: Text to encode. Each character is shifted by 0xE0000.

        Returns:
            The prefix, the shifted characters (optionally wrapped in the
            U+E0001 / U+E007F markers), and the suffix, concatenated.

        Raises:
            ValueError: Raised by ``chr`` if a character's code point is
                above 0x2FFFF, since the shifted value would exceed
                0x10FFFF.
        """
        pieces = []

        if self.add_sequence_markers:
            # Opening marker. The previous implementation appended this and
            # then overwrote it by re-assigning the encoded string, so the
            # opening marker was never emitted.
            pieces.append(chr(0xE0001))

        # NOTE(review): characters above U+007F map outside the Tags block;
        # presumably callers pass ASCII prompts -- confirm.
        pieces.extend(chr(0xE0000 + ord(ch)) for ch in prompt)

        if self.add_sequence_markers:
            # Closing marker.
            pieces.append(chr(0xE007F))

        return self.prefix + "".join(pieces) + self.suffix
class PromptInjectionPlugin(SendPromptsPlugin): def create_prompts(self) -> Sequence[str | Prompt]: """Create prompts using templates from the specified dataset.""" - prompts = [] for data in self.dataset: prompt_template = PromptTemplate.from_template(data.template) diff --git a/examples/converter.ipynb b/examples/converter.ipynb index f713d35..0e57930 100644 --- a/examples/converter.ipynb +++ b/examples/converter.ipynb @@ -24,7 +24,7 @@ } ], "source": [ - "from IPython.display import display, Markdown\n", + "from IPython.display import display, Markdown, HTML\n", "import os\n", "import textwrap\n", "from dotenv import load_dotenv\n", @@ -41,6 +41,7 @@ " StemmingConverter,\n", " UnicodeConfusableConverter,\n", " TranslationConverter,\n", + " UnicodeTagsConverter,\n", ")\n", "from aisploit.models import ChatOpenAI\n", "\n", @@ -58,6 +59,50 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## UnicodeTagsConverter" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<textarea rows=\"1\" cols=\"30\">STARTEND</textarea>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "AIMessage(content=\"It seems like you've made a mistake in your input. 
Could you please provide more context or details?\", response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 9, 'total_tokens': 30}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-17e2154f-9504-45c4-ac36-f8b1b4f7ce01-0')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "converter = UnicodeTagsConverter(prefix=\"START\", suffix=\"END\")\n", + "prompt = converter.convert(\"What is the capital of France?\")\n", + "\n", + "display(HTML(f'<textarea rows=\"1\" cols=\"30\">{prompt.to_string()}</textarea>'))\n", + "\n", + "chat_model.invoke(prompt)" + ] + }, { "cell_type": "markdown", "metadata": {},