diff --git a/ols/utils/token_handler.py b/ols/utils/token_handler.py index a9f06f0d..3b14fcb5 100644 --- a/ols/utils/token_handler.py +++ b/ols/utils/token_handler.py @@ -181,10 +181,11 @@ def limit_conversation_history( ) -> tuple[list[str], bool]: """Limit conversation history to specified number of tokens.""" total_length = 0 + formatted_history: list[str] = [] - for index, message in enumerate(reversed(history)): + for original_message in reversed(history): # Restructure messages as per model - message = restructure_history(message, model) + message = restructure_history(original_message, model) message_length = TokenHandler._get_token_count(self.text_to_tokens(message)) total_length += message_length @@ -194,6 +195,7 @@ def limit_conversation_history( logger.debug( "History truncated, it exceeds available %d tokens.", limit ) - return history[len(history) - index :], True + return formatted_history[::-1], True + formatted_history.append(message) - return history, False + return formatted_history[::-1], False diff --git a/pyproject.toml b/pyproject.toml index c92b16a8..f685f511 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.ruff] # description of all rules are available on https://docs.astral.sh/ruff/rules/ -lint.select = ["D", "E", "F", "W", "C", "S", "I", "TCH", "SLOT", "RUF", "C90", "N", "YTT", "ASYNC", "A", "C4", "T10", "PGH", "FURB", "PERF", "AIR", "NPY", "FLY"] +lint.select = ["D", "E", "F", "W", "C", "S", "I", "TCH", "SLOT", "RUF", "C90", "N", "YTT", "ASYNC", "A", "C4", "T10", "PGH", "FURB", "PERF", "AIR", "NPY", "FLY", "PLW2901"] # we need to check 'mood' of all docstrings, this needs to be enabled explicitly lint.extend-select = ["D401"]