Skip to content

Commit

Permalink
Feature/new search graph (#120)
Browse files Browse the repository at this point in the history
* add new search graph

* add new search graph

* move set route node. Add clear documents.

* remove temp search

---------

Co-authored-by: Saisakul Chernbumroong <[email protected]>
  • Loading branch information
saisakul and Saisakul Chernbumroong authored Feb 28, 2025
1 parent 5b79a0d commit 6ebecd4
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 46 deletions.
123 changes: 82 additions & 41 deletions redbox-core/redbox/graph/root.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List

from langchain_core.messages import AIMessage
from langchain_core.tools import StructuredTool
from langchain_core.vectorstores import VectorStoreRetriever
from langgraph.graph import END, START, StateGraph
Expand Down Expand Up @@ -40,7 +41,79 @@
from redbox.transform import structure_documents_by_file_name, structure_documents_by_group_and_indices


def get_summarise_graph(all_chunks_retriever):
def get_search_graph(
    retriever: VectorStoreRetriever,
    prompt_set: PromptSet = PromptSet.Search,
    debug: bool = False,
    final_sources: bool = True,
) -> CompiledGraph:
    """Creates a subgraph for retrieval augmented generation (RAG).

    Pipeline: condense the user question into a search query, retrieve
    documents, then either answer directly or (when self-route is enabled)
    first check whether RAG can answer at all. If the self-route check
    deems the question unanswerable, retrieved documents are cleared and
    the route is switched to summarise instead of search.
    """

    def _is_unanswerable(llm_response: str) -> bool:
        # The self-route LLM signals failure by emitting the token
        # "unanswerable"; accept either a raw string or an AIMessage.
        text = llm_response.content if isinstance(llm_response, AIMessage) else llm_response
        return "unanswerable" in text.lower()

    graph = StateGraph(RedboxState)

    # --- Nodes ---------------------------------------------------------
    graph.add_node("llm_generate_query", build_chat_pattern(prompt_set=PromptSet.CondenseQuestion))
    graph.add_node(
        "retrieve_documents",
        build_retrieve_pattern(
            retriever=retriever,
            structure_func=structure_documents_by_group_and_indices,
            final_source_chain=final_sources,
        ),
    )
    graph.add_node(
        "llm_answer_question",
        build_stuff_pattern(prompt_set=prompt_set, final_response_chain=True),
        retry=RetryPolicy(max_attempts=3),
    )
    graph.add_node(
        "check_if_RAG_can_answer",
        build_stuff_pattern(
            prompt_set=PromptSet.SelfRoute,
            output_parser=build_self_route_output_parser(
                match_condition=_is_unanswerable,
                max_tokens_to_check=4,
                final_response_chain=True,
            ),
            final_response_chain=False,
        ),
        retry=RetryPolicy(max_attempts=3),
    )
    graph.add_node("set_route_to_search", build_set_route_pattern(route=ChatRoute.search))
    graph.add_node("set_route_to_summarise", build_set_route_pattern(route=ChatRoute.summarise))
    graph.add_node("is_self_route_on", empty_process)
    graph.add_node("RAG_cannot_answer", empty_process)
    graph.add_node("clear_documents", clear_documents_process)

    # --- Edges ---------------------------------------------------------
    graph.add_edge(START, "llm_generate_query")
    graph.add_edge("llm_generate_query", "retrieve_documents")
    graph.add_edge("retrieve_documents", "is_self_route_on")
    # Branch on the request's self-route setting.
    graph.add_conditional_edges(
        "is_self_route_on",
        lambda s: s.request.ai_settings.self_route_enabled,
        {True: "check_if_RAG_can_answer", False: "llm_answer_question"},
    )
    graph.add_edge("llm_answer_question", "set_route_to_search")
    graph.add_edge("check_if_RAG_can_answer", "RAG_cannot_answer")
    # Re-inspect the last message: unanswerable -> fall back to summarise.
    graph.add_conditional_edges(
        "RAG_cannot_answer",
        lambda s: _is_unanswerable(s.last_message),
        {True: "clear_documents", False: "set_route_to_search"},
    )
    graph.add_edge("clear_documents", "set_route_to_summarise")
    graph.add_edge("set_route_to_summarise", END)
    graph.add_edge("set_route_to_search", END)

    return graph.compile(debug=debug)


def get_summarise_graph(all_chunks_retriever: VectorStoreRetriever, debug=True):
builder = StateGraph(RedboxState)
builder.add_node("choose_route_based_on_request_token", empty_process)
builder.add_node("set_route_to_summarise_large_doc", build_set_route_pattern(ChatRoute.chat_with_docs_map_reduce))
Expand Down Expand Up @@ -163,7 +236,7 @@ def get_summarise_graph(all_chunks_retriever):
builder.add_edge("summarise_document", "clear_documents")
builder.add_edge("clear_documents", END)
builder.add_edge("files_too_large_error", END)
return builder.compile()
return builder.compile(debug=debug)


def get_self_route_graph(retriever: VectorStoreRetriever, prompt_set: PromptSet, debug: bool = False):
Expand Down Expand Up @@ -244,43 +317,6 @@ def get_chat_graph(
return builder.compile(debug=debug)


def get_search_graph(
    retriever: VectorStoreRetriever,
    prompt_set: PromptSet = PromptSet.Search,
    debug: bool = False,
    final_sources: bool = True,
    final_response: bool = True,
) -> CompiledGraph:
    """Creates a subgraph for retrieval augmented generation (RAG).

    A strictly linear pipeline: tag the route as search, condense the
    user question into a retrieval query, fetch documents, then stuff
    them into the answering prompt.
    """
    graph = StateGraph(RedboxState)

    # Nodes, in pipeline order.
    graph.add_node("p_set_search_route", build_set_route_pattern(route=ChatRoute.search))
    graph.add_node("p_condense_question", build_chat_pattern(prompt_set=PromptSet.CondenseQuestion))
    graph.add_node(
        "p_retrieve_docs",
        build_retrieve_pattern(
            retriever=retriever,
            structure_func=structure_documents_by_group_and_indices,
            final_source_chain=final_sources,
        ),
    )
    graph.add_node(
        "p_stuff_docs",
        build_stuff_pattern(prompt_set=prompt_set, final_response_chain=final_response),
        retry=RetryPolicy(max_attempts=3),
    )

    # Wire the linear chain from START to END.
    pipeline = (START, "p_set_search_route", "p_condense_question", "p_retrieve_docs", "p_stuff_docs", END)
    for src, dst in zip(pipeline, pipeline[1:]):
        graph.add_edge(src, dst)

    return graph.compile(debug=debug)


def get_agentic_search_graph(tools: List[StructuredTool], debug: bool = False) -> CompiledGraph:
"""Creates a subgraph for agentic RAG."""

Expand Down Expand Up @@ -560,7 +596,7 @@ def get_root_graph(
builder.add_node("p_chat_with_documents", cwd_subgraph)
builder.add_node("p_retrieve_metadata", metadata_subgraph)
builder.add_node("p_new_route", new_route)
builder.add_node("p_summarise", get_summarise_graph(all_chunks_retriever))
builder.add_node("p_summarise", get_summarise_graph(all_chunks_retriever=all_chunks_retriever, debug=debug))

# Log
builder.add_node(
Expand Down Expand Up @@ -603,7 +639,12 @@ def get_root_graph(
False: "p_chat",
},
)
builder.add_edge("p_search", END)
builder.add_node("is_summarise_route", empty_process)
builder.add_edge("p_search", "is_summarise_route")
builder.add_conditional_edges(
"is_summarise_route", lambda s: s.route_name == ChatRoute.summarise, {True: "p_summarise", False: END}
)

builder.add_edge("p_search_agentic", END)
builder.add_edge("p_chat", END)
builder.add_edge("p_chat_with_documents", END)
Expand Down
26 changes: 21 additions & 5 deletions redbox-core/redbox/models/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,28 @@
"guiding the user in providing the information needed for a complete solution."
)

# System prompt for the self-route check: the model must answer strictly from
# the supplied documents, or reply with the single word "unanswerable".
# NOTE: the fallback token here must stay in sync with the match condition
# used by the self-route output parser (it checks for "unanswerable").
SELF_ROUTE_SYSTEM_PROMPT = """Answer the user's question using only information from documents. Do not use your own knowledge or information from any other source. Analyze document carefully to find relevant information.
If document contains information that answers the question:
- Provide a direct, concise answer based solely on that information
- Reference specific parts of document when appropriate
- Be clear about what the document states vs. what might be inferred
If document does not contain information that addresses the question:
- Respond with "unanswerable"
- Do not attempt to guess or provide partial answers based on your own knowledge
- Do not apologize or explain why you can't answer
Important: Your response must either:
1. Contain ONLY information from documents
OR
2. Be EXACTLY and ONLY the single word "unanswerable"
There should never be any additional text, explanations, or your own knowledge in the response.
Remember: Only use information from documents. If the information isn't there, simply respond with "unanswerable".
"""

# NOTE(review): this rebinds SELF_ROUTE_SYSTEM_PROMPT, silently replacing the
# longer prompt assigned to the same name earlier in the file — confirm which
# definition is intended to win and remove the other.
SELF_ROUTE_SYSTEM_PROMPT = (
    "Given the list of extracted parts of long documents and a question, answer the question if possible.\n"
    "If the question cannot be answered respond with only the word 'unanswerable' \n"
    "If the question can be answered accurately from the documents given then give that response \n"
)

CHAT_MAP_SYSTEM_PROMPT = (
"Your goal is to extract the most important information and present it in "
Expand Down

0 comments on commit 6ebecd4

Please sign in to comment.