From e4e5597585e96c099843d03b79075c88690a138c Mon Sep 17 00:00:00 2001
From: Georgiy Tarasov
Date: Mon, 16 Dec 2024 18:10:02 +0100
Subject: [PATCH 01/55] feat: perplexity prompt

---
 ee/hogai/memory/nodes.py   |  7 +++++++
 ee/hogai/memory/prompts.py | 21 +++++++++++++++++++++
 ee/models/assistant.py     | 11 +++++++++++
 3 files changed, 39 insertions(+)
 create mode 100644 ee/hogai/memory/nodes.py
 create mode 100644 ee/hogai/memory/prompts.py

diff --git a/ee/hogai/memory/nodes.py b/ee/hogai/memory/nodes.py
new file mode 100644
index 0000000000000..bd2196a42d11a
--- /dev/null
+++ b/ee/hogai/memory/nodes.py
@@ -0,0 +1,7 @@
+from langchain_core.prompts import ChatPromptTemplate
+
+from ee.hogai.memory.prompts import INITIALIZE_CORE_MEMORY_PROMPT
+
+
+def initialize_memory():
+    _ = ChatPromptTemplate.from_messages([("human", INITIALIZE_CORE_MEMORY_PROMPT)])
diff --git a/ee/hogai/memory/prompts.py b/ee/hogai/memory/prompts.py
new file mode 100644
index 0000000000000..23e32dad3f01a
--- /dev/null
+++ b/ee/hogai/memory/prompts.py
@@ -0,0 +1,21 @@
+INITIALIZE_CORE_MEMORY_PROMPT = """
+Your goal is to describe what the startup with the given URL does. The provided URL is "{{url}}".
+
+
+- Check the provided URL. If the URL has a subdomain, check the root domain first and then the subdomain.
+- Retrieve information from the websites that provide information about businesses like Crunchbase, G2, LinkedIn, Hackernews, YCombinator, etc.
+
+
+
+- Describe the product itself and the market where the company operates.
+- Describe the target audience of the product.
+- Describe the business model of the company.
+- List all features that the product has.
+- Describe each feature in as much detail as possible.
+
+
+
+Output your answer in paragraphs with two to three sentences. Separate new paragraphs with a newline.
+Answer "No data available." if the given website doesn't exist.
+ +""" diff --git a/ee/models/assistant.py b/ee/models/assistant.py index f2a31d938f5d0..83ccff4935bf5 100644 --- a/ee/models/assistant.py +++ b/ee/models/assistant.py @@ -89,3 +89,14 @@ class Meta: name="unique_checkpoint_write", ) ] + + +class CoreMemory(UUIDModel): + team = models.ForeignKey(Team, on_delete=models.CASCADE) + user = models.ForeignKey(User, on_delete=models.CASCADE) + text = models.TextField(default="", help_text="Dumped core memory where facts are separated by newlines.") + + class Meta: + constraints = [ + models.UniqueConstraint(fields=["team_id", "user_id"], name="unique_core_memory"), + ] From 900558aea296648e1dbbac2c9e58a34ab2f08677 Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Tue, 17 Dec 2024 18:43:40 +0100 Subject: [PATCH 02/55] chore: migrations --- ee/hogai/memory/nodes.py | 2 +- requirements.in | 1 + requirements.txt | 29 ++++++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/ee/hogai/memory/nodes.py b/ee/hogai/memory/nodes.py index bd2196a42d11a..0a225756a20a6 100644 --- a/ee/hogai/memory/nodes.py +++ b/ee/hogai/memory/nodes.py @@ -4,4 +4,4 @@ def initialize_memory(): - _ = ChatPromptTemplate.from_messages([("human", INITIALIZE_CORE_MEMORY_PROMPT)]) + _ = ChatPromptTemplate.from_messages([("human", INITIALIZE_CORE_MEMORY_PROMPT)], template_format="mustache") diff --git a/requirements.in b/requirements.in index c4564b0f1ecaf..ec38e5c16d1d5 100644 --- a/requirements.in +++ b/requirements.in @@ -46,6 +46,7 @@ jsonref==1.1.0 kafka-python==2.0.2 kombu==5.3.2 langchain==0.3.9 +langchain-community==0.3.2 langchain-openai==0.2.11 langfuse==2.55.0 langgraph==0.2.56 diff --git a/requirements.txt b/requirements.txt index ec758cf47f4ca..60f1ae8a0fae3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ aiohttp==3.11.10 # aiobotocore # geoip2 # langchain + # langchain-community # s3fs aioitertools==0.11.0 # via aiobotocore @@ -132,6 +133,8 @@ cssselect==1.1.0 # via toronado cssutils==1.0.2 # via toronado +dataclasses-json==0.6.7 + # via langchain-community decorator==5.1.1 # via retry defusedxml==0.6.0 @@ -347,10 +350,15 @@ kombu==5.3.2 # -r requirements.in # celery langchain==0.3.9 + # via + # -r requirements.in + # langchain-community +langchain-community==0.3.2 # via -r requirements.in langchain-core==0.3.21 # via # langchain + # langchain-community # langchain-openai # langchain-text-splitters # langgraph @@ -370,6 +378,7 @@ langgraph-sdk==0.1.43 langsmith==0.1.132 # via # langchain + # langchain-community # langchain-core lxml==4.9.4 # via @@ -382,6 +391,8 @@ lzstring==1.0.4 # via -r requirements.in makefun==1.15.2 # via dlt +marshmallow==3.23.1 + # via dataclasses-json maxminddb==2.2.0 # via geoip2 mimesis==5.2.1 @@ -399,6 +410,8 @@ multidict==6.0.5 # -r requirements.in # aiohttp # yarl +mypy-extensions==1.0.0 + # via typing-inspect nanoid==2.0.0 # via -r requirements.in natsort==8.4.0 @@ -409,6 +422,7 @@ numpy==1.23.3 # via # -r requirements.in # langchain + # langchain-community # pandas # pyarrow # scikit-learn @@ -439,6 +453,7 @@ packaging==24.1 # google-cloud-bigquery # langchain-core # langfuse + # marshmallow # snowflake-connector-python # sqlalchemy-bigquery # webdriver-manager @@ -517,8 +532,11 @@ pydantic==2.9.2 # langfuse # langsmith # openai + # pydantic-settings pydantic-core==2.23.4 # via pydantic +pydantic-settings==2.7.0 + # via langchain-community pyjwt==2.4.0 # via # -r requirements.in @@ -552,7 +570,9 @@ python-dateutil==2.8.2 # pendulum # posthoganalytics python-dotenv==0.21.0 - # via 
webdriver-manager + # via + # pydantic-settings + # webdriver-manager python-statsd==2.1.0 # via django-statsd python3-openid==3.1.0 @@ -576,6 +596,7 @@ pyyaml==6.0.1 # dlt # drf-spectacular # langchain + # langchain-community # langchain-core qrcode==7.4.2 # via django-two-factor-auth @@ -599,6 +620,7 @@ requests==2.32.0 # google-cloud-bigquery # infi-clickhouse-orm # langchain + # langchain-community # langfuse # langsmith # pdpyras @@ -694,6 +716,7 @@ sqlalchemy==2.0.31 # via # -r requirements.in # langchain + # langchain-community # snowflake-sqlalchemy # sqlalchemy-bigquery sqlalchemy-bigquery==1.11.0 @@ -720,6 +743,7 @@ tenacity==8.4.2 # celery-redbeat # dlt # langchain + # langchain-community # langchain-core threadpoolctl==3.3.0 # via scikit-learn @@ -762,6 +786,9 @@ typing-extensions==4.12.2 # sqlalchemy # stripe # temporalio + # typing-inspect +typing-inspect==0.9.0 + # via dataclasses-json tzdata==2023.3 # via # celery From 4afa58bc81adef9ffd73c25a526f05547a4b409e Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Wed, 18 Dec 2024 14:35:37 +0100 Subject: [PATCH 03/55] feat: initial memory scraping --- ee/hogai/eval/tests/test_eval_router.py | 2 +- ee/hogai/graph.py | 18 +++- ee/hogai/memory/nodes.py | 134 +++++++++++++++++++++++- ee/hogai/memory/prompts.py | 29 ++++- ee/hogai/utils/types.py | 1 + ee/models/assistant.py | 20 +++- 6 files changed, 193 insertions(+), 11 deletions(-) diff --git a/ee/hogai/eval/tests/test_eval_router.py b/ee/hogai/eval/tests/test_eval_router.py index 84e5c4c809972..7c4a3325ea892 100644 --- a/ee/hogai/eval/tests/test_eval_router.py +++ b/ee/hogai/eval/tests/test_eval_router.py @@ -13,7 +13,7 @@ def call_node(team, runnable_config) -> Callable[[str | list], str]: graph: CompiledStateGraph = ( AssistantGraph(team) - .add_start() + .add_edge(AssistantNodeName.START, AssistantNodeName.ROUTER) .add_router(path_map={"trends": AssistantNodeName.END, "funnel": AssistantNodeName.END}) .compile() ) diff --git a/ee/hogai/graph.py b/ee/hogai/graph.py index bf961d6bb9aa8..cdf4dfd25cf78 100644 --- a/ee/hogai/graph.py +++ b/ee/hogai/graph.py @@ -11,6 +11,7 @@ FunnelPlannerNode, FunnelPlannerToolsNode, ) +from ee.hogai.memory.nodes import MemoryInitializer from ee.hogai.router.nodes import RouterNode from ee.hogai.summarizer.nodes import SummarizerNode from ee.hogai.trends.nodes import ( @@ -49,9 +50,6 @@ def compile(self): raise ValueError("Start node not added to the graph") return self._graph.compile(checkpointer=checkpointer) - def add_start(self): - return self.add_edge(AssistantNodeName.START, AssistantNodeName.ROUTER) - def add_router( self, path_map: Optional[dict[Hashable, AssistantNodeName]] = None, @@ -171,9 +169,21 @@ def add_summarizer(self, next_node: AssistantNodeName = AssistantNodeName.END): builder.add_edge(AssistantNodeName.SUMMARIZER, next_node) return self + def add_memory_initializer(self, next_node: AssistantNodeName = AssistantNodeName.ROUTER): + builder = self._graph + memory_initializer = MemoryInitializer(self._team) + builder.add_node(AssistantNodeName.MEMORY_INITIALIZER, memory_initializer.run) + builder.add_conditional_edges( + AssistantNodeName.START, + memory_initializer.should_run, + path_map={True: AssistantNodeName.MEMORY_INITIALIZER, False: next_node}, + ) + builder.add_edge(AssistantNodeName.MEMORY_INITIALIZER, next_node) + return self + def compile_full_graph(self): return ( - self.add_start() + self.add_memory_initializer() .add_router() .add_trends_planner() .add_trends_generator() diff --git a/ee/hogai/memory/nodes.py 
b/ee/hogai/memory/nodes.py index 0a225756a20a6..8749230650723 100644 --- a/ee/hogai/memory/nodes.py +++ b/ee/hogai/memory/nodes.py @@ -1,7 +1,135 @@ +from typing import Literal +from uuid import uuid4 + +from langchain_community.chat_models import ChatPerplexity +from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate +from langchain_core.runnables import RunnableConfig +from langgraph.errors import NodeInterrupt + +from ee.hogai.memory.prompts import ( + FAILED_SCRAPING_MESSAGE, + INITIALIZE_CORE_MEMORY_PROMPT_WITH_BUNDLE_IDS, + INITIALIZE_CORE_MEMORY_PROMPT_WITH_URL, +) +from ee.hogai.utils.helpers import find_last_message_of_type +from ee.hogai.utils.nodes import AssistantNode +from ee.hogai.utils.types import AssistantState, PartialAssistantState +from ee.models.assistant import CoreMemory +from posthog.hogql_queries.ai.event_taxonomy_query_runner import EventTaxonomyQueryRunner +from posthog.hogql_queries.query_runner import ExecutionMode +from posthog.models import Team +from posthog.schema import AssistantMessage, CachedEventTaxonomyQueryResponse, EventTaxonomyQuery, HumanMessage + + +class MemoryInitializerNode(AssistantNode): + _team: Team + + def __init__(self, team: Team): + self._team = team + + def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: + core_memory = CoreMemory.objects.get_or_create(team=self._team) + retrieved_properties = self._retrieve_context() + + # No host or app bundle ID found, continue. + if not retrieved_properties or retrieved_properties[0].sample_count == 0: + core_memory.scraping_status = CoreMemory.ScrapingStatus.SKIPPED + core_memory.save() + return PartialAssistantState() + + retrieved_prop = retrieved_properties[0] + if retrieved_prop.property == "$host": + prompt = ChatPromptTemplate.from_messages( + [("human", INITIALIZE_CORE_MEMORY_PROMPT_WITH_URL)], template_format="mustache" + ).partial(url=retrieved_prop.sample_values[0]) + else: + prompt = ChatPromptTemplate.from_messages( + [("human", INITIALIZE_CORE_MEMORY_PROMPT_WITH_BUNDLE_IDS)], template_format="mustache" + ).partial(bundle_ids=retrieved_prop.sample_values) + + chain = prompt | self._model() | StrOutputParser() + answer = chain.invoke({}, config=config) + + if "no data available." in answer.lower(): + core_memory.scraping_status = CoreMemory.ScrapingStatus.COMPLETED + core_memory.save() + + return PartialAssistantState( + messages=[ + AssistantMessage( + content=FAILED_SCRAPING_MESSAGE, + id=uuid4(), + ) + ] + ) + + return PartialAssistantState(messages=[AssistantMessage(content=answer, id=uuid4())]) + + def should_run(self, _: AssistantState) -> bool: + core_memory: CoreMemory | None = self._team.core_memories.first() + return not core_memory or not core_memory.is_scraping_finished + + def router(self, state: AssistantState) -> Literal["interrupt", "next_node"]: + last_message = state.messages[-1] + if ( + isinstance(last_message, HumanMessage) + or isinstance(last_message, AssistantMessage) + and last_message.content == FAILED_SCRAPING_MESSAGE + ): + return "next_node" + return "interrupt" + + def _model(self): + return ChatPerplexity(model="llama-3.1-sonar-huge-128k-online", streaming=True) + + def _retrieve_context(self): + # Retrieve the origin URL. 
+        runner = EventTaxonomyQueryRunner(
+            team=self._team, query=EventTaxonomyQuery(event="$pageview", properties=["$host"])
+        )
+        response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS)
+        if not isinstance(response, CachedEventTaxonomyQueryResponse):
+            raise ValueError("Failed to query the event taxonomy.")
+        # Otherwise, retrieve the app bundle ID.
+        if not response.results:
+            runner = EventTaxonomyQueryRunner(
+                team=self._team, query=EventTaxonomyQuery(event="$screen", properties=["$app_namespace"])
+            )
+            response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS)
+            if not isinstance(response, CachedEventTaxonomyQueryResponse):
+                raise ValueError("Failed to query the event taxonomy.")
+        return response.results
+
+
+class MemoryInitializerInterruptNode(AssistantNode):
+    def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState:
+        last_message = state.messages[-1]
+        if isinstance(last_message, AssistantMessage):
+            raise NodeInterrupt("Does it look like a good summary of what your product does?")
+        if not isinstance(last_message, HumanMessage):
+            raise ValueError("Last message is not a human message.")
+        if "yes" in last_message.content.lower():
+            return PartialAssistantState(
+                messages=[
+                    AssistantMessage(
+                        content="All right, let's skip this step. You could edit my initial memory in Settings.",
+                        id=uuid4(),
+                    )
+                ]
+            )
-from ee.hogai.memory.prompts import INITIALIZE_CORE_MEMORY_PROMPT
+        core_memory = CoreMemory.objects.get(team=self._team)
+        assistant_message = find_last_message_of_type(state.messages, AssistantMessage)
+        if not assistant_message:
+            raise ValueError("No memory message found.")
-
-def initialize_memory():
-    _ = ChatPromptTemplate.from_messages([("human", INITIALIZE_CORE_MEMORY_PROMPT)], template_format="mustache")
+        core_memory.set_core_memory(assistant_message.content)
+        return PartialAssistantState(
+            messages=[
+                AssistantMessage(
+                    content="Thanks! I've updated my initial memory. Let me help with your request.", id=uuid4()
+                )
+            ]
+        )
diff --git a/ee/hogai/memory/prompts.py b/ee/hogai/memory/prompts.py
index 23e32dad3f01a..a8004fcb6e3da 100644
--- a/ee/hogai/memory/prompts.py
+++ b/ee/hogai/memory/prompts.py
@@ -1,4 +1,4 @@
-INITIALIZE_CORE_MEMORY_PROMPT = """
+INITIALIZE_CORE_MEMORY_PROMPT_WITH_URL = """
 Your goal is to describe what the startup with the given URL does. The provided URL is "{{url}}".
 
 
@@ -19,3 +19,30 @@
 Answer "No data available." if the given website doesn't exist.
 
 """
+
+INITIALIZE_CORE_MEMORY_PROMPT_WITH_BUNDLE_IDS = """
+Your goal is to describe what the startup with the given application bundle ID{{#bundle_ids.length>1}}s{{/bundle_ids.length>1}} does. The provided bundle ID{{#bundle_ids.length > 1}}s are{{/bundle_ids.length > 1}}{{^bundle_ids.length > 1}} is{{/bundle_ids.length > 1}} {{#bundle_ids}}"{{.}}"{{^last}}, {{/last}}{{/bundle_ids}}.
+
+
+- Retrieve information about app identifiers from app listings of App Store and Google Play.
+- If a website URL is provided on the app listing, check the website and retrieve information about the app.
+- Retrieve information from the websites that provide information about businesses like Crunchbase, G2, LinkedIn, Hackernews, YCombinator, etc.
+
+
+
+- Describe the product itself and the market where the company operates.
+- Describe the target audience of the product.
+- Describe the business model of the company.
+- List all features that the product has.
+- Describe each feature in as much detail as possible.
+
+
+
+Output your answer in paragraphs with two to three sentences. Separate new paragraphs with a newline.
+Answer "No data available." if the given website doesn't exist.
+
+"""
+
+FAILED_SCRAPING_MESSAGE = """
+Unfortunately, I couldn't find any information about your product. You could edit my initial memory in Settings. Let me help with your request.
+""".strip()
diff --git a/ee/hogai/utils/types.py b/ee/hogai/utils/types.py
index 917edb3d4987e..907d168af58b8 100644
--- a/ee/hogai/utils/types.py
+++ b/ee/hogai/utils/types.py
@@ -41,6 +41,7 @@ class PartialAssistantState(_SharedAssistantState):
 class AssistantNodeName(StrEnum):
     START = START
     END = END
+    MEMORY_INITIALIZER = "memory_initializer"
     ROUTER = "router"
     TRENDS_PLANNER = "trends_planner"
     TRENDS_PLANNER_TOOLS = "trends_planner_tools"
diff --git a/ee/models/assistant.py b/ee/models/assistant.py
index 83ccff4935bf5..323583f6962a0 100644
--- a/ee/models/assistant.py
+++ b/ee/models/assistant.py
@@ -92,11 +92,27 @@ class CoreMemory(UUIDModel):
-    team = models.ForeignKey(Team, on_delete=models.CASCADE)
-    user = models.ForeignKey(User, on_delete=models.CASCADE)
+    class ScrapingStatus(models.TextChoices):
+        PENDING = "pending", "Pending"
+        COMPLETED = "completed", "Completed"
+        SKIPPED = "skipped", "Skipped"
+
+    team = models.ForeignKey(Team, on_delete=models.CASCADE, related_name="core_memories")
     text = models.TextField(default="", help_text="Dumped core memory where facts are separated by newlines.")
+    initial_text = models.TextField(default="", help_text="Scraped memory about the business.")
+    scraping_status = models.CharField(max_length=20, choices=ScrapingStatus.choices, blank=True, null=True)
 
     class Meta:
         constraints = [
             models.UniqueConstraint(fields=["team_id", "user_id"], name="unique_core_memory"),
         ]
+
+    @property
+    def is_scraping_finished(self) -> bool:
+        return self.scraping_status in [CoreMemory.ScrapingStatus.COMPLETED, CoreMemory.ScrapingStatus.SKIPPED]
+
+    def set_core_memory(self, text: str):
+        self.text = text
+        self.initial_text = text
+        self.scraping_status = CoreMemory.ScrapingStatus.COMPLETED
+        self.save()

From 995e1f0a20f6360fafc28b3e7561a9f48168e8a9 Mon Sep 17 00:00:00 2001
From: Georgiy Tarasov
Date: Wed, 18 Dec 2024 15:37:21 +0100
Subject: [PATCH 04/55] feat: block onboarding when someone has started it

---
 ee/hogai/graph.py        | 14 +++++++++++---
 ee/hogai/memory/nodes.py | 19 +++++++++++--------
 ee/hogai/utils/types.py  |  1 +
 ee/models/assistant.py   | 24 +++++++++++++++++++-----
 4 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/ee/hogai/graph.py b/ee/hogai/graph.py
index cdf4dfd25cf78..8e6d2268e01a1 100644
--- a/ee/hogai/graph.py
+++ b/ee/hogai/graph.py
@@ -11,7 +11,7 @@
     FunnelPlannerNode,
     FunnelPlannerToolsNode,
 )
-from ee.hogai.memory.nodes import MemoryInitializer
+from ee.hogai.memory.nodes import MemoryInitializerInterruptNode, MemoryInitializerNode
 from ee.hogai.router.nodes import RouterNode
 from ee.hogai.summarizer.nodes import SummarizerNode
 from ee.hogai.trends.nodes import (
@@ -171,14 +171,22 @@ def add_summarizer(self, next_node: AssistantNodeName = AssistantNodeName.END):
 
     def add_memory_initializer(self, next_node: AssistantNodeName = AssistantNodeName.ROUTER):
         builder = self._graph
-        memory_initializer = MemoryInitializer(self._team)
+        self._has_start_node = True
+        memory_initializer = MemoryInitializerNode(self._team)
+        memory_initializer_interrupt = MemoryInitializerInterruptNode(self._team)
builder.add_node(AssistantNodeName.MEMORY_INITIALIZER, memory_initializer.run) + builder.add_node(AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT, memory_initializer_interrupt.run) builder.add_conditional_edges( AssistantNodeName.START, memory_initializer.should_run, path_map={True: AssistantNodeName.MEMORY_INITIALIZER, False: next_node}, ) - builder.add_edge(AssistantNodeName.MEMORY_INITIALIZER, next_node) + builder.add_conditional_edges( + AssistantNodeName.MEMORY_INITIALIZER, + memory_initializer.router, + path_map={"next_node": next_node, "interrupt": AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT}, + ) + builder.add_edge(AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT, next_node) return self def compile_full_graph(self): diff --git a/ee/hogai/memory/nodes.py b/ee/hogai/memory/nodes.py index 8749230650723..185d2e5924808 100644 --- a/ee/hogai/memory/nodes.py +++ b/ee/hogai/memory/nodes.py @@ -29,15 +29,16 @@ def __init__(self, team: Team): self._team = team def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - core_memory = CoreMemory.objects.get_or_create(team=self._team) + core_memory, _ = CoreMemory.objects.get_or_create(team=self._team) retrieved_properties = self._retrieve_context() # No host or app bundle ID found, continue. if not retrieved_properties or retrieved_properties[0].sample_count == 0: - core_memory.scraping_status = CoreMemory.ScrapingStatus.SKIPPED - core_memory.save() + core_memory.change_status_to_skipped() return PartialAssistantState() + core_memory.change_status_to_pending() + retrieved_prop = retrieved_properties[0] if retrieved_prop.property == "$host": prompt = ChatPromptTemplate.from_messages( @@ -51,10 +52,9 @@ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistant chain = prompt | self._model() | StrOutputParser() answer = chain.invoke({}, config=config) + # Perplexity has failed to scrape the data, continue. if "no data available." 
in answer.lower(): - core_memory.scraping_status = CoreMemory.ScrapingStatus.COMPLETED - core_memory.save() - + core_memory.change_status_to_skipped() return PartialAssistantState( messages=[ AssistantMessage( @@ -67,8 +67,11 @@ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistant return PartialAssistantState(messages=[AssistantMessage(content=answer, id=uuid4())]) def should_run(self, _: AssistantState) -> bool: - core_memory: CoreMemory | None = self._team.core_memories.first() - return not core_memory or not core_memory.is_scraping_finished + try: + core_memory = CoreMemory.objects.get(team=self._team) + except CoreMemory.DoesNotExist: + return True + return not core_memory.is_scraping_pending def router(self, state: AssistantState) -> Literal["interrupt", "next_node"]: last_message = state.messages[-1] diff --git a/ee/hogai/utils/types.py b/ee/hogai/utils/types.py index 907d168af58b8..6972b368f8d29 100644 --- a/ee/hogai/utils/types.py +++ b/ee/hogai/utils/types.py @@ -42,6 +42,7 @@ class AssistantNodeName(StrEnum): START = START END = END MEMORY_INITIALIZER = "memory_initializer" + MEMORY_INITIALIZER_INTERRUPT = "memory_initializer_interrupt" ROUTER = "router" TRENDS_PLANNER = "trends_planner" TRENDS_PLANNER_TOOLS = "trends_planner_tools" diff --git a/ee/models/assistant.py b/ee/models/assistant.py index 323583f6962a0..f55feb889177e 100644 --- a/ee/models/assistant.py +++ b/ee/models/assistant.py @@ -1,6 +1,8 @@ from collections.abc import Iterable +from datetime import timedelta from django.db import models +from django.utils import timezone from langgraph.checkpoint.serde.types import TASKS from posthog.models.team.team import Team @@ -97,15 +99,27 @@ class ScrapingStatus(models.TextChoices): COMPLETED = "completed", "Completed" SKIPPED = "skipped", "Skipped" - team = models.ForeignKey(Team, on_delete=models.CASCADE, related_name="core_memories") + team = models.OneToOneField(Team, on_delete=models.CASCADE) text = models.TextField(default="", help_text="Dumped core memory where facts are separated by newlines.") initial_text = models.TextField(default="", help_text="Scraped memory about the business.") scraping_status = models.CharField(max_length=20, choices=ScrapingStatus.choices, blank=True, null=True) + scraping_started_at = models.DateTimeField(null=True) - class Meta: - constraints = [ - models.UniqueConstraint(fields=["team_id", "user_id"], name="unique_core_memory"), - ] + def change_status_to_pending(self): + self.scraping_started_at = timezone.now() + self.scraping_status = CoreMemory.ScrapingStatus.PENDING + self.save() + + def change_status_to_skipped(self): + self.scraping_status = CoreMemory.ScrapingStatus.SKIPPED + self.save() + + @property + def is_scraping_pending(self) -> bool: + return ( + self.scraping_status == CoreMemory.ScrapingStatus.PENDING + and self.scraping_started_at < timezone.now() - timedelta(minutes=5) + ) @property def is_scraping_finished(self) -> bool: From 8c4fa2e5f5d7924bd54aa506fcf8ef5a91c883c6 Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Wed, 18 Dec 2024 17:22:24 +0100 Subject: [PATCH 05/55] feat: assistant onboarding --- ee/hogai/assistant.py | 19 ++++-- ee/hogai/graph.py | 19 ++++-- ee/hogai/memory/nodes.py | 121 +++++++++++++++++++++++---------------- ee/hogai/utils/types.py | 1 + 4 files changed, 102 insertions(+), 58 deletions(-) diff --git a/ee/hogai/assistant.py b/ee/hogai/assistant.py index 17a1c6341b667..8d49be91f62ec 100644 --- a/ee/hogai/assistant.py +++ b/ee/hogai/assistant.py @@ -59,6 +59,13 @@ 
AssistantNodeName.FUNNEL_GENERATOR: FunnelGeneratorNode, } +STREAMING_NODES: set[AssistantNodeName] = { + AssistantNodeName.MEMORY_ONBOARDING, + AssistantNodeName.MEMORY_INITIALIZER, + AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT, + AssistantNodeName.SUMMARIZER, +} + class Assistant: _team: Team @@ -225,10 +232,12 @@ def _process_value_update(self, update: GraphValueUpdateTuple) -> BaseModel | No return node_val.messages[0] elif node_val.intermediate_steps: return AssistantGenerationStatusEvent(type=AssistantGenerationStatusType.GENERATION_ERROR) - elif node_val := state_update.get(AssistantNodeName.SUMMARIZER): - if isinstance(node_val, PartialAssistantState) and node_val.messages: - self._chunks = AIMessageChunk(content="") - return node_val.messages[0] + + for node_name in STREAMING_NODES: + if node_val := state_update.get(node_name): + if isinstance(node_val, PartialAssistantState) and node_val.messages: + self._chunks = AIMessageChunk(content="") + return node_val.messages[0] return None @@ -243,7 +252,7 @@ def _process_message_update(self, update: GraphMessageUpdateTuple) -> BaseModel if parsed_message: initiator_id = self._state.start_id if self._state is not None else None return VisualizationMessage(answer=parsed_message.query, initiator=initiator_id) - elif langgraph_state["langgraph_node"] == AssistantNodeName.SUMMARIZER: + elif langgraph_state["langgraph_node"] in STREAMING_NODES: self._chunks += langchain_message # type: ignore return AssistantMessage(content=self._chunks.content) return None diff --git a/ee/hogai/graph.py b/ee/hogai/graph.py index 8e6d2268e01a1..f471c1def48dc 100644 --- a/ee/hogai/graph.py +++ b/ee/hogai/graph.py @@ -11,7 +11,7 @@ FunnelPlannerNode, FunnelPlannerToolsNode, ) -from ee.hogai.memory.nodes import MemoryInitializerInterruptNode, MemoryInitializerNode +from ee.hogai.memory.nodes import MemoryInitializerInterruptNode, MemoryInitializerNode, MemoryOnboardingNode from ee.hogai.router.nodes import RouterNode from ee.hogai.summarizer.nodes import SummarizerNode from ee.hogai.trends.nodes import ( @@ -172,21 +172,32 @@ def add_summarizer(self, next_node: AssistantNodeName = AssistantNodeName.END): def add_memory_initializer(self, next_node: AssistantNodeName = AssistantNodeName.ROUTER): builder = self._graph self._has_start_node = True + + memory_onboarding = MemoryOnboardingNode(self._team) memory_initializer = MemoryInitializerNode(self._team) memory_initializer_interrupt = MemoryInitializerInterruptNode(self._team) + + builder.add_node(AssistantNodeName.MEMORY_ONBOARDING, memory_onboarding.run) builder.add_node(AssistantNodeName.MEMORY_INITIALIZER, memory_initializer.run) builder.add_node(AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT, memory_initializer_interrupt.run) + builder.add_conditional_edges( AssistantNodeName.START, - memory_initializer.should_run, - path_map={True: AssistantNodeName.MEMORY_INITIALIZER, False: next_node}, + memory_onboarding.should_run, + path_map={True: AssistantNodeName.MEMORY_ONBOARDING, False: next_node}, + ) + builder.add_conditional_edges( + AssistantNodeName.MEMORY_ONBOARDING, + memory_onboarding.router, + path_map={"continue": next_node, "initialize_memory": AssistantNodeName.MEMORY_INITIALIZER}, ) builder.add_conditional_edges( AssistantNodeName.MEMORY_INITIALIZER, memory_initializer.router, - path_map={"next_node": next_node, "interrupt": AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT}, + path_map={"continue": next_node, "interrupt": AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT}, ) 
builder.add_edge(AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT, next_node) + return self def compile_full_graph(self): diff --git a/ee/hogai/memory/nodes.py b/ee/hogai/memory/nodes.py index 185d2e5924808..2918410af9d76 100644 --- a/ee/hogai/memory/nodes.py +++ b/ee/hogai/memory/nodes.py @@ -1,6 +1,7 @@ from typing import Literal from uuid import uuid4 +from attr import dataclass from langchain_community.chat_models import ChatPerplexity from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate @@ -22,12 +23,34 @@ from posthog.schema import AssistantMessage, CachedEventTaxonomyQueryResponse, EventTaxonomyQuery, HumanMessage -class MemoryInitializerNode(AssistantNode): - _team: Team +class MemoryInitializerContextMixin: + def _retrieve_context(self): + @dataclass + class Mock: + sample_count: int + property: str + sample_values: list[str] + + return [Mock(sample_count=1, property="$host", sample_values=["https://posthog.com"])] + # Retrieve the origin URL. + runner = EventTaxonomyQueryRunner( + team=self._team, query=EventTaxonomyQuery(event="$pageview", properties=["$host"]) + ) + response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS) + if not isinstance(response, CachedEventTaxonomyQueryResponse): + raise ValueError("Failed to query the event taxonomy.") + # Otherwise, retrieve the app bundle ID. + if not response.results: + runner = EventTaxonomyQueryRunner( + team=self._team, query=EventTaxonomyQuery(event="$screen", properties=["$app_namespace"]) + ) + response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS) + if not isinstance(response, CachedEventTaxonomyQueryResponse): + raise ValueError("Failed to query the event taxonomy.") + return response.results - def __init__(self, team: Team): - self._team = team +class MemoryOnboardingNode(MemoryInitializerContextMixin, AssistantNode): def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: core_memory, _ = CoreMemory.objects.get_or_create(team=self._team) retrieved_properties = self._retrieve_context() @@ -38,6 +61,42 @@ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistant return PartialAssistantState() core_memory.change_status_to_pending() + return PartialAssistantState( + messages=[ + AssistantMessage( + content="Hey, my name is Max. Before we start, let's find and verify information about your product.", + id=str(uuid4()), + ) + ] + ) + + def should_run(self, _: AssistantState) -> bool: + try: + core_memory = CoreMemory.objects.get(team=self._team) + except CoreMemory.DoesNotExist: + return True + return not core_memory.is_scraping_pending + + def router(self, state: AssistantState) -> Literal["initialize_memory", "continue"]: + last_message = state.messages[-1] + if isinstance(last_message, HumanMessage): + return "continue" + return "initialize_memory" + + +class MemoryInitializerNode(MemoryInitializerContextMixin, AssistantNode): + _team: Team + + def __init__(self, team: Team): + self._team = team + + def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: + core_memory, _ = CoreMemory.objects.get_or_create(team=self._team) + retrieved_properties = self._retrieve_context() + + # No host or app bundle ID found, continue. 
+ if not retrieved_properties or retrieved_properties[0].sample_count == 0: + raise ValueError("No host or app bundle ID found in the memory initializer.") retrieved_prop = retrieved_properties[0] if retrieved_prop.property == "$host": @@ -55,54 +114,17 @@ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistant # Perplexity has failed to scrape the data, continue. if "no data available." in answer.lower(): core_memory.change_status_to_skipped() - return PartialAssistantState( - messages=[ - AssistantMessage( - content=FAILED_SCRAPING_MESSAGE, - id=uuid4(), - ) - ] - ) - - return PartialAssistantState(messages=[AssistantMessage(content=answer, id=uuid4())]) - - def should_run(self, _: AssistantState) -> bool: - try: - core_memory = CoreMemory.objects.get(team=self._team) - except CoreMemory.DoesNotExist: - return True - return not core_memory.is_scraping_pending + return PartialAssistantState(messages=[AssistantMessage(content=FAILED_SCRAPING_MESSAGE, id=str(uuid4()))]) + return PartialAssistantState(messages=[AssistantMessage(content=answer, id=str(uuid4()))]) - def router(self, state: AssistantState) -> Literal["interrupt", "next_node"]: + def router(self, state: AssistantState) -> Literal["interrupt", "continue"]: last_message = state.messages[-1] - if ( - isinstance(last_message, HumanMessage) - or isinstance(last_message, AssistantMessage) - and last_message.content == FAILED_SCRAPING_MESSAGE - ): - return "next_node" + if isinstance(last_message, AssistantMessage) and last_message.content == FAILED_SCRAPING_MESSAGE: + return "continue" return "interrupt" def _model(self): - return ChatPerplexity(model="llama-3.1-sonar-huge-128k-online", streaming=True) - - def _retrieve_context(self): - # Retrieve the origin URL. - runner = EventTaxonomyQueryRunner( - team=self._team, query=EventTaxonomyQuery(event="$pageview", properties=["$host"]) - ) - response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS) - if not isinstance(response, CachedEventTaxonomyQueryResponse): - raise ValueError("Failed to query the event taxonomy.") - # Otherwise, retrieve the app bundle ID. - if not response.results: - runner = EventTaxonomyQueryRunner( - team=self._team, query=EventTaxonomyQuery(event="$screen", properties=["$app_namespace"]) - ) - response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS) - if not isinstance(response, CachedEventTaxonomyQueryResponse): - raise ValueError("Failed to query the event taxonomy.") - return response.results + return ChatPerplexity(model="llama-3.1-sonar-large-128k-online", streaming=True) class MemoryInitializerInterruptNode(AssistantNode): @@ -117,7 +139,7 @@ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistant messages=[ AssistantMessage( content="All right, let's skip this step. You could edit my initial memory in Settings.", - id=uuid4(), + id=str(uuid4()), ) ] ) @@ -132,7 +154,8 @@ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistant return PartialAssistantState( messages=[ AssistantMessage( - content="Thanks! I've updated my initial memory. Let me help with your request.", id=uuid4() + content="Thanks! I've updated my initial memory. 
Let me help with your request.", + id=str(uuid4()), ) ] ) diff --git a/ee/hogai/utils/types.py b/ee/hogai/utils/types.py index 6972b368f8d29..15a52ca90a218 100644 --- a/ee/hogai/utils/types.py +++ b/ee/hogai/utils/types.py @@ -41,6 +41,7 @@ class PartialAssistantState(_SharedAssistantState): class AssistantNodeName(StrEnum): START = START END = END + MEMORY_ONBOARDING = "memory_onboarding" MEMORY_INITIALIZER = "memory_initializer" MEMORY_INITIALIZER_INTERRUPT = "memory_initializer_interrupt" ROUTER = "router" From 1af9f54e8a2152a3f0444df5d40b83dc0519620f Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Fri, 20 Dec 2024 16:14:31 +0100 Subject: [PATCH 06/55] chore: migration --- ee/migrations/0020_corememory.py | 45 ++++++++++++++++++++++++++++++++ ee/migrations/max_migration.txt | 2 +- 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 ee/migrations/0020_corememory.py diff --git a/ee/migrations/0020_corememory.py b/ee/migrations/0020_corememory.py new file mode 100644 index 0000000000000..a66baec6e5542 --- /dev/null +++ b/ee/migrations/0020_corememory.py @@ -0,0 +1,45 @@ +# Generated by Django 4.2.15 on 2024-12-20 15:14 + +from django.db import migrations, models +import django.db.models.deletion +import posthog.models.utils + + +class Migration(migrations.Migration): + dependencies = [ + ("posthog", "0535_alter_hogfunction_type"), + ("ee", "0019_remove_conversationcheckpointblob_unique_checkpoint_blob_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="CoreMemory", + fields=[ + ( + "id", + models.UUIDField( + default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False + ), + ), + ( + "text", + models.TextField(default="", help_text="Dumped core memory where facts are separated by newlines."), + ), + ("initial_text", models.TextField(default="", help_text="Scraped memory about the business.")), + ( + "scraping_status", + models.CharField( + blank=True, + choices=[("pending", "Pending"), ("completed", "Completed"), ("skipped", "Skipped")], + max_length=20, + null=True, + ), + ), + ("scraping_started_at", models.DateTimeField(null=True)), + ("team", models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to="posthog.team")), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/ee/migrations/max_migration.txt b/ee/migrations/max_migration.txt index aec0628d960c8..cd0433c401973 100644 --- a/ee/migrations/max_migration.txt +++ b/ee/migrations/max_migration.txt @@ -1 +1 @@ -0019_remove_conversationcheckpointblob_unique_checkpoint_blob_and_more +0020_corememory From 66d3ec26eb14cee60facf2b77b24d221a5aa6cfa Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Thu, 19 Dec 2024 13:20:17 +0100 Subject: [PATCH 07/55] feat: flag for resumed conversation --- ee/hogai/taxonomy_agent/nodes.py | 6 +----- ee/hogai/utils/types.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/ee/hogai/taxonomy_agent/nodes.py b/ee/hogai/taxonomy_agent/nodes.py index 92fe74ae55bcb..755911f54e5aa 100644 --- a/ee/hogai/taxonomy_agent/nodes.py +++ b/ee/hogai/taxonomy_agent/nodes.py @@ -272,13 +272,9 @@ def _run_with_toolkit( # Feedback was provided. 
last_message = state.messages[-1] - response = "" - if isinstance(last_message, HumanMessage): - response = last_message.content - return PartialAssistantState( resumed=False, - intermediate_steps=[*intermediate_steps[:-1], (action, response)], + intermediate_steps=[*intermediate_steps[:-1], (action, last_message.content)], ) output = "" diff --git a/ee/hogai/utils/types.py b/ee/hogai/utils/types.py index 15a52ca90a218..4f81a5fae44c5 100644 --- a/ee/hogai/utils/types.py +++ b/ee/hogai/utils/types.py @@ -35,7 +35,7 @@ class AssistantState(_SharedAssistantState): class PartialAssistantState(_SharedAssistantState): - messages: Optional[Sequence[AssistantMessageUnion]] = Field(default=None) + messages: Sequence[AssistantMessageUnion] = Field(default=None) class AssistantNodeName(StrEnum): From 7cacd2fedca634436926ba218a08e60799a7b1cb Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Fri, 20 Dec 2024 10:48:08 +0100 Subject: [PATCH 08/55] fix: mypy --- ee/hogai/taxonomy_agent/nodes.py | 6 +++++- ee/hogai/utils/types.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ee/hogai/taxonomy_agent/nodes.py b/ee/hogai/taxonomy_agent/nodes.py index 755911f54e5aa..92fe74ae55bcb 100644 --- a/ee/hogai/taxonomy_agent/nodes.py +++ b/ee/hogai/taxonomy_agent/nodes.py @@ -272,9 +272,13 @@ def _run_with_toolkit( # Feedback was provided. last_message = state.messages[-1] + response = "" + if isinstance(last_message, HumanMessage): + response = last_message.content + return PartialAssistantState( resumed=False, - intermediate_steps=[*intermediate_steps[:-1], (action, last_message.content)], + intermediate_steps=[*intermediate_steps[:-1], (action, response)], ) output = "" diff --git a/ee/hogai/utils/types.py b/ee/hogai/utils/types.py index 4f81a5fae44c5..15a52ca90a218 100644 --- a/ee/hogai/utils/types.py +++ b/ee/hogai/utils/types.py @@ -35,7 +35,7 @@ class AssistantState(_SharedAssistantState): class PartialAssistantState(_SharedAssistantState): - messages: Sequence[AssistantMessageUnion] = Field(default=None) + messages: Optional[Sequence[AssistantMessageUnion]] = Field(default=None) class AssistantNodeName(StrEnum): From d47b848cbc9c45938ebdba7981bf4e0983385409 Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Fri, 20 Dec 2024 16:53:43 +0100 Subject: [PATCH 09/55] feat: stricter prompt for perplexity --- ee/hogai/memory/nodes.py | 32 +++++++++++++++++--------------- ee/hogai/memory/prompts.py | 32 +++++++++++++++++++++----------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/ee/hogai/memory/nodes.py b/ee/hogai/memory/nodes.py index 2918410af9d76..22d811396111b 100644 --- a/ee/hogai/memory/nodes.py +++ b/ee/hogai/memory/nodes.py @@ -1,7 +1,6 @@ from typing import Literal from uuid import uuid4 -from attr import dataclass from langchain_community.chat_models import ChatPerplexity from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate @@ -10,8 +9,10 @@ from ee.hogai.memory.prompts import ( FAILED_SCRAPING_MESSAGE, - INITIALIZE_CORE_MEMORY_PROMPT_WITH_BUNDLE_IDS, - INITIALIZE_CORE_MEMORY_PROMPT_WITH_URL, + INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_PROMPT, + INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_USER_PROMPT, + INITIALIZE_CORE_MEMORY_WITH_URL_PROMPT, + INITIALIZE_CORE_MEMORY_WITH_URL_USER_PROMPT, ) from ee.hogai.utils.helpers import find_last_message_of_type from ee.hogai.utils.nodes import AssistantNode @@ -25,13 +26,6 @@ class MemoryInitializerContextMixin: def _retrieve_context(self): - @dataclass - class Mock: - 
sample_count: int - property: str - sample_values: list[str] - - return [Mock(sample_count=1, property="$host", sample_values=["https://posthog.com"])] # Retrieve the origin URL. runner = EventTaxonomyQueryRunner( team=self._team, query=EventTaxonomyQuery(event="$pageview", properties=["$host"]) @@ -75,7 +69,7 @@ def should_run(self, _: AssistantState) -> bool: core_memory = CoreMemory.objects.get(team=self._team) except CoreMemory.DoesNotExist: return True - return not core_memory.is_scraping_pending + return not core_memory.is_scraping_pending and not core_memory.is_scraping_finished def router(self, state: AssistantState) -> Literal["initialize_memory", "continue"]: last_message = state.messages[-1] @@ -101,11 +95,19 @@ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistant retrieved_prop = retrieved_properties[0] if retrieved_prop.property == "$host": prompt = ChatPromptTemplate.from_messages( - [("human", INITIALIZE_CORE_MEMORY_PROMPT_WITH_URL)], template_format="mustache" + [ + ("system", INITIALIZE_CORE_MEMORY_WITH_URL_PROMPT), + ("human", INITIALIZE_CORE_MEMORY_WITH_URL_USER_PROMPT), + ], + template_format="mustache", ).partial(url=retrieved_prop.sample_values[0]) else: prompt = ChatPromptTemplate.from_messages( - [("human", INITIALIZE_CORE_MEMORY_PROMPT_WITH_BUNDLE_IDS)], template_format="mustache" + [ + ("system", INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_PROMPT), + ("human", INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_USER_PROMPT), + ], + template_format="mustache", ).partial(bundle_ids=retrieved_prop.sample_values) chain = prompt | self._model() | StrOutputParser() @@ -130,11 +132,11 @@ def _model(self): class MemoryInitializerInterruptNode(AssistantNode): def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: last_message = state.messages[-1] - if isinstance(last_message, AssistantMessage): + if not state.resumed: raise NodeInterrupt("Does it look like a good summary of what your product does?") if not isinstance(last_message, HumanMessage): raise ValueError("Last message is not a human message.") - if "yes" in last_message.content.lower(): + if "yes" not in last_message.content.lower(): return PartialAssistantState( messages=[ AssistantMessage( diff --git a/ee/hogai/memory/prompts.py b/ee/hogai/memory/prompts.py index a8004fcb6e3da..9770d3a163251 100644 --- a/ee/hogai/memory/prompts.py +++ b/ee/hogai/memory/prompts.py @@ -1,5 +1,5 @@ -INITIALIZE_CORE_MEMORY_PROMPT_WITH_URL = """ -Your goal is to describe what the startup with the given URL does. The provided URL is "{{url}}". +INITIALIZE_CORE_MEMORY_WITH_URL_PROMPT = """ +Your goal is to describe what the startup with the given URL does. - Check the provided URL. If the URL has a subdomain, check the root domain first and then the subdomain. @@ -9,19 +9,25 @@ - Describe the product itself and the market where the company operates. - Describe the target audience of the product. -- Describe the business model of the company. -- List all features that the product has. -- Describe each feature in as much detail as possible. +- Describe the company's business model. +- List all the features of the product and describe each feature in as much detail as possible. -Output your answer in paragraphs with two to three sentences. Separate new paragraphs with a newline. -Answer "No data available." if the given website doesn't exist. +Output your answer in paragraphs with two to three sentences. Separate new paragraphs with a new line. +IMPORTANT: do not use any markdown and headers. 
It must be plain text. +Answer "No data available." if: +- the given website doesn't exist. +- the URL is not a valid website or points to a local environment, for example, localhost, 127.0.0.1, etc. -""" +""".strip() -INITIALIZE_CORE_MEMORY_PROMPT_WITH_BUNDLE_IDS = """ -Your goal is to describe what the startup with the given application bundle ID{{#bundle_ids.length>1}}s{{/bundle_ids.length>1}} does. The provided bundle ID{{#bundle_ids.length > 1}}s are{{/bundle_ids.length > 1}}{{^bundle_ids.length > 1}} is{{/bundle_ids.length > 1}} {{#bundle_ids}}"{{.}}"{{^last}}, {{/last}}{{/bundle_ids}}. +INITIALIZE_CORE_MEMORY_WITH_URL_USER_PROMPT = """ +The provided URL is "{{url}}". +""".strip() + +INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_PROMPT = """ +Your goal is to describe what the startup with the given application bundle IDs does. - Retrieve information about app identifiers from app listings of App Store and Google Play. @@ -41,7 +47,11 @@ Output your answer in paragraphs with two to three sentences. Separate new paragraphs with a newline. Answer "No data available." if the given website doesn't exist. -""" +""".strip() + +INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_USER_PROMPT = """ +The provided bundle ID{{#bundle_ids.length > 1}}s are{{/bundle_ids.length > 1}}{{^bundle_ids.length > 1}} is{{/bundle_ids.length > 1}} {{#bundle_ids}}"{{.}}"{{^last}}, {{/last}}{{/bundle_ids}}. +""".strip() FAILED_SCRAPING_MESSAGE = """ Unfortunately, I couldn't find any information about your product. You could edit my initial memory in Settings. Let me help with your request. From 7143371adc191aae6ee63923737f19a1834abe67 Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Thu, 2 Jan 2025 12:57:07 +0100 Subject: [PATCH 10/55] feat: simple dynamic forms --- ee/hogai/assistant.py | 5 ++- ee/hogai/memory/nodes.py | 25 ++++++++++-- frontend/src/queries/schema.json | 27 ++++++++++++- frontend/src/queries/schema.ts | 10 +++++ frontend/src/scenes/max/Thread.tsx | 63 +++++++++++++++++++++++------- posthog/schema.py | 24 ++++++++++-- 6 files changed, 130 insertions(+), 24 deletions(-) diff --git a/ee/hogai/assistant.py b/ee/hogai/assistant.py index 8d49be91f62ec..517379839ccdc 100644 --- a/ee/hogai/assistant.py +++ b/ee/hogai/assistant.py @@ -126,8 +126,11 @@ def _stream(self) -> Generator[str, None, None]: # Check if the assistant has requested help. state = self._graph.get_state(config) if state.next: + interrupt_value = state.tasks[0].interrupts[0].value yield self._serialize_message( - AssistantMessage(content=state.tasks[0].interrupts[0].value, id=str(uuid4())) + AssistantMessage(content=interrupt_value, id=str(uuid4())) + if isinstance(interrupt_value, str) + else interrupt_value ) else: self._report_conversation_state(last_viz_message) diff --git a/ee/hogai/memory/nodes.py b/ee/hogai/memory/nodes.py index 22d811396111b..efc3dc3da4fd9 100644 --- a/ee/hogai/memory/nodes.py +++ b/ee/hogai/memory/nodes.py @@ -21,10 +21,19 @@ from posthog.hogql_queries.ai.event_taxonomy_query_runner import EventTaxonomyQueryRunner from posthog.hogql_queries.query_runner import ExecutionMode from posthog.models import Team -from posthog.schema import AssistantMessage, CachedEventTaxonomyQueryResponse, EventTaxonomyQuery, HumanMessage +from posthog.schema import ( + AssistantForm, + AssistantMessage, + AssistantMessageMetadata, + CachedEventTaxonomyQueryResponse, + EventTaxonomyQuery, + HumanMessage, +) class MemoryInitializerContextMixin: + _team: Team + def _retrieve_context(self): # Retrieve the origin URL. 
runner = EventTaxonomyQueryRunner( @@ -130,13 +139,21 @@ def _model(self): class MemoryInitializerInterruptNode(AssistantNode): + OPTIONS = ("Yes, save this.", "No, this doesn't look right.") + def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: last_message = state.messages[-1] if not state.resumed: - raise NodeInterrupt("Does it look like a good summary of what your product does?") + raise NodeInterrupt( + AssistantMessage( + content="Does it look like a good summary of what your product does?", + meta=AssistantMessageMetadata(form=AssistantForm(options=self.OPTIONS)), + id=str(uuid4()), + ) + ) if not isinstance(last_message, HumanMessage): - raise ValueError("Last message is not a human message.") - if "yes" not in last_message.content.lower(): + raise ValueError("Last messa1ge is not a human message.") + if last_message.content != self.OPTIONS[0]: return PartialAssistantState( messages=[ AssistantMessage( diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index dc52be4fe170c..dfe6e4b216d9d 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -572,6 +572,19 @@ "enum": ["status", "message", "conversation"], "type": "string" }, + "AssistantForm": { + "additionalProperties": false, + "properties": { + "options": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": ["options"], + "type": "object" + }, "AssistantFunnelsBreakdownFilter": { "additionalProperties": false, "properties": { @@ -1063,6 +1076,9 @@ "id": { "type": "string" }, + "meta": { + "$ref": "#/definitions/AssistantMessageMetadata" + }, "type": { "const": "ai", "type": "string" @@ -1071,8 +1087,17 @@ "required": ["type", "content"], "type": "object" }, + "AssistantMessageMetadata": { + "additionalProperties": false, + "properties": { + "form": { + "$ref": "#/definitions/AssistantForm" + } + }, + "type": "object" + }, "AssistantMessageType": { - "enum": ["human", "ai", "ai/reasoning", "ai/viz", "ai/failure", "ai/router"], + "enum": ["human", "ai", "ai/reasoning", "ai/viz", "ai/failure", "ai/router", "ai/form"], "type": "string" }, "AssistantMultipleBreakdownFilter": { diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index daaae3e403c08..0f6478651dda8 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -2456,6 +2456,7 @@ export enum AssistantMessageType { Visualization = 'ai/viz', Failure = 'ai/failure', Router = 'ai/router', + Form = 'ai/form', } export interface BaseAssistantMessage { @@ -2467,9 +2468,18 @@ export interface HumanMessage extends BaseAssistantMessage { content: string } +export interface AssistantForm { + options: string[] +} + +export interface AssistantMessageMetadata { + form?: AssistantForm +} + export interface AssistantMessage extends BaseAssistantMessage { type: AssistantMessageType.Assistant content: string + meta?: AssistantMessageMetadata } export interface ReasoningMessage extends BaseAssistantMessage { diff --git a/frontend/src/scenes/max/Thread.tsx b/frontend/src/scenes/max/Thread.tsx index f8a5d56d0650b..3f7067130bf18 100644 --- a/frontend/src/scenes/max/Thread.tsx +++ b/frontend/src/scenes/max/Thread.tsx @@ -22,6 +22,7 @@ import { twMerge } from 'tailwind-merge' import { Query } from '~/queries/Query/Query' import { + AssistantForm, AssistantMessage, FailureMessage, HumanMessage, @@ -100,8 +101,8 @@ function MessageGroup({ messages, isFinal: isGroupFinal, index: messageGroupInde ) @@ -173,31 +174,48 @@ const 
MessageTemplate = React.forwardRef(f interface TextAnswerProps { message: (AssistantMessage | FailureMessage) & ThreadMessage - rateable: boolean retriable: boolean messageGroupIndex: number + interactable?: boolean } const TextAnswer = React.forwardRef(function TextAnswer( - { message, rateable, retriable, messageGroupIndex }, + { message, retriable, messageGroupIndex, interactable }, ref ) { + const action = (() => { + if (message.status !== 'completed') { + return null + } + + // Don't show retry button when rate-limited + if ( + isFailureMessage(message) && + !message.content?.includes('usage limit') && // Don't show retry button when rate-limited + retriable + ) { + return + } + + if (isAssistantMessage(message) && interactable) { + // Message has been interrupted with a form + if (message.meta?.form?.options) { + return + } + + // Show answer actions if the assistant's response is complete at this point + return + } + + return null + })() + return ( - ) : message.status === 'completed' && message.type === 'ai' && rateable ? ( - // Show answer actions if the assistant's response is complete at this point - - ) : null - } + action={action} > {message.content || '*Max has failed to generate an answer. Please try again.*'} @@ -206,6 +224,23 @@ const TextAnswer = React.forwardRef(function Te ) }) +interface AssistantMessageFormProps { + form: AssistantForm +} + +function AssistantMessageForm({ form }: AssistantMessageFormProps): JSX.Element { + const { askMax } = useActions(maxLogic) + return ( +
+ {form.options.map((option) => ( + askMax(option)} size="small" type="secondary"> + {option} + + ))} +
+ ) +} + function VisualizationAnswer({ message, status, diff --git a/posthog/schema.py b/posthog/schema.py index 98159253931bc..c7d58e5a720f2 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -101,6 +101,13 @@ class AssistantEventType(StrEnum): CONVERSATION = "conversation" +class AssistantForm(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + options: list[str] + + class AssistantFunnelsBreakdownType(StrEnum): PERSON = "person" EVENT = "event" @@ -205,13 +212,11 @@ class AssistantGroupPropertyFilter3(BaseModel): value: str = Field(..., description="Value must be a date in ISO 8601 format.") -class AssistantMessage(BaseModel): +class AssistantMessageMetadata(BaseModel): model_config = ConfigDict( extra="forbid", ) - content: str - id: Optional[str] = None - type: Literal["ai"] = "ai" + form: Optional[AssistantForm] = None class AssistantMessageType(StrEnum): @@ -221,6 +226,7 @@ class AssistantMessageType(StrEnum): AI_VIZ = "ai/viz" AI_FAILURE = "ai/failure" AI_ROUTER = "ai/router" + AI_FORM = "ai/form" class AssistantSetPropertyFilterOperator(StrEnum): @@ -1905,6 +1911,16 @@ class AssistantGroupPropertyFilter4(BaseModel): type: Literal["group"] = "group" +class AssistantMessage(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + content: str + id: Optional[str] = None + meta: Optional[AssistantMessageMetadata] = None + type: Literal["ai"] = "ai" + + class AssistantSetPropertyFilter(BaseModel): model_config = ConfigDict( extra="forbid", From 92e3d44986530679e86a2bd47dbbdc92c5e90d38 Mon Sep 17 00:00:00 2001 From: Georgiy Tarasov Date: Thu, 2 Jan 2025 14:52:08 +0100 Subject: [PATCH 11/55] feat: style form options --- ee/hogai/memory/nodes.py | 10 ++++- frontend/src/queries/schema.json | 17 ++++++++- frontend/src/queries/schema.ts | 8 +++- frontend/src/scenes/max/Max.stories.tsx | 17 +++++++++ frontend/src/scenes/max/QuestionInput.tsx | 4 +- frontend/src/scenes/max/Thread.tsx | 17 +++++++-- .../max/__mocks__/chatResponse.mocks.ts | 22 +++++++++++ frontend/src/scenes/max/maxLogic.ts | 16 +++++++- posthog/schema.py | 37 +++++++++++-------- 9 files changed, 121 insertions(+), 27 deletions(-) diff --git a/ee/hogai/memory/nodes.py b/ee/hogai/memory/nodes.py index efc3dc3da4fd9..248a6bf49b236 100644 --- a/ee/hogai/memory/nodes.py +++ b/ee/hogai/memory/nodes.py @@ -23,6 +23,7 @@ from posthog.models import Team from posthog.schema import ( AssistantForm, + AssistantFormOption, AssistantMessage, AssistantMessageMetadata, CachedEventTaxonomyQueryResponse, @@ -147,7 +148,14 @@ def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistant raise NodeInterrupt( AssistantMessage( content="Does it look like a good summary of what your product does?", - meta=AssistantMessageMetadata(form=AssistantForm(options=self.OPTIONS)), + meta=AssistantMessageMetadata( + form=AssistantForm( + options=[ + AssistantFormOption(value=self.OPTIONS[0], variant="primary"), + AssistantFormOption(value=self.OPTIONS[1]), + ] + ) + ), id=str(uuid4()), ) ) diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index dfe6e4b216d9d..eb8409d8d406f 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -577,7 +577,7 @@ "properties": { "options": { "items": { - "type": "string" + "$ref": "#/definitions/AssistantFormOption" }, "type": "array" } @@ -585,6 +585,19 @@ "required": ["options"], "type": "object" }, + "AssistantFormOption": { + "additionalProperties": false, + "properties": { + "value": { + "type": "string" + }, + 
"variant": { + "type": "string" + } + }, + "required": ["value"], + "type": "object" + }, "AssistantFunnelsBreakdownFilter": { "additionalProperties": false, "properties": { @@ -1097,7 +1110,7 @@ "type": "object" }, "AssistantMessageType": { - "enum": ["human", "ai", "ai/reasoning", "ai/viz", "ai/failure", "ai/router", "ai/form"], + "enum": ["human", "ai", "ai/reasoning", "ai/viz", "ai/failure", "ai/router"], "type": "string" }, "AssistantMultipleBreakdownFilter": { diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 0f6478651dda8..267e01e2d4746 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -2456,7 +2456,6 @@ export enum AssistantMessageType { Visualization = 'ai/viz', Failure = 'ai/failure', Router = 'ai/router', - Form = 'ai/form', } export interface BaseAssistantMessage { @@ -2468,8 +2467,13 @@ export interface HumanMessage extends BaseAssistantMessage { content: string } +export interface AssistantFormOption { + value: string + variant?: string +} + export interface AssistantForm { - options: string[] + options: AssistantFormOption[] } export interface AssistantMessageMetadata { diff --git a/frontend/src/scenes/max/Max.stories.tsx b/frontend/src/scenes/max/Max.stories.tsx index 51dc03ab0cb5c..93a30c078e4b2 100644 --- a/frontend/src/scenes/max/Max.stories.tsx +++ b/frontend/src/scenes/max/Max.stories.tsx @@ -10,6 +10,7 @@ import { chatResponseChunk, CONVERSATION_ID, failureChunk, + formChunk, generationFailureChunk, humanMessage, } from './__mocks__/chatResponse.mocks' @@ -192,3 +193,19 @@ export const ThreadWithRateLimit: StoryFn = () => { return