Merge branch 'master' into split-schema
Twixes committed Jan 9, 2025
2 parents f622ab3 + e441bbd commit a5426b5
Showing 402 changed files with 9,225 additions and 24,481 deletions.
4 changes: 2 additions & 2 deletions .github/pr-deploy/hobby.yaml.tmpl
@@ -20,7 +20,7 @@ spec:
     privileged: true
   resources:
     requests:
-      cpu: 250m
+      cpu: 2
       memory: 500M
   ports:
     - containerPort: 2375
@@ -72,4 +72,4 @@ spec:
   service:
     name: hobby-service-$HOSTNAME
     port:
-      number: 80
+      number: 80
4 changes: 2 additions & 2 deletions bin/mprocs.yaml
@@ -1,9 +1,9 @@
 procs:
   celery-worker:
-    shell: 'bin/check_kafka_clickhouse_up && source ./bin/celery-queues.env && python manage.py run_autoreload_celery --type=worker'
+    shell: 'bin/check_kafka_clickhouse_up && ./bin/start-celery worker'
 
   celery-beat:
-    shell: 'bin/check_kafka_clickhouse_up && source ./bin/celery-queues.env && python manage.py run_autoreload_celery --type=beat'
+    shell: 'bin/check_kafka_clickhouse_up && ./bin/start-celery beat'
 
   plugin-server:
     shell: 'bin/check_kafka_clickhouse_up && ./bin/plugin-server'
9 changes: 9 additions & 0 deletions bin/start-celery
@@ -0,0 +1,9 @@
#!/bin/bash
# Starts a Celery worker or beat process. Must be run with the process type as the first argument: worker | beat

set -e

source ./bin/celery-queues.env

# Start the requested Celery process type under autoreload
python manage.py run_autoreload_celery --type="$1"
1 change: 1 addition & 0 deletions cypress/productAnalytics/index.ts
@@ -167,6 +167,7 @@ export const dashboard = {
 
         cy.get('[data-attr=dashboard-add-graph-header]').contains('Add insight').click()
         cy.get('[data-attr=toast-close-button]').click({ multiple: true })
+        cy.get('[data-attr=dashboard-add-new-insight]').contains('New insight').click()
 
         if (insightName) {
             cy.get('[data-attr="top-bar-name"] button').click()
@@ -19,6 +19,7 @@
        "posthog_user"."has_seen_product_intro_for",
        "posthog_user"."strapi_id",
        "posthog_user"."is_active",
+       "posthog_user"."role_at_organization",
        "posthog_user"."theme_mode",
        "posthog_user"."partial_notification_settings",
        "posthog_user"."anonymize_data",
@@ -191,6 +192,7 @@
        "posthog_user"."has_seen_product_intro_for",
        "posthog_user"."strapi_id",
        "posthog_user"."is_active",
+       "posthog_user"."role_at_organization",
        "posthog_user"."theme_mode",
        "posthog_user"."partial_notification_settings",
        "posthog_user"."anonymize_data",
5 changes: 5 additions & 0 deletions ee/clickhouse/views/experiments.py
@@ -192,6 +192,7 @@ class Meta:
             "type",
             "metrics",
             "metrics_secondary",
+            "stats_config",
         ]
         read_only_fields = [
             "id",
@@ -300,6 +301,9 @@ def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment:
         feature_flag_serializer.is_valid(raise_exception=True)
         feature_flag = feature_flag_serializer.save()
 
+        if not validated_data.get("stats_config"):
+            validated_data["stats_config"] = {"version": 2}
+
         experiment = Experiment.objects.create(
             team_id=self.context["team_id"], feature_flag=feature_flag, **validated_data
         )
@@ -376,6 +380,7 @@ def update(self, instance: Experiment, validated_data: dict, *args: Any, **kwarg
             "holdout",
             "metrics",
             "metrics_secondary",
+            "stats_config",
         }
         given_keys = set(validated_data.keys())
         extra_keys = given_keys - expected_keys
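For context, a minimal standalone sketch of the behavior these serializer changes introduce: experiments created without an explicit stats_config now default to the v2 stats engine, while an explicitly supplied value is kept. The helper names (apply_stats_config_default, get_stats_config) are hypothetical stand-ins for the create() hook above and the Experiment accessor exercised in the tests below, not the actual PostHog code.

from typing import Any, Optional


def apply_stats_config_default(validated_data: dict[str, Any]) -> dict[str, Any]:
    # Mirrors the create() hook above: default to the v2 stats engine
    # when no stats_config was supplied (hypothetical stand-in).
    if not validated_data.get("stats_config"):
        validated_data["stats_config"] = {"version": 2}
    return validated_data


def get_stats_config(stats_config: Optional[dict[str, Any]], key: str) -> Any:
    # Mirrors Experiment.get_stats_config(key) as used in the tests below,
    # treating a missing stats_config as an empty mapping (assumption).
    return (stats_config or {}).get(key)


assert apply_stats_config_default({})["stats_config"] == {"version": 2}
assert apply_stats_config_default({"stats_config": {"version": 1}})["stats_config"] == {"version": 1}
assert get_stats_config({"version": 2}, "version") == 2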
9 changes: 7 additions & 2 deletions ee/clickhouse/views/test/test_clickhouse_experiments.py
@@ -102,6 +102,11 @@ def test_creating_updating_basic_experiment(self):
         self.assertEqual(response.status_code, status.HTTP_201_CREATED)
         self.assertEqual(response.json()["name"], "Test Experiment")
         self.assertEqual(response.json()["feature_flag_key"], ff_key)
+        self.assertEqual(response.json()["stats_config"], {"version": 2})
+
+        id = response.json()["id"]
+        experiment = Experiment.objects.get(pk=id)
+        self.assertEqual(experiment.get_stats_config("version"), 2)
 
         created_ff = FeatureFlag.objects.get(key=ff_key)
 
@@ -110,20 +115,20 @@ def test_creating_updating_basic_experiment(self):
         self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test")
         self.assertEqual(created_ff.filters["groups"][0]["properties"], [])
 
-        id = response.json()["id"]
         end_date = "2021-12-10T00:00"
 
         # Now update
         response = self.client.patch(
             f"/api/projects/{self.team.id}/experiments/{id}",
-            {"description": "Bazinga", "end_date": end_date},
+            {"description": "Bazinga", "end_date": end_date, "stats_config": {"version": 1}},
         )
 
         self.assertEqual(response.status_code, status.HTTP_200_OK)
 
         experiment = Experiment.objects.get(pk=id)
         self.assertEqual(experiment.description, "Bazinga")
         self.assertEqual(experiment.end_date.strftime("%Y-%m-%dT%H:%M"), end_date)
+        self.assertEqual(experiment.get_stats_config("version"), 1)
 
     def test_creating_updating_web_experiment(self):
         ff_key = "a-b-tests"
14 changes: 8 additions & 6 deletions ee/hogai/assistant.py
@@ -10,14 +10,11 @@
 from pydantic import BaseModel
 
 from ee import settings
-from ee.hogai.funnels.nodes import (
-    FunnelGeneratorNode,
-)
+from ee.hogai.funnels.nodes import FunnelGeneratorNode
 from ee.hogai.graph import AssistantGraph
+from ee.hogai.retention.nodes import RetentionGeneratorNode
 from ee.hogai.schema_generator.nodes import SchemaGeneratorNode
-from ee.hogai.trends.nodes import (
-    TrendsGeneratorNode,
-)
+from ee.hogai.trends.nodes import TrendsGeneratorNode
 from ee.hogai.utils.asgi import SyncIterableToAsync
 from ee.hogai.utils.state import (
     GraphMessageUpdateTuple,
@@ -57,6 +54,7 @@
 VISUALIZATION_NODES: dict[AssistantNodeName, type[SchemaGeneratorNode]] = {
     AssistantNodeName.TRENDS_GENERATOR: TrendsGeneratorNode,
     AssistantNodeName.FUNNEL_GENERATOR: FunnelGeneratorNode,
+    AssistantNodeName.RETENTION_GENERATOR: RetentionGeneratorNode,
 }


@@ -166,6 +164,8 @@ def _node_to_reasoning_message(
             | AssistantNodeName.TRENDS_PLANNER_TOOLS
             | AssistantNodeName.FUNNEL_PLANNER
             | AssistantNodeName.FUNNEL_PLANNER_TOOLS
+            | AssistantNodeName.RETENTION_PLANNER
+            | AssistantNodeName.RETENTION_PLANNER_TOOLS
         ):
             substeps: list[str] = []
             if input:
@@ -191,6 +191,8 @@ def _node_to_reasoning_message(
             return ReasoningMessage(content="Creating trends query")
         case AssistantNodeName.FUNNEL_GENERATOR:
             return ReasoningMessage(content="Creating funnel query")
+        case AssistantNodeName.RETENTION_GENERATOR:
+            return ReasoningMessage(content="Creating retention query")
         case _:
             return None

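To illustrate the registry that the new RETENTION_GENERATOR entry extends, here is a minimal self-contained sketch of dispatching a generator class by node name. The enum values and node classes below are simplified stand-ins, not the real ee/hogai implementations.

from enum import Enum


class AssistantNodeName(str, Enum):
    # Simplified stand-in for the enum in ee/hogai/utils/types.
    TRENDS_GENERATOR = "trends_generator"
    FUNNEL_GENERATOR = "funnel_generator"
    RETENTION_GENERATOR = "retention_generator"


class SchemaGeneratorNode:
    # Stand-in base class; the real nodes generate insight query schemas.
    def run(self, plan: str) -> str:
        raise NotImplementedError


class RetentionGeneratorNode(SchemaGeneratorNode):
    def run(self, plan: str) -> str:
        return f"retention query generated from plan: {plan!r}"


VISUALIZATION_NODES: dict[AssistantNodeName, type[SchemaGeneratorNode]] = {
    AssistantNodeName.RETENTION_GENERATOR: RetentionGeneratorNode,
}

# Dispatch: resolve the class for a node name, instantiate it, and run it.
node_cls = VISUALIZATION_NODES[AssistantNodeName.RETENTION_GENERATOR]
print(node_cls().run("signed_up -> file_uploaded"))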
76 changes: 76 additions & 0 deletions ee/hogai/eval/tests/test_eval_retention_generator.py
@@ -0,0 +1,76 @@
from collections.abc import Callable
from typing import cast

import pytest
from langgraph.graph.state import CompiledStateGraph

from ee.hogai.assistant import AssistantGraph
from ee.hogai.utils.types import AssistantNodeName, AssistantState
from posthog.schema import (
    AssistantRetentionQuery,
    HumanMessage,
    RetentionEntity,
    VisualizationMessage,
)


@pytest.fixture
def call_node(team, runnable_config) -> Callable[[str, str], AssistantRetentionQuery]:
    graph: CompiledStateGraph = (
        AssistantGraph(team)
        .add_edge(AssistantNodeName.START, AssistantNodeName.RETENTION_GENERATOR)
        .add_retention_generator(AssistantNodeName.END)
        .compile()
    )

    def callable(query: str, plan: str) -> AssistantRetentionQuery:
        state = graph.invoke(
            AssistantState(messages=[HumanMessage(content=query)], plan=plan),
            runnable_config,
        )
        message = cast(VisualizationMessage, AssistantState.model_validate(state).messages[-1])
        answer = message.answer
        assert isinstance(answer, AssistantRetentionQuery), "Expected AssistantRetentionQuery"
        return answer

    return callable


def test_node_replaces_equals_with_contains(call_node):
    query = "Show file upload retention after signup for users with name John"
    plan = """Target event:
    - signed_up
    Returning event:
    - file_uploaded
    Filters:
        - property filter 1:
            - person
            - name
            - equals
            - John
    """
    actual_output = call_node(query, plan).model_dump_json(exclude_none=True)
    assert "exact" not in actual_output
    assert "icontains" in actual_output
    assert "John" not in actual_output
    assert "john" in actual_output


def test_basic_retention_structure(call_node):
    query = "Show retention for users who signed up"
    plan = """Target Event:
    - signed_up
    Returning Event:
    - file_uploaded
    """
    actual_output = call_node(query, plan)
    assert actual_output.retentionFilter is not None
    assert actual_output.retentionFilter.targetEntity == RetentionEntity(
        id="signed_up", type="events", name="signed_up", order=0
    )
    assert actual_output.retentionFilter.returningEntity == RetentionEntity(
        id="file_uploaded", type="events", name="file_uploaded", order=0
    )
118 changes: 118 additions & 0 deletions ee/hogai/eval/tests/test_eval_retention_planner.py
@@ -0,0 +1,118 @@
from collections.abc import Callable

import pytest
from deepeval import assert_test
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
from langchain_core.runnables.config import RunnableConfig
from langgraph.graph.state import CompiledStateGraph

from ee.hogai.assistant import AssistantGraph
from ee.hogai.utils.types import AssistantNodeName, AssistantState
from posthog.schema import HumanMessage


@pytest.fixture(scope="module")
def metric():
    return GEval(
        name="Retention Plan Correctness",
        criteria="You will be given expected and actual generated plans to provide a taxonomy to answer a user's question with a retention insight. Compare the plans to determine whether the taxonomy of the actual plan matches the expected plan. Do not apply general knowledge about retention insights.",
        evaluation_steps=[
            "A plan must define both a target event (cohort-defining event) and a returning event (retention-measuring event), but it is not required to define any filters. It can't have breakdowns.",
            "Compare target event, returning event, properties, and property values of 'expected output' and 'actual output'. Do not penalize if the actual output does not include a timeframe.",
            "Check if the combination of target events, returning events, properties, and property values in 'actual output' can answer the user's question according to the 'expected output'.",
            "If 'expected output' contains a breakdown, check if 'actual output' contains a similar breakdown, and heavily penalize if the breakdown is not present or different.",
            # We don't want to see unnecessary property filters in the output. The assistant tries to use them all the time.
            "Heavily penalize if the 'actual output' contains any excessive output not present in the 'expected output'. For example, the `is set` operator in filters should not be used unless the user explicitly asks for it.",
        ],
        evaluation_params=[
            LLMTestCaseParams.INPUT,
            LLMTestCaseParams.EXPECTED_OUTPUT,
            LLMTestCaseParams.ACTUAL_OUTPUT,
        ],
        threshold=0.7,
    )


@pytest.fixture
def call_node(team, runnable_config: RunnableConfig) -> Callable[[str], str]:
    graph: CompiledStateGraph = (
        AssistantGraph(team)
        .add_edge(AssistantNodeName.START, AssistantNodeName.RETENTION_PLANNER)
        .add_retention_planner(AssistantNodeName.END)
        .compile()
    )

    def callable(query: str) -> str:
        raw_state = graph.invoke(
            AssistantState(messages=[HumanMessage(content=query)]),
            runnable_config,
        )
        state = AssistantState.model_validate(raw_state)
        return state.plan or "NO PLAN WAS GENERATED"

    return callable


def test_basic_retention(metric, call_node):
    query = "What's the file upload retention of new users?"
    test_case = LLMTestCase(
        input=query,
        expected_output="""
        Target event:
        - signed_up
        Returning event:
        - uploaded_file
        """,
        actual_output=call_node(query),
    )
    assert_test(test_case, [metric])


def test_basic_filtering(metric, call_node):
    query = "Show retention of Chrome users uploading files"
    test_case = LLMTestCase(
        input=query,
        expected_output="""
        Target event:
        - uploaded_file
        Returning event:
        - uploaded_file
        Filters:
        - property filter 1:
            - entity: event
            - property name: $browser
            - property type: String
            - operator: equals
            - property value: Chrome
        """,
        actual_output=call_node(query),
    )
    assert_test(test_case, [metric])


def test_needle_in_a_haystack(metric, call_node):
    query = "Show retention for users who have paid a bill and are on the personal/pro plan"
    test_case = LLMTestCase(
        input=query,
        expected_output="""
        Target event:
        - paid_bill
        Returning event:
        - downloaded_file
        Filters:
        - property filter 1:
            - entity: account
            - property name: plan
            - property type: String
            - operator: equals
            - property value: personal/pro
        """,
        actual_output=call_node(query),
    )
    assert_test(test_case, [metric])