Skip to content

Commit

Permalink
Neo4j metadata filtering support (run-llama#12923)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomasonjo authored Apr 18, 2024
1 parent 67218f0 commit 0cdb5b7
Show file tree
Hide file tree
Showing 4 changed files with 523 additions and 5 deletions.
394 changes: 394 additions & 0 deletions docs/docs/examples/vector_stores/neo4j_metadata_filter.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,394 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "71144bf9",
"metadata": {},
"source": [
"<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/neo4j_metadata_filter.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "307804a3-c02b-4a57-ac0d-172c30ddc851",
"metadata": {},
"source": [
"# Neo4j Vector Store - Metadata Filter"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "3ceaf5c9",
"metadata": {},
"source": [
"If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5715fa1",
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-vector-stores-neo4jvector"
]
},
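{
"cell_type": "code",
"execution_count": null,
"id": "04b9d10f",
"metadata": {},
"outputs": [],
"source": [
"# Uncomment on a fresh environment (e.g. Colab). The requirement is quoted so the\n",
"# shell does not treat `>` as an output redirect.\n",
"# %pip install \"llama-index>=0.10.0\" neo4j"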
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d48af8e1",
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import sys\n",
"import os\n",
"\n",
"logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
"logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "f7010b1d-d1bb-4f08-9309-a328bb4ea396",
"metadata": {},
"source": [
"Build a Neo4j vector Index and connect to it"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ad14111-0bbb-4c62-906d-6d6253e0cdee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:numexpr.utils:Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
"Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
"INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n",
"NumExpr defaulting to 8 threads.\n"
]
}
],
"source": [
"import os\n",
"from llama_index.vector_stores.neo4jvector import Neo4jVectorStore\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n",
"\n",
"username = \"neo4j\"\n",
"password = \"password\"\n",
"url = \"bolt://localhost:7687\"\n",
"embed_dim = 1536 # Dimensions are for text-embedding-ada-002\n",
"\n",
"vector_store = Neo4jVectorStore(username, password, url, embed_dim)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "8ee4473a-094f-4d0a-a825-e1213db07240",
"metadata": {},
"source": [
"Build the VectorStoreIndex"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ae59590",
"metadata": {},
"outputs": [],
"source": [
"from llama_index.core import VectorStoreIndex, StorageContext\n",
"from llama_index.core.schema import TextNode\n",
"\n",
"nodes = [\n",
" TextNode(\n",
" text=\"The Shawshank Redemption\",\n",
" metadata={\n",
" \"author\": \"Stephen King\",\n",
" \"theme\": \"Friendship\",\n",
" \"year\": 1994,\n",
" },\n",
" ),\n",
" TextNode(\n",
" text=\"The Godfather\",\n",
" metadata={\n",
" \"director\": \"Francis Ford Coppola\",\n",
" \"theme\": \"Mafia\",\n",
" \"year\": 1972,\n",
" },\n",
" ),\n",
" TextNode(\n",
" text=\"Inception\",\n",
" metadata={\n",
" \"director\": \"Christopher Nolan\",\n",
" \"theme\": \"Fiction\",\n",
" \"year\": 2010,\n",
" },\n",
" ),\n",
" TextNode(\n",
" text=\"To Kill a Mockingbird\",\n",
" metadata={\n",
" \"author\": \"Harper Lee\",\n",
" \"theme\": \"Mafia\",\n",
" \"year\": 1960,\n",
" },\n",
" ),\n",
" TextNode(\n",
" text=\"1984\",\n",
" metadata={\n",
" \"author\": \"George Orwell\",\n",
" \"theme\": \"Totalitarianism\",\n",
" \"year\": 1949,\n",
" },\n",
" ),\n",
" TextNode(\n",
" text=\"The Great Gatsby\",\n",
" metadata={\n",
" \"author\": \"F. Scott Fitzgerald\",\n",
" \"theme\": \"The American Dream\",\n",
" \"year\": 1925,\n",
" },\n",
" ),\n",
" TextNode(\n",
" text=\"Harry Potter and the Sorcerer's Stone\",\n",
" metadata={\n",
" \"author\": \"J.K. Rowling\",\n",
" \"theme\": \"Fiction\",\n",
" \"year\": 1997,\n",
" },\n",
" ),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee6eeecb-d54f-4a71-b5fe-0cda8a5c3e10",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
]
}
],
"source": [
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"index = VectorStoreIndex(nodes, storage_context=storage_context)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "8f0f49cf",
"metadata": {},
"source": [
"Define metadata filters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22157658",
"metadata": {},
"outputs": [],
"source": [
"from llama_index.core.vector_stores import (\n",
" MetadataFilter,\n",
" MetadataFilters,\n",
" FilterOperator,\n",
")\n",
"\n",
"filters = MetadataFilters(\n",
" filters=[\n",
" MetadataFilter(\n",
" key=\"theme\", operator=FilterOperator.EQ, value=\"Fiction\"\n",
" ),\n",
" ]\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "f31c16b3",
"metadata": {},
"source": [
"Retrieve from vector store with filters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "147df357",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
]
},
{
"data": {
"text/plain": [
"[NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.9202238321304321),\n",
" NodeWithScore(node=TextNode(id_='fc1df8cc-f1d3-4a7b-8c21-f83b18463758', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text=\"Harry Potter and the Sorcerer's Stone\", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8823964595794678)]"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever = index.as_retriever(filters=filters)\n",
"retriever.retrieve(\"What is inception about?\")"
]
},
{
"cell_type": "markdown",
"id": "88c105a6",
"metadata": {},
"source": [
"Multiple Metadata Filters with `AND` condition"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df8df0a7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
]
},
{
"data": {
"text/plain": [
"[NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8818434476852417)]"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from llama_index.core.vector_stores import FilterOperator, FilterCondition\n",
"\n",
"filters = MetadataFilters(\n",
" filters=[\n",
" MetadataFilter(key=\"theme\", value=\"Fiction\"),\n",
" MetadataFilter(key=\"year\", value=1997, operator=FilterOperator.GT),\n",
" ],\n",
" condition=FilterCondition.AND,\n",
")\n",
"\n",
"retriever = index.as_retriever(filters=filters)\n",
"retriever.retrieve(\"Harry Potter?\")"
]
},
{
"cell_type": "markdown",
"id": "3731d3b2",
"metadata": {},
"source": [
"Multiple Metadata Filters with `OR` condition"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5eaa5d4d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
"HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
]
},
{
"data": {
"text/plain": [
"[NodeWithScore(node=TextNode(id_='fc1df8cc-f1d3-4a7b-8c21-f83b18463758', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text=\"Harry Potter and the Sorcerer's Stone\", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.9242331385612488),\n",
" NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8818434476852417)]"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from llama_index.core.vector_stores import FilterOperator, FilterCondition\n",
"\n",
"\n",
"filters = MetadataFilters(\n",
" filters=[\n",
" MetadataFilter(key=\"theme\", value=\"Fiction\"),\n",
" MetadataFilter(key=\"year\", value=1997, operator=FilterOperator.GT),\n",
" ],\n",
" condition=FilterCondition.OR,\n",
")\n",
"\n",
"retriever = index.as_retriever(filters=filters)\n",
"retriever.retrieve(\"Harry Potter?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 0cdb5b7

Please sign in to comment.