forked from run-llama/llama_index
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Neo4j metadata filtering support (run-llama#12923)
- Loading branch information
Showing
4 changed files
with
523 additions
and
5 deletions.
There are no files selected for viewing
394 changes: 394 additions & 0 deletions
394
docs/docs/examples/vector_stores/neo4j_metadata_filter.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,394 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "71144bf9", | ||
"metadata": {}, | ||
"source": [ | ||
"<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/neo4j_metadata_filter.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "307804a3-c02b-4a57-ac0d-172c30ddc851", | ||
"metadata": {}, | ||
"source": [ | ||
"# Neo4j Vector Store - Metadata Filter" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "3ceaf5c9", | ||
"metadata": {}, | ||
"source": [ | ||
"If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "e5715fa1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%pip install llama-index-vector-stores-neo4jvector" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "04b9d10f", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# %pip install \"llama-index>=0.9.31\" neo4j" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "d48af8e1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import logging\n", | ||
"import sys\n", | ||
"import os\n", | ||
"\n", | ||
"logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", | ||
"logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "f7010b1d-d1bb-4f08-9309-a328bb4ea396", | ||
"metadata": {}, | ||
"source": [ | ||
"Connect to Neo4j and create the vector store" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "4ad14111-0bbb-4c62-906d-6d6253e0cdee", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"INFO:numexpr.utils:Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", | ||
"Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", | ||
"INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n", | ||
"NumExpr defaulting to 8 threads.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import os\n", | ||
"from llama_index.vector_stores.neo4jvector import Neo4jVectorStore\n", | ||
"\n", | ||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n", | ||
"\n", | ||
"username = \"neo4j\"\n", | ||
"password = \"password\"\n", | ||
"url = \"bolt://localhost:7687\"\n", | ||
"embed_dim = 1536 # Dimensions are for text-embedding-ada-002\n", | ||
"\n", | ||
"vector_store = Neo4jVectorStore(username, password, url, embed_dim)" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "8ee4473a-094f-4d0a-a825-e1213db07240", | ||
"metadata": {}, | ||
"source": [ | ||
"Build the VectorStoreIndex" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9ae59590", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from llama_index.core import VectorStoreIndex, StorageContext\n", | ||
"from llama_index.core.schema import TextNode\n", | ||
"\n", | ||
"nodes = [\n", | ||
" TextNode(\n", | ||
" text=\"The Shawshank Redemption\",\n", | ||
" metadata={\n", | ||
" \"author\": \"Stephen King\",\n", | ||
" \"theme\": \"Friendship\",\n", | ||
" \"year\": 1994,\n", | ||
" },\n", | ||
" ),\n", | ||
" TextNode(\n", | ||
" text=\"The Godfather\",\n", | ||
" metadata={\n", | ||
" \"director\": \"Francis Ford Coppola\",\n", | ||
" \"theme\": \"Mafia\",\n", | ||
" \"year\": 1972,\n", | ||
" },\n", | ||
" ),\n", | ||
" TextNode(\n", | ||
" text=\"Inception\",\n", | ||
" metadata={\n", | ||
" \"director\": \"Christopher Nolan\",\n", | ||
" \"theme\": \"Fiction\",\n", | ||
" \"year\": 2010,\n", | ||
" },\n", | ||
" ),\n", | ||
" TextNode(\n", | ||
" text=\"To Kill a Mockingbird\",\n", | ||
" metadata={\n", | ||
" \"author\": \"Harper Lee\",\n", | ||
" \"theme\": \"Mafia\",\n", | ||
" \"year\": 1960,\n", | ||
" },\n", | ||
" ),\n", | ||
" TextNode(\n", | ||
" text=\"1984\",\n", | ||
" metadata={\n", | ||
" \"author\": \"George Orwell\",\n", | ||
" \"theme\": \"Totalitarianism\",\n", | ||
" \"year\": 1949,\n", | ||
" },\n", | ||
" ),\n", | ||
" TextNode(\n", | ||
" text=\"The Great Gatsby\",\n", | ||
" metadata={\n", | ||
" \"author\": \"F. Scott Fitzgerald\",\n", | ||
" \"theme\": \"The American Dream\",\n", | ||
" \"year\": 1925,\n", | ||
" },\n", | ||
" ),\n", | ||
" TextNode(\n", | ||
" text=\"Harry Potter and the Sorcerer's Stone\",\n", | ||
" metadata={\n", | ||
" \"author\": \"J.K. Rowling\",\n", | ||
" \"theme\": \"Fiction\",\n", | ||
" \"year\": 1997,\n", | ||
" },\n", | ||
" ),\n", | ||
"]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "ee6eeecb-d54f-4a71-b5fe-0cda8a5c3e10", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", | ||
"HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", | ||
"index = VectorStoreIndex(nodes, storage_context=storage_context)" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "8f0f49cf", | ||
"metadata": {}, | ||
"source": [ | ||
"Define metadata filters" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "22157658", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from llama_index.core.vector_stores import (\n", | ||
" MetadataFilter,\n", | ||
" MetadataFilters,\n", | ||
" FilterOperator,\n", | ||
")\n", | ||
"\n", | ||
"filters = MetadataFilters(\n", | ||
" filters=[\n", | ||
" MetadataFilter(\n", | ||
" key=\"theme\", operator=FilterOperator.EQ, value=\"Fiction\"\n", | ||
" ),\n", | ||
" ]\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "f31c16b3", | ||
"metadata": {}, | ||
"source": [ | ||
"Retrieve from vector store with filters" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "147df357", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", | ||
"HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.9202238321304321),\n", | ||
" NodeWithScore(node=TextNode(id_='fc1df8cc-f1d3-4a7b-8c21-f83b18463758', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text=\"Harry Potter and the Sorcerer's Stone\", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8823964595794678)]" | ||
] | ||
}, | ||
"execution_count": null, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"retriever = index.as_retriever(filters=filters)\n", | ||
"retriever.retrieve(\"What is inception about?\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "88c105a6", | ||
"metadata": {}, | ||
"source": [ | ||
"Multiple Metadata Filters with `AND` condition" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "df8df0a7", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", | ||
"HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8818434476852417)]" | ||
] | ||
}, | ||
"execution_count": null, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"from llama_index.core.vector_stores import FilterOperator, FilterCondition\n", | ||
"\n", | ||
"filters = MetadataFilters(\n", | ||
" filters=[\n", | ||
" MetadataFilter(key=\"theme\", value=\"Fiction\"),\n", | ||
" MetadataFilter(key=\"year\", value=1997, operator=FilterOperator.GT),\n", | ||
" ],\n", | ||
" condition=FilterCondition.AND,\n", | ||
")\n", | ||
"\n", | ||
"retriever = index.as_retriever(filters=filters)\n", | ||
"retriever.retrieve(\"Harry Potter?\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "3731d3b2", | ||
"metadata": {}, | ||
"source": [ | ||
"Multiple Metadata Filters with `OR` condition" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "5eaa5d4d", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", | ||
"HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[NodeWithScore(node=TextNode(id_='fc1df8cc-f1d3-4a7b-8c21-f83b18463758', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text=\"Harry Potter and the Sorcerer's Stone\", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.9242331385612488),\n", | ||
" NodeWithScore(node=TextNode(id_='814e5f2a-2150-4bae-8a59-fa728379e978', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.8818434476852417)]" | ||
] | ||
}, | ||
"execution_count": null, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"from llama_index.core.vector_stores import FilterOperator, FilterCondition\n", | ||
"\n", | ||
"\n", | ||
"filters = MetadataFilters(\n", | ||
" filters=[\n", | ||
" MetadataFilter(key=\"theme\", value=\"Fiction\"),\n", | ||
" MetadataFilter(key=\"year\", value=1997, operator=FilterOperator.GT),\n", | ||
" ],\n", | ||
" condition=FilterCondition.OR,\n", | ||
")\n", | ||
"\n", | ||
"retriever = index.as_retriever(filters=filters)\n", | ||
"retriever.retrieve(\"Harry Potter?\")" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.