From 1866e645974894d290f032abd64919eb1101d2ab Mon Sep 17 00:00:00 2001 From: Julian Minder Date: Thu, 16 Nov 2023 20:15:38 +0100 Subject: [PATCH] added examples --- examples/example_iris.ipynb | 190 ++++++++++++++++++++++++++ examples/example_northwind.ipynb | 220 +++++++++++++++++++++++++++++++ 2 files changed, 410 insertions(+) create mode 100644 examples/example_iris.ipynb create mode 100644 examples/example_northwind.ipynb diff --git a/examples/example_iris.ipynb b/examples/example_iris.ipynb new file mode 100644 index 0000000..52953bf --- /dev/null +++ b/examples/example_iris.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Hack to make the module importable\n", + "import sys\n", + "sys.path.append(r'./../')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import neo4j\n", + "import pandas as pd\n", + "\n", + "from rel2graph.relational_modules.pandas import PandasDataFrameIterator\n", + "from rel2graph import IteratorIterator\n", + "from rel2graph import Converter\n", + "from rel2graph.utils import load_file\n", + "from rel2graph import register_attribute_postprocessor, Attribute\n", + "\n", + "import rel2graph.common_modules.types # For FLOAT, INT, etc. wrappers\n", + "# This is required because the pandas dataframe iterator will convert all values \n", + "# to int64 which is not supported by neo4j" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure Logging\n", + "import logging\n", + "\n", + "#logging.basicConfig(level=logging.WARNING)\n", + "logger = logging.getLogger(\"rel2graph\")\n", + "logger.setLevel(logging.INFO)\n", + "log_formatter = logging.Formatter(\"%(asctime)s [%(threadName)s]::[%(levelname)s]::%(filename)s: %(message)s\")\n", + "console_handler = logging.StreamHandler()\n", + "console_handler.setFormatter(log_formatter)\n", + "logger.addHandler(console_handler)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')\n", + "iris" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " \"ID\": [1,2,2,3,4,4],\n", + " \"FirstName\": [\"Julian\", \"Fritz\", \"Fritz\", \"Hans\", \"Rudolfo\", \"Rudolfo\"],\n", + " \"LastName\": [\"Minder\", \"Generic\", \"SomeGuy\", \"Müller\", \"Muster\", \"Muster\"],\n", + " \"FavoriteFlower\": [\"virginica\", \"setosa\", \"setosa\", \"versicolor\", \"setosa\", \"setosa\"]\n", + "}\n", + "people = pd.DataFrame(data)\n", + "people" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "schema = \"\"\"\n", + "ENTITY(\"Flower\"):\n", + " NODE(\"Flower\") flower:\n", + " - sepal_length = FLOAT(Flower.sepal_length)\n", + " - petal_length = FLOAT(Flower.petal_width)\n", + " - sepal_width = FLOAT(Flower.sepal_width)\n", + " - petal_width = FLOAT(Flower.petal_width)\n", + " NODE(\"Species\", \"BioEntity\") species:\n", + " + Name = Flower.species\n", + " RELATIONSHIP(flower, \"is\", species):\n", + " \n", + "ENTITY(\"Person\"):\n", + " NODE(\"Person\") person:\n", + " + ID = INT(Person.ID)\n", + " - FirstName = Person.FirstName\n", + " - LastName = Person.LastName\n", + "\n", + " RELATIONSHIP(person, \"likes\", MATCH(\"Species\", Name=Person.FavoriteFlower)):\n", + " - Since = \"4ever\"\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uri = \"bolt://localhost:7687\"\n", + "auth = neo4j.basic_auth(\"neo4j\", \"password\") # CHANGE TO YOUR CREDENTIALS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete all nodes and relationships\n", + "driver = neo4j.GraphDatabase().driver(uri, auth=auth)\n", + "with driver.session() as session:\n", + " session.run(\"MATCH (n) DETACH DELETE n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iterator = IteratorIterator([PandasDataFrameIterator(people, \"Person\"), PandasDataFrameIterator(iris, \"Flower\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "converter = Converter(schema, iterator, uri, auth, num_workers=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm.notebook import tqdm\n", + "converter(progress_bar=tqdm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "b412206d49013109e888184d145344cd80b977ea9059b5a051a9ff53a4d07d7f" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/example_northwind.ipynb b/examples/example_northwind.ipynb new file mode 100644 index 0000000..573a878 --- /dev/null +++ b/examples/example_northwind.ipynb @@ -0,0 +1,220 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Hack to make the module importable\n", + "import sys\n", + "sys.path.append(r'./../')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import neo4j\n", + "import pandas as pd\n", + "\n", + "from rel2graph.relational_modules.pandas import PandasDataFrameIterator\n", + "from rel2graph import IteratorIterator\n", + "from rel2graph import Converter\n", + "from rel2graph.utils import load_file\n", + "from rel2graph import register_subgraph_preprocessor\n", + "\n", + "import rel2graph.common_modules.types # For FLOAT, INT, etc. wrappers\n", + "# This is required because the pandas dataframe iterator will convert all values \n", + "# to int64 which is not supported by neo4j" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure Logging\n", + "import logging\n", + "\n", + "#logging.basicConfig(level=logging.WARNING)\n", + "logger = logging.getLogger(\"rel2graph\")\n", + "logger.setLevel(logging.INFO)\n", + "log_formatter = logging.Formatter(\"%(asctime)s [%(threadName)s]::[%(levelname)s]::%(filename)s: %(message)s\")\n", + "console_handler = logging.StreamHandler()\n", + "console_handler.setFormatter(log_formatter)\n", + "logger.addHandler(console_handler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "schema = \"\"\"\n", + "ENTITY(\"orders\"):\n", + " NODE(\"Order\") order:\n", + " + orderID = INT(orders.OrderID)\n", + " - shipName = orders.ShipName\n", + " NODE(\"Product\") product:\n", + " + productID = INT(products.ProductID)\n", + " NODE(\"Employee\") employee:\n", + " + employeeID = INT(employees.EmployeeID)\n", + " \n", + " RELATIONSHIP(order, \"CONTAINS\", product):\n", + " - unitPrice = FLOAT(orders.UnitPrice)\n", + " - quantity = FLOAT(orders.Quantity)\n", + "\n", + " RELATIONSHIP(employee, \"SOLD\", order):\n", + "\n", + "\n", + "ENTITY(\"suppliers\"):\n", + " NODE(\"Supplier\") supplier:\n", + " + supplierID = INT(suppliers.SupplierID)\n", + " - companyName = suppliers.CompanyName\n", + "\n", + "\n", + "ENTITY(\"products\"):\n", + " NODE(\"Product\") product:\n", + " + productID = INT(products.ProductID)\n", + " - productName = products.ProductName\n", + " - unitPrice = FLOAT(products.UnitPrice)\n", + "\n", + " NODE(\"Supplier\") supplier:\n", + " + supplierID = INT(suppliers.SupplierID)\n", + " \n", + " NODE(\"Category\") category:\n", + " + categoryID = INT(categories.CategoryID)\n", + "\n", + " RELATIONSHIP(supplier, \"SUPPLIES\", product):\n", + " \n", + " RELATIONSHIP(product, \"PART_OF\", category):\n", + "\n", + "\n", + "ENTITY(\"employees\"):\n", + " NODE(\"Employee\") employee:\n", + " + employeeID = INT(employees.EmployeeID)\n", + " - firstName = employees.FirstName\n", + " - lastName = employees.LastName\n", + " - title = employees.Title\n", + "\n", + " IF_HAS_BOSS(RELATIONSHIP(employee, \"REPORTS_TO\", MATCH(\"Employee\", employeeID = INT(employees.ReportsTo)))):\n", + "\n", + "\n", + "ENTITY(\"categories\"):\n", + " NODE(\"Category\") category:\n", + " + categoryID = INT(categories.CategoryID)\n", + " - categoryName = categories.CategoryName\n", + " - description = categories.Description\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@register_subgraph_preprocessor\n", + "def IF_HAS_BOSS(resource):\n", + " if pd.isna(resource[\"ReportsTo\"]):\n", + " return None\n", + " return resource" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uri = \"bolt://localhost:7687\"\n", + "auth = neo4j.basic_auth(\"neo4j\", \"password\") # CHANGE TO YOUR CREDENTIALS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete all nodes and relationships\n", + "driver = neo4j.GraphDatabase().driver(uri, auth=auth)\n", + "with driver.session() as session:\n", + " session.run(\"MATCH (n) DETACH DELETE n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create IteratorIterator\n", + "files = [\"categories\", \"employees\", \"orders\", \"products\", \"suppliers\"]\n", + "iterators = []\n", + "for file in files:\n", + " df = pd.read_csv(f\"https://raw.githubusercontent.com/neo4j-documentation/developer-resources/gh-pages/data/northwind/{file}.csv\")\n", + " iterators.append(PandasDataFrameIterator(df, file))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iterator = IteratorIterator(iterators)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "converter = Converter(schema, iterator, uri, auth, num_workers=1, serialize=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm.notebook import tqdm\n", + "converter(progress_bar=tqdm)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "b412206d49013109e888184d145344cd80b977ea9059b5a051a9ff53a4d07d7f" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}