diff --git a/examples/example_iris.ipynb b/examples/example_iris.ipynb new file mode 100644 index 0000000..52953bf --- /dev/null +++ b/examples/example_iris.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Hack to make the module importable\n", + "import sys\n", + "sys.path.append(r'./../')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import neo4j\n", + "import pandas as pd\n", + "\n", + "from rel2graph.relational_modules.pandas import PandasDataFrameIterator\n", + "from rel2graph import IteratorIterator\n", + "from rel2graph import Converter\n", + "from rel2graph.utils import load_file\n", + "from rel2graph import register_attribute_postprocessor, Attribute\n", + "\n", + "import rel2graph.common_modules.types # For FLOAT, INT, etc. wrappers\n", + "# This is required because the pandas dataframe iterator converts all integer values \n", + "# to numpy int64, which is not supported by the neo4j driver" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure Logging\n", + "import logging\n", + "\n", + "#logging.basicConfig(level=logging.WARNING)\n", + "logger = logging.getLogger(\"rel2graph\")\n", + "logger.setLevel(logging.INFO)\n", + "log_formatter = logging.Formatter(\"%(asctime)s [%(threadName)s]::[%(levelname)s]::%(filename)s: %(message)s\")\n", + "console_handler = logging.StreamHandler()\n", + "console_handler.setFormatter(log_formatter)\n", + "logger.addHandler(console_handler)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')\n", + "iris" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = {\n", + " \"ID\": [1,2,2,3,4,4],\n", + " \"FirstName\": [\"Julian\", \"Fritz\", \"Fritz\", \"Hans\", \"Rudolfo\", \"Rudolfo\"],\n", + " \"LastName\": [\"Minder\", \"Generic\", \"SomeGuy\", \"Müller\", \"Muster\", \"Muster\"],\n", + " \"FavoriteFlower\": [\"virginica\", \"setosa\", \"setosa\", \"versicolor\", \"setosa\", \"setosa\"]\n", + "}\n", + "people = pd.DataFrame(data)\n", + "people" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "schema = \"\"\"\n", + "ENTITY(\"Flower\"):\n", + " NODE(\"Flower\") flower:\n", + " - sepal_length = FLOAT(Flower.sepal_length)\n", + " - petal_length = FLOAT(Flower.petal_length)\n", + " - sepal_width = FLOAT(Flower.sepal_width)\n", + " - petal_width = FLOAT(Flower.petal_width)\n", + " NODE(\"Species\", \"BioEntity\") species:\n", + " + Name = Flower.species\n", + " RELATIONSHIP(flower, \"is\", species):\n", + " \n", + "ENTITY(\"Person\"):\n", + " NODE(\"Person\") person:\n", + " + ID = INT(Person.ID)\n", + " - FirstName = Person.FirstName\n", + " - LastName = Person.LastName\n", + "\n", + " RELATIONSHIP(person, \"likes\", MATCH(\"Species\", Name=Person.FavoriteFlower)):\n", + " - Since = \"4ever\"\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uri = \"bolt://localhost:7687\"\n", + "auth = neo4j.basic_auth(\"neo4j\", \"password\") # CHANGE TO YOUR CREDENTIALS" + ] + }, + { + "cell_type": "code", + "execution_count": null, +
"metadata": {}, + "outputs": [], + "source": [ + "# Delete all nodes and relationships\n", + "driver = neo4j.GraphDatabase().driver(uri, auth=auth)\n", + "with driver.session() as session:\n", + " session.run(\"MATCH (n) DETACH DELETE n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iterator = IteratorIterator([PandasDataFrameIterator(people, \"Person\"), PandasDataFrameIterator(iris, \"Flower\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "converter = Converter(schema, iterator, uri, auth, num_workers=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm.notebook import tqdm\n", + "converter(progress_bar=tqdm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "b412206d49013109e888184d145344cd80b977ea9059b5a051a9ff53a4d07d7f" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/example_northwind.ipynb b/examples/example_northwind.ipynb new file mode 100644 index 0000000..573a878 --- /dev/null +++ b/examples/example_northwind.ipynb @@ -0,0 +1,220 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Hack to make the module importable\n", + "import sys\n", + "sys.path.append(r'./../')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "import neo4j\n", + "import pandas as pd\n", + "\n", + "from rel2graph.relational_modules.pandas import PandasDataFrameIterator\n", + "from rel2graph import IteratorIterator\n", + "from rel2graph import Converter\n", + "from rel2graph.utils import load_file\n", + "from rel2graph import register_subgraph_preprocessor\n", + "\n", + "import rel2graph.common_modules.types # For FLOAT, INT, etc. 
wrappers\n", + "# This is required because the pandas dataframe iterator will convert all values \n", + "# to int64 which is not supported by neo4j" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure Logging\n", + "import logging\n", + "\n", + "#logging.basicConfig(level=logging.WARNING)\n", + "logger = logging.getLogger(\"rel2graph\")\n", + "logger.setLevel(logging.INFO)\n", + "log_formatter = logging.Formatter(\"%(asctime)s [%(threadName)s]::[%(levelname)s]::%(filename)s: %(message)s\")\n", + "console_handler = logging.StreamHandler()\n", + "console_handler.setFormatter(log_formatter)\n", + "logger.addHandler(console_handler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "schema = \"\"\"\n", + "ENTITY(\"orders\"):\n", + " NODE(\"Order\") order:\n", + " + orderID = INT(orders.OrderID)\n", + " - shipName = orders.ShipName\n", + " NODE(\"Product\") product:\n", + " + productID = INT(products.ProductID)\n", + " NODE(\"Employee\") employee:\n", + " + employeeID = INT(employees.EmployeeID)\n", + " \n", + " RELATIONSHIP(order, \"CONTAINS\", product):\n", + " - unitPrice = FLOAT(orders.UnitPrice)\n", + " - quantity = FLOAT(orders.Quantity)\n", + "\n", + " RELATIONSHIP(employee, \"SOLD\", order):\n", + "\n", + "\n", + "ENTITY(\"suppliers\"):\n", + " NODE(\"Supplier\") supplier:\n", + " + supplierID = INT(suppliers.SupplierID)\n", + " - companyName = suppliers.CompanyName\n", + "\n", + "\n", + "ENTITY(\"products\"):\n", + " NODE(\"Product\") product:\n", + " + productID = INT(products.ProductID)\n", + " - productName = products.ProductName\n", + " - unitPrice = FLOAT(products.UnitPrice)\n", + "\n", + " NODE(\"Supplier\") supplier:\n", + " + supplierID = INT(suppliers.SupplierID)\n", + " \n", + " NODE(\"Category\") category:\n", + " + categoryID = INT(categories.CategoryID)\n", + "\n", + " RELATIONSHIP(supplier, \"SUPPLIES\", product):\n", + " \n", + " RELATIONSHIP(product, \"PART_OF\", category):\n", + "\n", + "\n", + "ENTITY(\"employees\"):\n", + " NODE(\"Employee\") employee:\n", + " + employeeID = INT(employees.EmployeeID)\n", + " - firstName = employees.FirstName\n", + " - lastName = employees.LastName\n", + " - title = employees.Title\n", + "\n", + " IF_HAS_BOSS(RELATIONSHIP(employee, \"REPORTS_TO\", MATCH(\"Employee\", employeeID = INT(employees.ReportsTo)))):\n", + "\n", + "\n", + "ENTITY(\"categories\"):\n", + " NODE(\"Category\") category:\n", + " + categoryID = INT(categories.CategoryID)\n", + " - categoryName = categories.CategoryName\n", + " - description = categories.Description\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@register_subgraph_preprocessor\n", + "def IF_HAS_BOSS(resource):\n", + " if pd.isna(resource[\"ReportsTo\"]):\n", + " return None\n", + " return resource" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uri = \"bolt://localhost:7687\"\n", + "auth = neo4j.basic_auth(\"neo4j\", \"password\") # CHANGE TO YOUR CREDENTIALS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete all nodes and relationships\n", + "driver = neo4j.GraphDatabase().driver(uri, auth=auth)\n", + "with driver.session() as session:\n", + " session.run(\"MATCH (n) DETACH DELETE n\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create IteratorIterator\n", + "files = [\"categories\", \"employees\", \"orders\", \"products\", \"suppliers\"]\n", + "iterators = []\n", + "for file in files:\n", + " df = pd.read_csv(f\"https://raw.githubusercontent.com/neo4j-documentation/developer-resources/gh-pages/data/northwind/{file}.csv\")\n", + " iterators.append(PandasDataFrameIterator(df, file))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iterator = IteratorIterator(iterators)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "converter = Converter(schema, iterator, uri, auth, num_workers=1, serialize=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm.notebook import tqdm\n", + "converter(progress_bar=tqdm)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "b412206d49013109e888184d145344cd80b977ea9059b5a051a9ff53a4d07d7f" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/rel2graph/core/converter.py b/rel2graph/core/converter.py index f6013b8..be5b960 100644 --- a/rel2graph/core/converter.py +++ b/rel2graph/core/converter.py @@ -28,7 +28,7 @@ class WorkType(IntEnum): NODE = 0 - RELATION = 1 + RELATIONSHIP = 1 __process_config = None @@ -46,7 +46,7 @@ class WorkerConfig: nodes_flag: Flag that is set when the worker should process nodes processed_resources: Counter for the number of processed resources processed_nodes: Counter for the number of processed nodes - processed_relations: Counter for the number of processed relations + processed_relationships: Counter for the number of processed relationships processed_lock: Lock to ensure that only one process is writing to the counters at a time """ @@ -57,7 +57,7 @@ def __init__(self, neo4j_uri: str, neo4j_auth: Auth) -> None: neo4j_uri: The uri of the neo4j database neo4j_auth: The authentication for the neo4j database """ - self.factories, self.node_mask, self.relation_mask = None, None, None + self.factories, self.node_mask, self.relationship_mask = None, None, None self.graph_lock = mp.Lock() self.neo4j_uri = neo4j_uri self.neo4j_auth = neo4j_auth @@ -66,7 +66,7 @@ def __init__(self, neo4j_uri: str, neo4j_auth: Auth) -> None: self.nodes_flag = mp.Event() self.processed_resources = mp.Value('i', 0) self.processed_nodes = mp.Value('i', 0) - self.processed_relations = mp.Value('i', 0) + self.processed_relationships = mp.Value('i', 0) self.processed_lock = mp.Lock() self._graph_driver = None @@ -101,14 +101,14 @@ def commit_wrap(function): def commit_batch(to_create: Subgraph, to_merge: Subgraph) -> None: """"Commits processed batch to graph.""" nodes_committed = 0 - relations_committed = 0 + relationships_committed = 0 # Creating does not rely on synchronous executions if len(to_create.nodes) + len(to_create.relationships) > 0: with __process_config.graph_driver.session() as session: commit_wrap(lambda: session.execute_write(to_create.__db_create__)) nodes_committed += len(to_create.nodes) - relations_committed 
+= len(to_create.relationships) + relationships_committed += len(to_create.relationships) # Merging nodes requires serialization (synchronous executions) between processes # Using locks to enforce this @@ -117,13 +117,13 @@ def commit_batch(to_create: Subgraph, to_merge: Subgraph) -> None: with __process_config.graph_driver.session() as session: commit_wrap(lambda: session.execute_write(to_merge.__db_merge__)) nodes_committed += len(to_merge.nodes) - relations_committed += len(to_merge.relationships) + relationships_committed += len(to_merge.relationships) - # Update the processed nodes and relations - if nodes_committed > 0 or relations_committed > 0: + # Update the processed nodes and relationships + if nodes_committed > 0 or relationships_committed > 0: with __process_config.processed_lock: __process_config.processed_nodes.value += nodes_committed - __process_config.processed_relations.value += relations_committed + __process_config.processed_relationships.value += relationships_committed def process_batch(batch) -> None: """ @@ -134,8 +134,8 @@ def process_batch(batch) -> None: # __process_config is a global variable that contains the configuration for the current process batch = pickle.loads(batch) try: - work_type = WorkType.NODE if __process_config.nodes_flag.is_set() else WorkType.RELATION - mask = __process_config.node_mask if work_type == WorkType.NODE else __process_config.relation_mask + work_type = WorkType.NODE if __process_config.nodes_flag.is_set() else WorkType.RELATIONSHIP + mask = __process_config.node_mask if work_type == WorkType.NODE else __process_config.relationship_mask to_merge = [[], []] # List of resources to merge (nodes, rels) to_create = [[], []] # List of resources to create (nodes, rels) processed_resources = [] @@ -154,15 +154,15 @@ def process_batch(batch) -> None: try: subgraph = factory.construct(resource) except Exception as err: - err.args += (f"Encountered error when processing {'nodes' if __process_config.nodes_flag.is_set() else 'relations'} of {resource}.",) + err.args += (f"Encountered error when processing {'nodes' if __process_config.nodes_flag.is_set() else 'relationships'} of {resource}.",) raise err # We sort the subgraph based on if its parts should be # merged or just created. This is selected based on if the - # __primarykey__ property is set. Note that for relations - # this only matters if you use the GraphWithParallelRelations from - # rel2graph.py2neo_extensions (otherwise relations are always merged) + # __primarykey__ property is set.
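+ # Elements with a __primarykey__ are merged into the graph (MERGE semantics); + # elements without one are always created, which permits parallel relationships.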
for node in subgraph.nodes: + if node.__primarykey__ is not None: @@ -170,15 +168,15 @@ def process_batch(batch) -> None: to_merge[0].append(node) else: to_create[0].append(node) - for relation in subgraph.relationships: - if getattr(relation, "__primarykey__", None) is not None: - # If a primary key is existing we merge the relation to the graph - to_merge[1].append(relation) + for relationship in subgraph.relationships: + if getattr(relationship, "__primarykey__", None) is not None: + # If a primary key exists we merge the relationship into the graph + to_merge[1].append(relationship) pass else: - # If no primary key is existing we create the relation to the graph - relation.__primarykey__ = -1 - to_create[1].append(relation) + # If no primary key exists we create the relationship in the graph + relationship.__primarykey__ = -1 + to_create[1].append(relationship) processed_resources.append(resource) @@ -201,7 +199,7 @@ def process_batch(batch) -> None: bin_resources = pickle.dumps(processed_resources) return bin_resources else: - # No need to return anything for relations as no synchronization is needed + # No need to return anything for relationships as no synchronization is needed return [] class Batcher: @@ -245,10 +243,10 @@ def init_process_state(proc_config: WorkerConfig, conversion_objects: Tuple, glo __process_config.setup() # Load the conversion objects - factories,node_mask,relation_mask = conversion_objects + factories, node_mask, relationship_mask = conversion_objects __process_config.factories = factories __process_config.node_mask = node_mask - __process_config.relation_mask = relation_mask + __process_config.relationship_mask = relationship_mask # Set driver for matcher # TODO: This is a hacky way to set the matcher to the graph. @@ -294,7 +292,9 @@ def __init__(self, schema: str, iterator: ResourceIterator, neo4j_uri: str, neo4 driver.close() # Compile the schema - self._factories, self._node_mask, self._relation_mask = compile_schema(schema) + if "RELATION(" in schema: + raise DeprecationWarning("The RELATION keyword is deprecated. Please use RELATIONSHIP instead.") + self._factories, self._node_mask, self._relationship_mask = compile_schema(schema) self.iterator = iterator self._num_workers = num_workers @@ -334,17 +334,17 @@ def _process_iteration(self, pool: mp.Pool, iterator: Iterable, config: WorkerCo raise e return processed_resources - def __call__(self, progress_bar: "tdqm.tqdm" = None, skip_nodes = False, skip_relations = False) -> None: - """Runs the convertion and commits the produced nodes and relations to the graph. + def __call__(self, progress_bar: "tqdm.tqdm" = None, skip_nodes = False, skip_relationships = False) -> None: + """Runs the conversion and commits the produced nodes and relationships to the graph. Args: progress_bar: An optional tqdm like instance for a progress bar. - skip_nodes: (default: False) If true creation of nodes will be skiped. ATTENTION: this might lead to problems if you use identifiers. + skip_nodes: (default: False) If true creation of nodes will be skipped. ATTENTION: this might lead to problems if you use identifiers.
- skip_relation: If true creation of relations will be skiped (default: False) + skip_relationships: If true creation of relationships will be skipped (default: False) """ config = WorkerConfig(self._neo4j_uri, self._neo4j_auth) - conversion_objects = (self._factories, self._node_mask, self._relation_mask) + conversion_objects = (self._factories, self._node_mask, self._relationship_mask) # Handle progress bar (create new or update it) pb = None @@ -372,9 +372,9 @@ def __call__(self, progress_bar: "tdqm.tqdm" = None, skip_nodes = False, skip_re else: logger.info("Skipping creation of nodes.") - if not skip_relations: - config.set_work_type(WorkType.RELATION) - logger.info("Starting creation of relations.") + if not skip_relationships: + config.set_work_type(WorkType.RELATIONSHIP) + logger.info("Starting creation of relationships.") self._process_iteration(pool, processed_batches, config) else: @@ -396,12 +396,12 @@ def __call__(self, progress_bar: "tdqm.tqdm" = None, skip_nodes = False, skip_re else: logger.info("Skipping creation of nodes.") - if not skip_relations: - config.set_work_type(WorkType.RELATION) - logger.info("Starting creation of relations.") + if not skip_relationships: + config.set_work_type(WorkType.RELATIONSHIP) + logger.info("Starting creation of relationships.") list(map(process_batch, processed_batches)) else: - logger.info("Skipping creation of relations.") + logger.info("Skipping creation of relationships.") finally: # Cleanup the process state cleanup_process_state() @@ -414,4 +414,4 @@ def __call__(self, progress_bar: "tdqm.tqdm" = None, skip_nodes = False, skip_re pb.refresh() time.sleep(0.1) pb.close() - logger.info(f"Processed in total {config.processed_nodes.value} nodes and {config.processed_relations.value} relations (this run took {int(time.time()-start)}s)") \ No newline at end of file + logger.info(f"Processed in total {config.processed_nodes.value} nodes and {config.processed_relationships.value} relationships (this run took {int(time.time()-start)}s)") \ No newline at end of file diff --git a/rel2graph/core/factories/factory.py b/rel2graph/core/factories/factory.py index e2fff51..9f76b02 100644 --- a/rel2graph/core/factories/factory.py +++ b/rel2graph/core/factories/factory.py @@ -187,7 +187,7 @@ def construct(self, resource: Resource) -> Node: return Node.from_attributes([l for l in labels if l is not None], [attr for attr in attributes if attr is not None], self._primary_key) @register_factory -class RelationFactory(SubgraphFactory): - """Factory for creating Relations from a Resource +class RelationshipFactory(SubgraphFactory): + """Factory for creating Relationships from a Resource - The RelationFactory is initialised with two Matcher objects (from_matcher and to_matcher) that specify how to + The RelationshipFactory is initialised with two Matcher objects (from_matcher and to_matcher) that specify how to diff --git a/rel2graph/core/schema_compiler.py b/rel2graph/core/schema_compiler.py index 7af5881..1ee3f37 100644 --- a/rel2graph/core/schema_compiler.py +++ b/rel2graph/core/schema_compiler.py @@ -44,7 +44,7 @@ class SchemaConfigParser: 'BOOL', 'ENTITY', 'NODE', - 'RELATION', + 'RELATIONSHIP', 'MATCH', 'NAME', 'DOT', @@ -57,10 +57,10 @@ class SchemaConfigParser: ) t_STRING = r'"(?:(?!"|\\).|\\.)*"|\'(?:(?!\'|\\).|\\.)*\'' - t_NAME = r'\b(?!\b(?:False|True|ENTITY|NODE|RELATION|MATCH)\b)[a-zA-Z_]\w*\b' + t_NAME = r'\b(?!\b(?:False|True|ENTITY|NODE|RELATIONSHIP|MATCH)\b)[a-zA-Z_]\w*\b' t_ENTITY = r'\bENTITY\b' t_NODE = r'\bNODE\b' - t_RELATION = r'\bRELATION\b' + t_RELATIONSHIP = r'\bRELATIONSHIP\b' t_MATCH = r'\bMATCH\b' t_DOT = r'\.(?!\d+\b)' t_COMMA = r'\,' @@ -154,7 +154,7 @@ def _inject_graphelement_args(instructions, attributes, identifier): Returns: Instructions with
injected arguments, flag if this is a node """ - if instructions[0] in ["NodeFactory", "RelationFactory"]: + if instructions[0] in ["NodeFactory", "RelationshipFactory"]: is_node = instructions[0] == "NodeFactory" # Find primary attribute and extract attributes raw_attributes = [] @@ -177,7 +177,7 @@ def p_graphelements(self, p): '''graphelements : graphelement identifier COLON attributes graphelements | empty''' case = len(p)-1 - instructions = [[], []] # nodes, relations + instructions = [[], []] # nodes, relationships if case == 5: # We need to inject the attributes into the correct location in the graph element instructions # The attributes are always the first argument for any graphelement @@ -198,7 +198,7 @@ def p_identifier(self, p): def p_graphelement(self, p): '''graphelement : node - | relation + | relationship | NAME LPAR graphelement staticarguments RPAR''' case = len(p)-1 instructions = [] @@ -212,9 +212,9 @@ def p_node(self, p): '''node : NODE LPAR arguments RPAR''' p[0] = ["NodeFactory", [p[3]]] - def p_relation(self, p): - '''relation : RELATION LPAR destination COMMA argument COMMA destination RPAR''' - p[0] = ["RelationFactory", [p[5], p[3], p[7]]] + def p_relationship(self, p): + '''relationship : RELATIONSHIP LPAR destination COMMA argument COMMA destination RPAR''' + p[0] = ["RelationshipFactory", [p[5], p[3], p[7]]] def p_destination(self, p): '''destination : NAME @@ -381,7 +381,7 @@ def compile_schema(schema: str) -> List["Factory"]: schema: The schema as a string. Returns: A tuple (compiled_factory_dict, node_mask, relationship_mask) - compiled_factory_dict: A dict in form of (entity_type_name, (NodeSupplyChain, RelationSupplyChain)) + compiled_factory_dict: A dict in form of (entity_type_name, (NodeSupplyChain, RelationshipSupplyChain)) for all provided entity_types. node_mask: A set of all entities that produce a node. relationship_mask: A set of all entities that produce a relationship. @@ -392,18 +392,18 @@ def compile_schema(schema: str) -> List["Factory"]: parser = SchemaConfigParser() instructions = parser.parse(precompiled_string) compiled = {} - relation_mask = set() + relationship_mask = set() node_mask = set() for entity_type, entity_instructions in instructions: if entity_type in compiled.keys(): raise SchemaConfigException(f"Found two conflicting definitions of entity '{entity_type}'. 
Please only specify each entity once.") - node_instructions, relation_instructions = entity_instructions - node_factories, relation_factories = _compile_instructions(node_instructions), _compile_instructions(relation_instructions) + node_instructions, relationship_instructions = entity_instructions + node_factories, relationship_factories = _compile_instructions(node_instructions), _compile_instructions(relationship_instructions) compiled[entity_type] = (get_factory("SupplyChain")(node_factories, "NodeSupplyChain"), - get_factory("SupplyChain")(relation_factories, "RelationSupplyChain")) + get_factory("SupplyChain")(relationship_factories, "RelationshipSupplyChain")) if len(node_factories) > 0: node_mask.add(entity_type) - if len(relation_factories) > 0: - relation_mask.add(entity_type) - return compiled, node_mask, relation_mask + if len(relationship_factories) > 0: + relationship_mask.add(entity_type) + return compiled, node_mask, relationship_mask diff --git a/rel2graph/neo4j/graph_elements.py b/rel2graph/neo4j/graph_elements.py index d18719c..bb245e2 100644 --- a/rel2graph/neo4j/graph_elements.py +++ b/rel2graph/neo4j/graph_elements.py @@ -522,10 +522,10 @@ def __init__(self, start_node: Node, type: str, end_node: Node, **attributes) -> >>> a_knows_b = Relationship(a, "KNOWS", b, since=1999) Args: - start_node: Origin of the relation - end_node: Destination of the relation - type: Type of the relation - attributes: Key value pairs of attributes for the Relation + start_node: Origin of the relationship + end_node: Destination of the relationship + type: Type of the relationship + attributes: Key value pairs of attributes for the Relationship """ self._type = type diff --git a/rel2graph/relational_modules/odata.py b/rel2graph/relational_modules/odata.py deleted file mode 100644 index 42eb87c..0000000 --- a/rel2graph/relational_modules/odata.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Implementation for odata as the relational module. - -authors: Julian Minder -""" - -from typing import Iterable, List -from .. import ResourceIterator -from .. import Resource -from pyodata.v2.service import EntityProxy - - - - -class ODataResource(Resource): - """Implementation of the oData Resource. Enables access to an odata entity""" - - def __init__(self, entity: EntityProxy) -> None: - super().__init__() - self._entity = entity - - @property - def type(self) -> str: - """Returns the type of the resource. Is used to select correct factory""" - return self._entity.entity_set.name - - def __getitem__(self, key): - """ - Gets the value with key 'key'. - """ - return getattr(self._entity, key) - - def __setitem__(self, key, value): - """ - Sets the value of with key 'key'. - """ - setattr(self._entity, key, value) - - def __repr__(self) -> str: - """ - Gets a string representation of the resource. Only used for logging.
- """ - return f"{super().__repr__()} {self._entity}" - - @property - def odata_entity(self): - """Returns the oData Entity behind the resource""" - return self._entity - -class ODataListIterator(ResourceIterator): - """Implements a Iterator that works based on a list of oData Entities.""" - - def __init__(self, entities: List[EntityProxy]) -> None: - super().__init__() - self._entities = [ODataResource(entity) for entity in entities] - - def __iter__(self) -> Iterable: - """Returns the iterator itself in its initial state (must return the first resource).""" - return iter(self._entities) - - def __len__(self) -> None: - """Returns the total amount of resources in the iterator""" - return len(self._entities) \ No newline at end of file diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index d29eedb..b637250 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -59,7 +59,7 @@ def get_nodes(session, labels=[]): match_list = [Node.from_dict(r['labels'], r['properties'], identity=r["identity"]) for r in res] return match_list -def get_relations(session, types=[]): +def get_relationships(session, types=[]): if not isinstance(types, list) and not isinstance(types, tuple): types = [types] res = session.run("""MATCH (a)-[r{}]->(b) RETURN TYPE(r) as type, PROPERTIES(r) as properties, elementId(r) as identity, @@ -87,7 +87,7 @@ def eq_node(rnode, gnode): return False return True -def eq_relation(rrel, grel): +def eq_relationship(rrel, grel): # same type if rrel[1] != grel.type: return False @@ -116,23 +116,23 @@ def compare_nodes(session, result): break assert found, f"The following node was not found: {rnode}" -def compare_relations(session, result): - graph_relations = get_relations(session) - print("Graph Relations: ") - for grel in graph_relations: +def compare_relationships(session, result): + graph_relationships = get_relationships(session) + print("Graph Relationships: ") + for grel in graph_relationships: print("- ", grel) - print("Result Relations: ") - for rrel in result["relations"]: + print("Result Relationships: ") + for rrel in result["relationships"]: print("- ", rrel) - assert len(graph_relations) == len(result["relations"]), "Same number of relations" - for rrel in result["relations"]: + assert len(graph_relationships) == len(result["relationships"]), "Same number of relationship" + for rrel in result["relationships"]: found = False - for grel in graph_relations: - found = found or eq_relation(rrel, grel) + for grel in graph_relationships: + found = found or eq_relationship(rrel, grel) if found: break - assert found, f"The following relation was not found: {rrel}" + assert found, f"The following relationship was not found: {rrel}" def compare(session, result): compare_nodes(session, result) - compare_relations(session, result) \ No newline at end of file + compare_relationships(session, result) \ No newline at end of file diff --git a/tests/integration/resources/data_end_to_end.py b/tests/integration/resources/data_end_to_end.py index 53a7906..d6313b1 100644 --- a/tests/integration/resources/data_end_to_end.py +++ b/tests/integration/resources/data_end_to_end.py @@ -51,41 +51,41 @@ flower_nodes = [(["Flower"], {"sepal_length": f["sepal_length"], "sepal_width": f["sepal_width"], "petal_length": f["petal_length"], "petal_width": f["petal_width"]}) for f in iris[1].iloc] -is_relations = [((["Flower"], {"sepal_length": f["sepal_length"], "sepal_width": f["sepal_width"], +is_relationships = [((["Flower"], {"sepal_length": 
f["sepal_length"], "sepal_width": f["sepal_width"], "petal_length": f["petal_length"], "petal_width": f["petal_width"]}),"is", (["Species", "BioEntity", f["species"]], {"Name": f["species"]}), {}) for f in iris[1].iloc] -likes_relations = [((["Person"], {"ID": p["ID"], "FirstName": p["FirstName"], +likes_relationships = [((["Person"], {"ID": p["ID"], "FirstName": p["FirstName"], "Renamed": p["LastName"], "Static": "staticstring"}),"likes", (["Species", "BioEntity", p["FavoriteFlower"]], {"Name": p["FavoriteFlower"]}), {"Since":"4ever", "EntityAttribute": p["ID"]}) for p in no_duplicates[1].iloc] -likes_relations_parallel = [((["Person"], {"ID": 1, "FirstName": "Julian", "Renamed": "Minder", "Static": "staticstring"}),"likes_parallel", +likes_relationships_parallel = [((["Person"], {"ID": 1, "FirstName": "Julian", "Renamed": "Minder", "Static": "staticstring"}),"likes_parallel", (["Species", "BioEntity", "virginica"], {"Name": "virginica"}), {"pk": i}) for i in [1,2,3,4]] -likes_relations_parallel = [((["Person"], {"ID": 1, "FirstName": "Julian", "Renamed": "Minder", "Static": "staticstring"}),"likes_parallel", +likes_relationships_parallel = [((["Person"], {"ID": 1, "FirstName": "Julian", "Renamed": "Minder", "Static": "staticstring"}),"likes_parallel", (["Species", "BioEntity", "virginica"], {"Name": "virginica"}), {"pk": i}) for i in [1,2,3,4]] -likes_relations_merged = [((["Person"], {"ID": 1, "FirstName": "Julian", "Renamed": "Minder", "Static": "staticstring"}),"likes_merged", +likes_relationships_merged = [((["Person"], {"ID": 1, "FirstName": "Julian", "Renamed": "Minder", "Static": "staticstring"}),"likes_merged", (["Species", "BioEntity", "virginica"], {"Name": "virginica"}), {"pk": 1})] person_only_nodes_only_result = { "nodes": person_nodes, - "relations": [] + "relationships": [] } flower_only_result = { "nodes": flower_nodes + species_nodes, - "relations": is_relations + "relationships": is_relationships } full_result = { "nodes": person_nodes + species_nodes + flower_nodes, - "relations": is_relations + likes_relations + "relationships": is_relationships + likes_relationships } result_parallel = { "nodes": person_nodes + species_nodes, - "relations": likes_relations_parallel + likes_relations_merged + "relationships": likes_relationships_parallel + likes_relationships_merged } diff --git a/tests/integration/resources/schema_end_to_end.yaml b/tests/integration/resources/schema_end_to_end.yaml index 7f9b8e9..6a99b6a 100644 --- a/tests/integration/resources/schema_end_to_end.yaml +++ b/tests/integration/resources/schema_end_to_end.yaml @@ -8,7 +8,7 @@ ENTITY("Flower"): NODE("Species", "BioEntity", Flower.species) species: + Name = Flower.species - RELATION(flower, "is", species): + RELATIONSHIP(flower, "is", species): ENTITY("Person"): NODE("Person") person: @@ -16,7 +16,7 @@ ENTITY("Person"): - FirstName = Person.FirstName - Renamed = Person.LastName - Static = "staticstring" - MERGE_RELATIONSHIPS(RELATION(person, "likes", MATCH("Species", Name=Person.FavoriteFlower))): + MERGE_RELATIONSHIPS(RELATIONSHIP(person, "likes", MATCH("Species", Name=Person.FavoriteFlower))): - Since = "4ever" - EntityAttribute = INT(Person.ID) @@ -30,9 +30,9 @@ ENTITY("PersonParallel"): - FirstName = PersonParallel.FirstName - Renamed = PersonParallel.LastName - Static = "staticstring" - RELATION(MATCH("Person", ID = 1), "likes_parallel", MATCH("Species", Name="virginica")): + RELATIONSHIP(MATCH("Person", ID = 1), "likes_parallel", MATCH("Species", Name="virginica")): - pk = INT(PersonParallel.ID) - 
RELATION(MATCH("Person", ID = 1), "likes_merged", MATCH("Species", Name="virginica")): + RELATIONSHIP(MATCH("Person", ID = 1), "likes_merged", MATCH("Species", Name="virginica")): + pk = 1 ENTITY("Entity"): diff --git a/tests/integration/resources/schema_wrappers.yaml b/tests/integration/resources/schema_wrappers.yaml index 1ccbba3..d44a44d 100644 --- a/tests/integration/resources/schema_wrappers.yaml +++ b/tests/integration/resources/schema_wrappers.yaml @@ -17,10 +17,10 @@ ENTITY("SGPRE"): - First = SGPRE.First sg_pre_change(NODE("To")) to: sg_pre_condition(NODE("WillNotBeCreated")): - sg_pre_change_parametrized(RELATION(from, "relates to", to), "Second", "CHANGED"): + sg_pre_change_parametrized(RELATIONSHIP(from, "relates to", to), "Second", "CHANGED"): - Second = SGPRE.Second - First = SGPRE.First - sg_pre_condition(RELATION(from, "notexisting", to)): + sg_pre_condition(RELATIONSHIP(from, "notexisting", to)): ENTITY("SGPOST"): sg_post_add(NODE("From")) from: - First = SGPOST.First diff --git a/tests/integration/test_dependency.py b/tests/integration/test_dependency.py index 3f675a3..432428c 100644 --- a/tests/integration/test_dependency.py +++ b/tests/integration/test_dependency.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Integration tests for testing the py2neo extensions and behavior of parallel relations. +Integration tests for testing the py2neo extensions and behavior of parallel relationships. This is especially relevant due to the batch processing. authors: Julian Minder diff --git a/tests/integration/test_end_to_end.py b/tests/integration/test_end_to_end.py index 3e22b29..7267cad 100644 --- a/tests/integration/test_end_to_end.py +++ b/tests/integration/test_end_to_end.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- """ -Integration tests for converting pandas dataframes to graphs. +Integration tests for converting pandas dataframes to graphs - end to end tests. 
authors: Julian Minder """ @@ -92,7 +92,7 @@ def test_two_types(data_type_1, data_type_2, result, workers, batch_size, session, uri, auth): "data_type_1,data_type_2,result", [(iris, no_duplicates, result_parallel)] ) -def test_parallel_relations(data_type_1, data_type_2, result, workers, batch_size, session, uri, auth): +def test_parallel_relationships(data_type_1, data_type_2, result, workers, batch_size, session, uri, auth): iterator = IteratorIterator([ PandasDataFrameIterator(data_type_1[1], data_type_1[0]+"Parallel"), PandasDataFrameIterator(data_type_2[1], data_type_2[0]+"Parallel") @@ -140,7 +140,7 @@ def test_serialize(session, uri, auth): data = pd.DataFrame({"ID": list(range(10)), "next": list(range(1,11))}) result = { "nodes": [(["Entity"], {"ID": i}) for i in range(5)], - "relations": [] + "relationships": [] } iterator = PandasDataFrameIterator(data, "Entity") # We run with batchsize 1 to make sure that the serialization is actually used diff --git a/tests/integration/test_merge_relationships.py b/tests/integration/test_merge_relationships.py index b433b6d..3382f18 100644 --- a/tests/integration/test_merge_relationships.py +++ b/tests/integration/test_merge_relationships.py @@ -28,12 +28,12 @@ def test_standart(config, session, uri, auth): NODE("Entity") node: + id = INT(Entity.id) - ENTITY("Relation"): - RELATION(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id))): + ENTITY("Relationship"): + RELATIONSHIP(MATCH("Entity", id = INT(Relationship.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relationship.target_id))): """ entities = pd.DataFrame({"id": [1,2]}) relations = pd.DataFrame({"source_id": [1,1], "target_id": [2,2]}) - iterator = IteratorIterator([PandasDataFrameIterator(entities, "Entity"), PandasDataFrameIterator(relations, "Relation")]) + iterator = IteratorIterator([PandasDataFrameIterator(entities, "Entity"), PandasDataFrameIterator(relations, "Relationship")]) converter = Converter(schema, iterator, uri, auth, serialize=config[1], num_workers=config[0]) converter() assert num_relationships(session) == 2 @@ -45,13 +45,13 @@ def test_standart_same_resource(config, session, uri, auth): NODE("Entity") node: + id = INT(Entity.id) - ENTITY("Relation"): - RELATION(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id))): - RELATION(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id))): + ENTITY("Relationship"): + RELATIONSHIP(MATCH("Entity", id = INT(Relationship.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relationship.target_id))): + RELATIONSHIP(MATCH("Entity", id = INT(Relationship.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relationship.target_id))): """ entities = pd.DataFrame({"id": [1,2]}) relations = pd.DataFrame({"source_id": [1], "target_id": [2]}) - iterator = IteratorIterator([PandasDataFrameIterator(entities, "Entity"), PandasDataFrameIterator(relations, "Relation")]) + iterator = IteratorIterator([PandasDataFrameIterator(entities, "Entity"), PandasDataFrameIterator(relations, "Relationship")]) converter = Converter(schema, iterator, uri, auth, serialize=config[1], num_workers=config[0]) converter() assert num_relationships(session) == 2 @@ -65,7 +65,7 @@ def test_merge(config, session, uri, auth): + id = INT(Entity.id) ENTITY("Relation"): - MERGE_RELATIONSHIPS(RELATION(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id)))): +
MERGE_RELATIONSHIPS(RELATIONSHIP(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id)))): """ entities = pd.DataFrame({"id": [1,2]}) relations = pd.DataFrame({"source_id": [1,1], "target_id": [2,2]}) @@ -82,8 +82,8 @@ def test_merge_same_resource(config, session, uri, auth): + id = INT(Entity.id) ENTITY("Relation"): - MERGE_RELATIONSHIPS(RELATION(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id)))): - MERGE_RELATIONSHIPS(RELATION(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id)))): + MERGE_RELATIONSHIPS(RELATIONSHIP(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id)))): + MERGE_RELATIONSHIPS(RELATIONSHIP(MATCH("Entity", id = INT(Relation.source_id)), "RELATED_TO", MATCH("Entity", id = INT(Relation.target_id)))): """ entities = pd.DataFrame({"id": [1,2]}) relations = pd.DataFrame({"source_id": [1], "target_id": [2]}) diff --git a/tests/integration/test_wrappers.py b/tests/integration/test_wrappers.py index 0f4b03b..70afd10 100644 --- a/tests/integration/test_wrappers.py +++ b/tests/integration/test_wrappers.py @@ -182,8 +182,8 @@ def test_subgraph_pre(input, workers, batch_size, session, uri, auth): node_from = [node for node in get_nodes(session) if "From" in node.labels][0] print(get_nodes(session)) assert node_from["First"] == "Changed" - assert len(get_relations(session)) == 1 - rel = get_relations(session)[0] + assert len(get_relationships(session)) == 1 + rel = get_relationships(session)[0] assert rel["First"] == "Changed" # The resource was changed earlier -> this is still the case assert rel["Second"] == "CHANGED" @@ -203,8 +203,8 @@ def test_subgraph_post(input, workers, batch_size, session, uri, auth): node_copy = [node for node in get_nodes(session) if "From Copy" in node.labels][0] # must exist assert node_from["First"] == "F" assert node_copy["First"] == "F" - assert len(get_relations(session)) == 1 - rel = get_relations(session)[0] + assert len(get_relationships(session)) == 1 + rel = get_relationships(session)[0] assert rel.type == "is copied by" assert "From" in rel.start_node.labels assert "From Copy" in rel.end_node.labels diff --git a/tests/unit/core/resources/dynamic_keys.yaml b/tests/unit/core/resources/dynamic_keys.yaml index 19df241..fa13d7b 100644 --- a/tests/unit/core/resources/dynamic_keys.yaml +++ b/tests/unit/core/resources/dynamic_keys.yaml @@ -9,8 +9,8 @@ ENTITY("entity"): NODE(entity.dynamic_key , WRAPPER(entity.dynamic_key) ): NODE(WRAPPER(entity.dynamic_key), entity.dynamic_key): + attr = entity.dynamic_key - RELATION(node, entity.dynamic_key, node): + RELATIONSHIP(node, entity.dynamic_key, node): + attr = entity.dynamic_key - RELATION(node, WRAPPER(entity.dynamic_key), node): - RELATION(node, WRAPPER(entity.dynamic_key) , node): - RELATION(node, WRAPPER( entity.dynamic_key ) , node): \ No newline at end of file + RELATIONSHIP(node, WRAPPER(entity.dynamic_key), node): + RELATIONSHIP(node, WRAPPER(entity.dynamic_key) , node): + RELATIONSHIP(node, WRAPPER( entity.dynamic_key ) , node): \ No newline at end of file diff --git a/tests/unit/core/resources/matcher_condition.yaml b/tests/unit/core/resources/matcher_condition.yaml index 6bb8b18..5973124 100644 --- a/tests/unit/core/resources/matcher_condition.yaml +++ b/tests/unit/core/resources/matcher_condition.yaml @@ -1,12 +1,12 @@ ENTITY("entity"): NODE("node") identifier: - RELATION(identifier, 
"static", MATCH("entity", Name= "static")): - RELATION(identifier, "dyn", MATCH("entity", Dyn =entity.dyn)): - RELATION(identifier, "static-dyn", MATCH("entity", Name= "static", Dyn= entity.dyn)): - RELATION(identifier, "static-dyn", MATCH("entity", Name ="static", Dyn =entity.dyn)): - RELATION(identifier, "static-dyn", MATCH("entity", Name = "static", Dyn = entity.dyn)): - RELATION(identifier, "static-dyn", MATCH("entity",Name="static",Dyn=entity.dyn)): - RELATION(identifier, "two-static", MATCH("entity", Name= "static", Name2= "static2")): - RELATION(identifier, "two-dyn", MATCH("entity", Dyn = entity.dyn, Dyn2 = entity.dyn2)): - RELATION(identifier, "two-dyn-two-static", MATCH("entity", Name= "static", Name2= "static2", Dyn = entity.dyn, Dyn2 = entity.dyn2)): + RELATIONSHIP(identifier, "static", MATCH("entity", Name= "static")): + RELATIONSHIP(identifier, "dyn", MATCH("entity", Dyn =entity.dyn)): + RELATIONSHIP(identifier, "static-dyn", MATCH("entity", Name= "static", Dyn= entity.dyn)): + RELATIONSHIP(identifier, "static-dyn", MATCH("entity", Name ="static", Dyn =entity.dyn)): + RELATIONSHIP(identifier, "static-dyn", MATCH("entity", Name = "static", Dyn = entity.dyn)): + RELATIONSHIP(identifier, "static-dyn", MATCH("entity",Name="static",Dyn=entity.dyn)): + RELATIONSHIP(identifier, "two-static", MATCH("entity", Name= "static", Name2= "static2")): + RELATIONSHIP(identifier, "two-dyn", MATCH("entity", Dyn = entity.dyn, Dyn2 = entity.dyn2)): + RELATIONSHIP(identifier, "two-dyn-two-static", MATCH("entity", Name= "static", Name2= "static2", Dyn = entity.dyn, Dyn2 = entity.dyn2)): diff --git a/tests/unit/core/resources/primary_keys.yaml b/tests/unit/core/resources/primary_keys.yaml index 144d954..93016cf 100644 --- a/tests/unit/core/resources/primary_keys.yaml +++ b/tests/unit/core/resources/primary_keys.yaml @@ -9,14 +9,14 @@ ENTITY("entity"): + pk = WRAPPER(WRAPPER(entity.pk)) NODE("pk") d: + pk = WRAPPER(entity.pk) - RELATION(a, "noattr", c): - RELATION(a, "nopk", c): + RELATIONSHIP(a, "noattr", c): + RELATIONSHIP(a, "nopk", c): - attr = "someattr" - RELATION(a, "pk", c): + RELATIONSHIP(a, "pk", c): + pk = entity.pk - attr = "someattr" - RELATION(a, "pk", c): + RELATIONSHIP(a, "pk", c): + pk = WRAPPER(entity.pk) - RELATION(a, "pk", c): + RELATIONSHIP(a, "pk", c): + pk = WRAPPER(WRAPPER(entity.pk)) \ No newline at end of file diff --git a/tests/unit/core/resources/typing.yaml b/tests/unit/core/resources/typing.yaml index 165b6b8..d3dbf7c 100644 --- a/tests/unit/core/resources/typing.yaml +++ b/tests/unit/core/resources/typing.yaml @@ -5,4 +5,4 @@ ENTITY("entity"): - myint = 1 #int - myTrue = True # bool - myFalse = False # bool - RELATION(test, "to", MATCH("node", mystr = "1", myint = 1, myfloat = 1.1, myTrue = True, myFalse = False)): \ No newline at end of file + RELATIONSHIP(test, "to", MATCH("node", mystr = "1", myint = 1, myfloat = 1.1, myTrue = True, myFalse = False)): \ No newline at end of file diff --git a/tests/unit/core/test_converter.py b/tests/unit/core/test_converter.py index 262edcc..4b35a14 100644 --- a/tests/unit/core/test_converter.py +++ b/tests/unit/core/test_converter.py @@ -32,4 +32,12 @@ def test_invalid_auth(): exception_msg = excinfo.value.args[0] - assert "The client is unauthorized due to authentication failure" in exception_msg \ No newline at end of file + assert "The client is unauthorized due to authentication failure" in exception_msg + + +def test_deprecated(): + with pytest.raises(DeprecationWarning) as excinfo: + Converter("RELATION()", None, 
"bolt://localhost:7687", ("neo4j", "password")) + + exception_msg = excinfo.value.args[0] + assert "The RELATION keyword is deprecated. Please use RELATIONSHIP instead." in exception_msg \ No newline at end of file diff --git a/tests/unit/core/test_schema_compiler.py b/tests/unit/core/test_schema_compiler.py index 1ac272c..577cad4 100644 --- a/tests/unit/core/test_schema_compiler.py +++ b/tests/unit/core/test_schema_compiler.py @@ -24,13 +24,13 @@ def test_precompile_commentremoval(): + test = entity.column ####### - test1 = "static \\" string" ##ka asdflkjasdölfj - test2 = WRAP2(WRAP(entity.col)) - RELATION(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + RELATIONSHIP(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + test = entity.column # testi 123 vier fünf - test1 = "static \\" string" - test2 = WRAP2(WRAP(entity.col)) # this is another comment ,!'_ ENTITY("second"): - RELATION(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + RELATIONSHIP(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + test = entity.column # this is a coomment - test1 = "static \\" string" - test2 = WRAP2(WRAP(entity.col)) @@ -42,13 +42,13 @@ def test_precompile_commentremoval(): + test = entity.column - test1 = "static \\" string" - test2 = WRAP2(WRAP(entity.col)) - RELATION(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + RELATIONSHIP(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + test = entity.column - test1 = "static \\" string" - test2 = WRAP2(WRAP(entity.col)) ENTITY("second"): - RELATION(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + RELATIONSHIP(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + test = entity.column - test1 = "static \\" string" - test2 = WRAP2(WRAP(entity.col)) @@ -66,18 +66,18 @@ def test_parser_complex(): + test = entity.column - test1 = "static \\" string" - test2 = WRAP2(WRAP(entity.col)) - RELATION(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + RELATIONSHIP(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + test = entity.column - test1 = "static \\" string" - test2 = WRAP2(WRAP(entity.col)) ENTITY("second"): - RELATION(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + RELATIONSHIP(MATCH("label", "label2", name="test", id=WRAP(test.idcolumn)), "type", to): + test = entity.column - test1 = "static \\" string" - test2 = WRAP2(WRAP(entity.col)) ENTITY("third"): """ - ground_truth = [['entity', [[['WRAPPER', [['NodeFactory', [[['AttributeFactory', ['test', 'column', None]], ['AttributeFactory', ['test1', None, 'static \\" string']], ['WRAP2', [['WRAP', [['AttributeFactory', ['test2', 'col', None]]]]]]], [['AttributeFactory', [None, None, 'label']], ['WRAP', [['AttributeFactory', [None, None, 'label2']]]], ['WRAP', [['AttributeFactory', [None, None, 'label3']], ['AttributeFactory', [None, None, 1234]]]], ['AttributeFactory', [None, 'column', None]]], 'test', 'nodeid']], ['AttributeFactory', [None, None, 'someargument']], ['AttributeFactory', [None, None, 123]]]]], [['RelationFactory', [[['AttributeFactory', ['test', 'column', None]], ['AttributeFactory', ['test1', None, 'static \\" string']], ['WRAP2', [['WRAP', [['AttributeFactory', ['test2', 'col', None]]]]]]], ['AttributeFactory', [None, None, 'type']], ['Matcher', [None, ['AttributeFactory', [None, None, 'label']], 
['AttributeFactory', [None, None, 'label2']], ['AttributeFactory', ['name', None, 'test']], ['WRAP', [['AttributeFactory', ['id', 'idcolumn', None]]]]]], ['Matcher', ['to']], 'test', None]]]]], ['second', [[], [['RelationFactory', [[['AttributeFactory', ['test', 'column', None]], ['AttributeFactory', ['test1', None, 'static \\" string']], ['WRAP2', [['WRAP', [['AttributeFactory', ['test2', 'col', None]]]]]]], ['AttributeFactory', [None, None, 'type']], ['Matcher', [None, ['AttributeFactory', [None, None, 'label']], ['AttributeFactory', [None, None, 'label2']], ['AttributeFactory', ['name', None, 'test']], ['WRAP', [['AttributeFactory', ['id', 'idcolumn', None]]]]]], ['Matcher', ['to']], 'test', None]]]]], ['third', [[], []]]] + ground_truth = [['entity', [[['WRAPPER', [['NodeFactory', [[['AttributeFactory', ['test', 'column', None]], ['AttributeFactory', ['test1', None, 'static \\" string']], ['WRAP2', [['WRAP', [['AttributeFactory', ['test2', 'col', None]]]]]]], [['AttributeFactory', [None, None, 'label']], ['WRAP', [['AttributeFactory', [None, None, 'label2']]]], ['WRAP', [['AttributeFactory', [None, None, 'label3']], ['AttributeFactory', [None, None, 1234]]]], ['AttributeFactory', [None, 'column', None]]], 'test', 'nodeid']], ['AttributeFactory', [None, None, 'someargument']], ['AttributeFactory', [None, None, 123]]]]], [['RelationshipFactory', [[['AttributeFactory', ['test', 'column', None]], ['AttributeFactory', ['test1', None, 'static \\" string']], ['WRAP2', [['WRAP', [['AttributeFactory', ['test2', 'col', None]]]]]]], ['AttributeFactory', [None, None, 'type']], ['Matcher', [None, ['AttributeFactory', [None, None, 'label']], ['AttributeFactory', [None, None, 'label2']], ['AttributeFactory', ['name', None, 'test']], ['WRAP', [['AttributeFactory', ['id', 'idcolumn', None]]]]]], ['Matcher', ['to']], 'test', None]]]]], ['second', [[], [['RelationshipFactory', [[['AttributeFactory', ['test', 'column', None]], ['AttributeFactory', ['test1', None, 'static \\" string']], ['WRAP2', [['WRAP', [['AttributeFactory', ['test2', 'col', None]]]]]]], ['AttributeFactory', [None, None, 'type']], ['Matcher', [None, ['AttributeFactory', [None, None, 'label']], ['AttributeFactory', [None, None, 'label2']], ['AttributeFactory', ['name', None, 'test']], ['WRAP', [['AttributeFactory', ['id', 'idcolumn', None]]]]]], ['Matcher', ['to']], 'test', None]]]]], ['third', [[], []]]] parser = SchemaConfigParser() assert ground_truth == parser.parse(input_string) @@ -163,8 +163,8 @@ def test_parser_raises_illegal_token(): def WRAPPER(resource): return resource -def get_rel_type(relation_factory): - return relation_factory._type.static_attribute_value +def get_rel_type(relationship_factory): + return relationship_factory._type.static_attribute_value def get_labels(node_factory): return [af.static_attribute_value for af in node_factory._labels] @@ -178,8 +178,8 @@ def get_filepath(name): def test_full_compiler_matcher_conditions(): """Tests if conditions of matcher (dynamic and static) are parsed and compiled correctly""" - relation_supplychain = compile_schema(load_file(get_filepath("matcher_condition")))[0]["entity"][1] # get relation supplychain - for rf in relation_supplychain.factories: + relationship_supplychain = compile_schema(load_file(get_filepath("matcher_condition")))[0]["entity"][1] # get relationship supplychain + for rf in relationship_supplychain.factories: type = get_rel_type(rf) assert type in ["static-dyn", "static", "two-static", "dyn", "two-dyn", "two-dyn-two-static"] # check from matcher @@ 
-211,11 +211,11 @@ def test_full_compiler_node_primary(): if label in ["pk"]: assert(nf._primary_key == "pk") -def test_full_compiler_relations_primary(): - """Test if primary keys for relations are correct parsed""" - _, relation_supplychain = compile_schema(load_file(get_filepath("primary_keys")))[0]["entity"] +def test_full_compiler_relationships_primary(): + """Test if primary keys for relationships are correctly parsed""" + _, relationship_supplychain = compile_schema(load_file(get_filepath("primary_keys")))[0]["entity"] - for rf in relation_supplychain.factories: + for rf in relationship_supplychain.factories: type = get_rel_type(rf) assert(type in ["noattr", "nopk", "pk"]) @@ -241,20 +241,20 @@ def check_types(list_of_attribuets): def test_full_compiler_typing(): """Test if different types for static arguments are correctly parsed""" - node_supplychain, relation_supplychain = compile_schema(load_file(get_filepath("typing")))[0]["entity"] + node_supplychain, relationship_supplychain = compile_schema(load_file(get_filepath("typing")))[0]["entity"] for nf in node_supplychain.factories: attributes = af2str(nf._attributes) check_types(attributes) - for rf in relation_supplychain.factories: + for rf in relationship_supplychain.factories: # check to matcher fm = rf._to_matcher conditions = af2str(fm._conditions) check_types(conditions) def test_full_compiler_dynkeys(): - node_supplychain, relation_supplychain = compile_schema(load_file(get_filepath("dynamic_keys")))[0]["entity"] + node_supplychain, relationship_supplychain = compile_schema(load_file(get_filepath("dynamic_keys")))[0]["entity"] for nf in node_supplychain.factories: for label in nf._labels: @@ -262,15 +262,15 @@ def test_full_compiler_dynkeys(): for attr in nf._attributes: assert(attr._entity_attribute == "dynamic_key") - for rf in relation_supplychain.factories: + for rf in relationship_supplychain.factories: assert(rf._type._entity_attribute == "dynamic_key") for attr in nf._attributes: assert(attr._entity_attribute == "dynamic_key") def test_full_compiler_empty_entity(): - node_supplychain, relation_supplychain = compile_schema(load_file(get_filepath("empty_entity")))[0]["entity"] + node_supplychain, relationship_supplychain = compile_schema(load_file(get_filepath("empty_entity")))[0]["entity"] assert len(node_supplychain.factories) == 0 - assert len(relation_supplychain.factories) == 0 + assert len(relationship_supplychain.factories) == 0 def test_compiler_raises_same_entity_twice(): """Make sure compiler raises exception when defining an entity twice.""" diff --git a/tests/unit/neo4j/test_create_merge.py b/tests/unit/neo4j/test_create_merge.py index a3ee63d..ca48f60 100644 --- a/tests/unit/neo4j/test_create_merge.py +++ b/tests/unit/neo4j/test_create_merge.py @@ -34,7 +34,7 @@ def session(): def get_nodes(session): return session.run("MATCH (c:test) RETURN c").data() -def get_relations(session): +def get_relationships(session): return session.run("MATCH p=()-[d]->() RETURN p, properties(d) AS props").data() @@ -69,7 +69,7 @@ def test_create_relations(session): r1 = Relationship(n1, "to", n2) session.execute_write(r1.__db_create__) - rels = get_relations(session) + rels = get_relationships(session) assert(len(rels) == 1) assert rels[0]["p"][0]["id"] == 1 assert rels[0]["p"][2]["id"] == 2 @@ -82,7 +82,7 @@ def test_create_relations(session): graph = r2 | r3 session.execute_write(graph.__db_create__) - rels = get_relations(session) + rels = get_relationships(session) assert(len(rels) == 3) assert(len([rel for rel in rels if
rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "to"]) == 1) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "another"]) == 1) @@ -95,7 +95,7 @@ def test_create_relations(session): r4 = Relationship(n1, "attribute", n2, attribute=1, another_attribute="test") session.execute_write(r4.__db_create__) - rels = get_relations(session) + rels = get_relationships(session) assert(len(rels) == 1) assert rels[0]["p"][0]["id"] == 1 assert rels[0]["p"][2]["id"] == 2 @@ -143,7 +143,7 @@ def test_merge_relationships(session): r2.set_primary_key("pk") session.execute_write(r2.__db_merge__) - rels = get_relations(session) + rels = get_relationships(session) assert(len(rels) == 1) assert rels[0]["p"][0]["id"] == 1 assert rels[0]["p"][2]["id"] == 2 @@ -172,7 +172,7 @@ def test_merge_relationships(session): session.execute_write(graph.__db_merge__) - rels = get_relations(session) + rels = get_relationships(session) assert(len(rels) == 2) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "to"]) == 1) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "another"]) == 1) @@ -189,7 +189,7 @@ def test_create_parallel_relations_with_id(session): graph = r1 | r2 | r3 | r4 session.execute_write(graph.__db_create__) # this results in a subgraph with 4 relations (bc of different attribtues) - rels = get_relations(session) + rels = get_relationships(session) assert(len(rels) == 4) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "to"]) == 3) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "relates to"]) == 1) @@ -206,7 +206,7 @@ def test_create_parallel_relations_no_id(session): graph = r1 | r2 | r3 | r4 session.execute_write(graph.__db_create__) # this results in a subgraph with 4 relations (bc of different attribtues) - rels = get_relations(session) + rels = get_relationships(session) assert(len(rels) == 4) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "to"]) == 3) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "relates to"]) == 1) @@ -251,7 +251,7 @@ def test_merge_nodes_pk_in_node(session): assert(len([node for node in nodes if node["c"]["id"] == 2]) == 1) assert(len([node for node in nodes if node["c"]["id"] == 1]) == 1) -def test_merge_relations_no_pk(session): +def test_merge_relationships_no_pk(session): n1 = Node("test", id=1) n2 = Node("test", id=2) r1 = Relationship(n1, "to", n2, id=1) @@ -261,7 +261,7 @@ def test_merge_relations_no_pk(session): with pytest.raises(ValueError): # without primary keys session.execute_write(r2.__db_merge__) -def test_merge_relations_pk_arg(session): +def test_merge_relationships_pk_arg(session): n1 = Node("test", id=1) n2 = Node("test", id=2) r1 = Relationship(n1, "to", n2, id=1) @@ -269,11 +269,11 @@ def test_merge_relations_pk_arg(session): session.execute_write(r1.__db_create__) r2 = Relationship(n1, "to", n2, id=1) session.execute_write(r2.__db_merge__, primary_key="id") - rels = get_relations(session=session) + rels = get_relationships(session=session) assert(len(rels) == 1) assert(rels[0]["p"][0]["id"] == 1) -def test_merge_relations_pk_in_rel(session): +def test_merge_relationships_pk_in_rel(session): """Additional test for merge_relations""" n1 = Node("test", id=1) n2 = 
Node("test", id=2) @@ -287,7 +287,7 @@ def test_merge_relations_pk_in_rel(session): r4.set_primary_key("id") graph = r3 | r4 session.execute_write(graph.__db_merge__) - rels = get_relations(session=session) + rels = get_relationships(session=session) assert(len(rels) == 2) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "to" and rel["props"]["id"] == 1]) == 1) assert(len([rel for rel in rels if rel["p"][0]["id"] == 1 and rel["p"][2]["id"] == 2 and rel["p"][1] == "to" and rel["props"]["id"] == 2]) == 1) @@ -300,6 +300,6 @@ def test_MERGE_RELATIONSHIPS(session): r2 = MERGE_RELATIONSHIPS(Relationship(n1, "to", n2)) session.execute_write(Subgraph(relationships=[r2]).__db_merge__) - rels = get_relations(session=session) + rels = get_relationships(session=session) assert(len(rels) == 1) assert(len([r for r in rels if r["p"][0]["id"] == 1 and r["p"][2]["id"] == 2 and r["p"][1] == "to"]) == 1)