From 85e07ac67d72565a0100d520088d0caa94973265 Mon Sep 17 00:00:00 2001
From: Dejan Lozanovic
Date: Wed, 5 Feb 2025 11:26:20 +0000
Subject: [PATCH 1/5] Pre pr adbc ingestion

---
 .gitignore                                 |   2 +-
 src/matchbox/server/postgresql/utils/db.py | 108 +++++++++++++++++++++
 2 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 16df8831..b04cc875 100644
--- a/.gitignore
+++ b/.gitignore
@@ -190,4 +190,4 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
\ No newline at end of file
+.idea/
\ No newline at end of file

diff --git a/src/matchbox/server/postgresql/utils/db.py b/src/matchbox/server/postgresql/utils/db.py
index e07fffb8..85351382 100644
--- a/src/matchbox/server/postgresql/utils/db.py
+++ b/src/matchbox/server/postgresql/utils/db.py
@@ -4,6 +4,11 @@
 import pstats
 from itertools import islice
 from typing import Any, Callable, Iterable
+from datetime import datetime
+
+import pyarrow as pa
+import adbc_driver_postgresql.dbapi
+from adbc_driver_manager.dbapi import Connection as ADBCConnection
 
 from pg_bulk_ingest import Delete, Upsert, ingest
 from sqlalchemy import Engine, Index, MetaData, Table, func
@@ -162,3 +167,106 @@ def batch_ingest(
         upsert=Upsert.IF_PRIMARY_KEY,
         delete=Delete.OFF,
     )
+
+POSTGRESQL_URI = "postgresql://postgres:postgres@localhost:5432/postgres"
+def adbc_ingest_data(clusters:pa.Table, contains:pa.Table, probabilities:pa.Table) -> bool:
+    """Ingest data into PostgreSQL using PyArrow ADBC ingestion.
+    Args: clusters: pa.Table, contains: pa.Table, probabilities: pa.Table
+    """
+    suffix = datetime.now().strftime("%Y%m%d%H%M%S")
+    if _adbc_insert_data(clusters, contains, probabilities, suffix):
+        # the staging tables are created unqualified, i.e. in the default
+        # "public" schema, so that is the schema passed here
+        return _create_adbc_table_constraints("public", suffix)
+    else:
+        return False
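+# Assumed overall flow of the helpers below (staging-and-swap): bulk-load the
+# three Arrow tables into suffixed staging tables via ADBC, attach primary
+# keys, indexes and foreign keys to them, then drop the live tables and
+# rename the staged ones into place.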
+
+def _create_adbc_table_constraints(db_schema:str, sufix:str) -> bool:
+    """Create primary keys, indexes and foreign keys on the staging tables.
+    Args: db_schema: str, the name of the schema
+    """
+    # Cluster
+    _run_query(f"ALTER TABLE {db_schema}.clusters_{sufix} ADD PRIMARY KEY (cluster_id)")
+    _run_query(f"""ALTER TABLE {db_schema}.probabilities_{sufix} ADD PRIMARY KEY (resolution, "cluster")""")
+    _run_query(f"CREATE UNIQUE INDEX cluster_hash_index_{sufix} ON {db_schema}.clusters_{sufix} USING btree (cluster_hash)")
+    # _run_query(f"CREATE UNIQUE INDEX clusters_adbc_clusters_is_{sufix} ON {db_schema}.clusters_{sufix} USING btree (cluster_id)")
+    _run_query(f"CREATE INDEX ix_clusters_id_gin_{sufix} ON {db_schema}.clusters_{sufix} USING gin (source_pk)")
+    _run_query(f"CREATE INDEX ix_mb_clusters_source_pk_{sufix} ON {db_schema}.clusters_{sufix} USING btree (source_pk)")
+
+    # Contains
+    _run_query(f"CREATE UNIQUE INDEX ix_contains_child_parent_{sufix} ON {db_schema}.contains_{sufix} USING btree (child, parent)")
+    _run_query(f"CREATE UNIQUE INDEX ix_contains_parent_child_{sufix} ON {db_schema}.contains_{sufix} USING btree (parent, child)")
+
+    # Foreign keys
+    _run_query(f"ALTER TABLE {db_schema}.clusters_{sufix} ADD CONSTRAINT clusters_dataset_fkey FOREIGN KEY (dataset) REFERENCES {db_schema}.sources(resolution_id)")
+    _run_query(f"""ALTER TABLE {db_schema}."contains_{sufix}" ADD CONSTRAINT contains_child_fkey FOREIGN KEY (child) REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""")
+    _run_query(f"""ALTER TABLE {db_schema}."contains_{sufix}" ADD CONSTRAINT contains_parent_fkey FOREIGN KEY (parent) REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""")
+    _run_query(f"""ALTER TABLE {db_schema}.probabilities_{sufix} ADD CONSTRAINT probabilities_cluster_fkey FOREIGN KEY ("cluster") REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""")
+    _run_query(f"ALTER TABLE {db_schema}.probabilities_{sufix} ADD CONSTRAINT probabilities_resolution_fkey FOREIGN KEY (resolution) REFERENCES {db_schema}.resolutions(resolution_id) ON DELETE CASCADE")
+
+    _run_queries([
+        f"""DROP TABLE IF EXISTS {db_schema}.clusters""",
+        f"""DROP TABLE IF EXISTS {db_schema}.contains""",
+        f"""DROP TABLE IF EXISTS {db_schema}.probabilities""",
+
+        f"""ALTER TABLE {db_schema}.clusters_{sufix} RENAME TO clusters""",
+        f"""ALTER TABLE {db_schema}.contains_{sufix} RENAME TO contains""",
+        f"""ALTER TABLE {db_schema}.probabilities_{sufix} RENAME TO probabilities"""
+    ])
+
+def _adbc_insert_data(clusters, contains, probabilities, suffix) -> bool:
+    # TODO: try except proper exception type from adbc
+    conn = adbc_driver_postgresql.dbapi.connect(POSTGRESQL_URI)
+    _run_query(f"CREATE TABLE clusters_{suffix} AS SELECT * FROM clusters")
+    _save_to_postgresql(
+        table=clusters,
+        conn=conn,
+        schema="",
+        table_name=f"clusters_{suffix}",
+    )
+    _run_query(f"CREATE TABLE contains_{suffix} AS SELECT * FROM contains")
+    _save_to_postgresql(
+        table=contains,
+        conn=conn,
+        schema="",
+        table_name=f"contains_{suffix}",
+    )
+    _run_query(f"CREATE TABLE probabilities_{suffix} AS SELECT * FROM probabilities")
+    _save_to_postgresql(
+        table=probabilities,
+        conn=conn,
+        schema="",
+        table_name=f"probabilities_{suffix}",
+    )
+    conn.commit()
+    return True
+
+def _run_query(query: str) -> None:
+    conn = get_engine()
+    conn.execute(text(query))
+    conn.commit()
+
+
+def _run_queries(queries: list[str]) -> None:
+    conn = get_engine()
+    conn.begin()
+    for query in queries:
+        conn.execute(text(query))
+    conn.commit()
+
+def _save_to_postgresql(
+    table: pa.Table, conn: ADBCConnection, schema: str, table_name: str
+):
+    """
+    Saves a PyArrow Table to PostgreSQL using ADBC.
+    """
+    with conn.cursor() as cursor:
+        # Convert PyArrow Table to Arrow RecordBatchStream for efficient transfer
+        batch_reader = pa.RecordBatchReader.from_batches(
+            table.schema, table.to_batches()
+        )
+        cursor.adbc_ingest(
+            table_name=table_name,
+            data=batch_reader,
+            mode="append",
+            db_schema_name=schema,
+        )
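For context between these two patches: `cursor.adbc_ingest` is the ADBC DBAPI
call that streams Arrow record batches directly into a PostgreSQL table, and it
is what `_save_to_postgresql` above wraps. A minimal, self-contained sketch of
the same call (the URI and table name here are placeholders, not values from
the patch):

    import pyarrow as pa
    import adbc_driver_postgresql.dbapi

    table = pa.table({"cluster_id": [1, 2], "cluster_hash": [b"a", b"b"]})
    uri = "postgresql://user:password@localhost:5432/postgres"  # placeholder
    with adbc_driver_postgresql.dbapi.connect(uri) as conn:
        with conn.cursor() as cursor:
            # mode="create" derives the table from the Arrow schema; the patch
            # itself uses mode="append" against tables created beforehand
            cursor.adbc_ingest("clusters_example", table, mode="create")
        conn.commit()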
From 6748dacbd112615e661851b8fa4e447b8f1e1389 Mon Sep 17 00:00:00 2001
From: Dejan Lozanovic
Date: Thu, 6 Feb 2025 09:06:36 +0000
Subject: [PATCH 2/5] Pre pr adbc ingestion

---
 pyproject.toml                             |   1 +
 src/matchbox/server/postgresql/utils/db.py | 126 ++++++++++--------
 .../server/postgresql/utils/insert.py      |  67 +---------
 uv.lock                                    |  96 ++++++++++---
 4 files changed, 150 insertions(+), 140 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b6b0a540..b8559262 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ dependencies = [
     "splink>=4.0.5,<4.1.0",
     "sqlalchemy>=2.0.35",
     "rich>=13.9.4",
+    "adbc-driver-postgresql>=1.4.0",
 ]
 
 [project.optional-dependencies]
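One caveat on the connection string the db.py hunk below assembles from the
MB__POSTGRES__* environment variables: a raw f-string breaks if the password
contains URL-reserved characters. A sketch of the same assembly with escaping
(variable names mirror what the hunk reads; this is a suggestion, not part of
the patch):

    import os
    from urllib.parse import quote_plus

    user = os.environ["MB__POSTGRES__USER"]
    password = quote_plus(os.environ["MB__POSTGRES__PASSWORD"])  # escape reserved chars
    host = os.environ["MB__POSTGRES__HOST"]
    port = os.environ["MB__POSTGRES__PORT"]
    database = os.environ["MB__POSTGRES__DATABASE"]
    POSTGRESQL_URI = f"postgresql://{user}:{password}@{host}:{port}/{database}"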
diff --git a/src/matchbox/server/postgresql/utils/db.py b/src/matchbox/server/postgresql/utils/db.py
index 85351382..5bef6999 100644
--- a/src/matchbox/server/postgresql/utils/db.py
+++ b/src/matchbox/server/postgresql/utils/db.py
@@ -1,6 +1,7 @@
 import contextlib
 import cProfile
 import io
+import os
 import pstats
 from itertools import islice
 from typing import Any, Callable, Iterable
@@ -9,11 +10,13 @@
 
 import pyarrow as pa
 import adbc_driver_postgresql.dbapi
 from adbc_driver_manager.dbapi import Connection as ADBCConnection
+from adbc_driver_manager import DatabaseError as ADBCDatabaseError
 
 from pg_bulk_ingest import Delete, Upsert, ingest
-from sqlalchemy import Engine, Index, MetaData, Table, func
+from sqlalchemy import Engine, Index, MetaData, Table, func, text
 from sqlalchemy.engine.base import Connection
 from sqlalchemy.orm import DeclarativeMeta, Session
+from sqlalchemy.exc import DatabaseError as AlchemyDatabaseError
 
 from matchbox.common.graph import (
     ResolutionEdge,
@@ -168,40 +171,51 @@ def batch_ingest(
         delete=Delete.OFF,
     )
 
-POSTGRESQL_URI = "postgresql://postgres:postgres@localhost:5432/postgres"
-def adbc_ingest_data(clusters:pa.Table, contains:pa.Table, probabilities:pa.Table) -> bool:
+
+MB__POSTGRES__PASSWORD = os.environ["MB__POSTGRES__PASSWORD"]
+MB__POSTGRES__PORT = os.environ["MB__POSTGRES__PORT"]
+MB__POSTGRES__USER = os.environ["MB__POSTGRES__USER"]
+MB__POSTGRES__DATABASE = os.environ["MB__POSTGRES__DATABASE"]
+MB__POSTGRES__HOST = os.environ["MB__POSTGRES__HOST"]
+MB__POSTGRES__SCHEMA = os.environ["MB__POSTGRES__DB_SCHEMA"]
+
+
+POSTGRESQL_URI = f"postgresql://{MB__POSTGRES__USER}:{MB__POSTGRES__PASSWORD}@{MB__POSTGRES__HOST}:{MB__POSTGRES__PORT}/{MB__POSTGRES__DATABASE}"
+
+def adbc_ingest_data(clusters:pa.Table, contains:pa.Table, probabilities:pa.Table, engine:Engine, resolution_id:int) -> bool:
     """Ingest data into PostgreSQL using PyArrow ADBC ingestion.
-    Args: clusters: pa.Table, contains: pa.Table, probabilities: pa.Table
+    Args: clusters: pa.Table, contains: pa.Table, probabilities: pa.Table, engine: Engine
     """
-    suffix = datetime.now().strftime("%Y%m%d%H%M%S")
-    if _adbc_insert_data(clusters, contains, probabilities, suffix):
-        # the staging tables are created unqualified, i.e. in the default
-        # "public" schema, so that is the schema passed here
-        return _create_adbc_table_constraints("public", suffix)
-    else:
-        return False
+
+    with engine.connect() as alchemy_conn:
+        suffix = datetime.now().strftime("%Y%m%d%H%M%S")
+        if _adbc_insert_data(clusters, contains, probabilities, suffix, alchemy_conn, resolution_id):
+            return _create_adbc_table_constraints(MB__POSTGRES__SCHEMA, suffix, alchemy_conn)
+        else:
+            return False
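+# Note on the design below (as understood from the code): DDL such as
+# CREATE TABLE ... AS runs on the SQLAlchemy connection, while a separate
+# ADBC connection streams the Arrow batches; the WHERE clause on the
+# probabilities copy drops the rows previously proposed by the resolution
+# being re-inserted.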
 
-def _create_adbc_table_constraints(db_schema:str, sufix:str) -> bool:
+def _create_adbc_table_constraints(db_schema:str, sufix:str, conn:Connection) -> bool:
     """Create primary keys, indexes and foreign keys on the staging tables.
     Args: db_schema: str, the name of the schema
     """
     # Cluster
-    _run_query(f"ALTER TABLE {db_schema}.clusters_{sufix} ADD PRIMARY KEY (cluster_id)")
-    _run_query(f"""ALTER TABLE {db_schema}.probabilities_{sufix} ADD PRIMARY KEY (resolution, "cluster")""")
-    _run_query(f"CREATE UNIQUE INDEX cluster_hash_index_{sufix} ON {db_schema}.clusters_{sufix} USING btree (cluster_hash)")
-    # _run_query(f"CREATE UNIQUE INDEX clusters_adbc_clusters_is_{sufix} ON {db_schema}.clusters_{sufix} USING btree (cluster_id)")
-    _run_query(f"CREATE INDEX ix_clusters_id_gin_{sufix} ON {db_schema}.clusters_{sufix} USING gin (source_pk)")
-    _run_query(f"CREATE INDEX ix_mb_clusters_source_pk_{sufix} ON {db_schema}.clusters_{sufix} USING btree (source_pk)")
+    _run_query(f"ALTER TABLE {db_schema}.clusters_{sufix} ADD PRIMARY KEY (cluster_id)", conn)
+    _run_query(f"""ALTER TABLE {db_schema}.probabilities_{sufix} ADD PRIMARY KEY (resolution, "cluster")""", conn)
+    _run_query(f"CREATE UNIQUE INDEX cluster_hash_index_{sufix} ON {db_schema}.clusters_{sufix} USING btree (cluster_hash)", conn)
+    # _run_query(f"CREATE UNIQUE INDEX clusters_adbc_clusters_is_{sufix} ON {db_schema}.clusters_{sufix} USING btree (cluster_id)", conn)
+    _run_query(f"CREATE INDEX ix_clusters_id_gin_{sufix} ON {db_schema}.clusters_{sufix} USING gin (source_pk)", conn)
+    _run_query(f"CREATE INDEX ix_mb_clusters_source_pk_{sufix} ON {db_schema}.clusters_{sufix} USING btree (source_pk)", conn)
 
     # Contains
-    _run_query(f"CREATE UNIQUE INDEX ix_contains_child_parent_{sufix} ON {db_schema}.contains_{sufix} USING btree (child, parent)")
-    _run_query(f"CREATE UNIQUE INDEX ix_contains_parent_child_{sufix} ON {db_schema}.contains_{sufix} USING btree (parent, child)")
+    _run_query(f"CREATE UNIQUE INDEX ix_contains_child_parent_{sufix} ON {db_schema}.contains_{sufix} USING btree (child, parent)", conn)
+    _run_query(f"CREATE UNIQUE INDEX ix_contains_parent_child_{sufix} ON {db_schema}.contains_{sufix} USING btree (parent, child)", conn)
 
     # Foreign keys
-    _run_query(f"ALTER TABLE {db_schema}.clusters_{sufix} ADD CONSTRAINT clusters_dataset_fkey FOREIGN KEY (dataset) REFERENCES {db_schema}.sources(resolution_id)")
-    _run_query(f"""ALTER TABLE {db_schema}."contains_{sufix}" ADD CONSTRAINT contains_child_fkey FOREIGN KEY (child) REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""")
-    _run_query(f"""ALTER TABLE {db_schema}."contains_{sufix}" ADD CONSTRAINT contains_parent_fkey FOREIGN KEY (parent) REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""")
-    _run_query(f"""ALTER TABLE {db_schema}.probabilities_{sufix} ADD CONSTRAINT probabilities_cluster_fkey FOREIGN KEY ("cluster") REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""")
-    _run_query(f"ALTER TABLE {db_schema}.probabilities_{sufix} ADD CONSTRAINT probabilities_resolution_fkey FOREIGN KEY (resolution) REFERENCES {db_schema}.resolutions(resolution_id) ON DELETE CASCADE")
+    _run_query(f"ALTER TABLE {db_schema}.clusters_{sufix} ADD CONSTRAINT clusters_dataset_fkey FOREIGN KEY (dataset) REFERENCES {db_schema}.sources(resolution_id)", conn)
+    _run_query(f"""ALTER TABLE {db_schema}."contains_{sufix}" ADD CONSTRAINT contains_child_fkey FOREIGN KEY (child) REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""", conn)
+    _run_query(f"""ALTER TABLE {db_schema}."contains_{sufix}" ADD CONSTRAINT contains_parent_fkey FOREIGN KEY (parent) REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""", conn)
+    _run_query(f"""ALTER TABLE {db_schema}.probabilities_{sufix} ADD CONSTRAINT probabilities_cluster_fkey FOREIGN KEY ("cluster") REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""", conn)
+    _run_query(f"ALTER TABLE {db_schema}.probabilities_{sufix} ADD CONSTRAINT probabilities_resolution_fkey FOREIGN KEY (resolution) REFERENCES {db_schema}.resolutions(resolution_id) ON DELETE CASCADE", conn)
 
     _run_queries([
         f"""DROP TABLE IF EXISTS {db_schema}.clusters""",
         f"""DROP TABLE IF EXISTS {db_schema}.contains""",
         f"""DROP TABLE IF EXISTS {db_schema}.probabilities""",
 
         f"""ALTER TABLE {db_schema}.clusters_{sufix} RENAME TO clusters""",
         f"""ALTER TABLE {db_schema}.contains_{sufix} RENAME TO contains""",
         f"""ALTER TABLE {db_schema}.probabilities_{sufix} RENAME TO probabilities"""
-    ])
-
-def _adbc_insert_data(clusters, contains, probabilities, suffix) -> bool:
-    # TODO: try except proper exception type from adbc
-    conn = adbc_driver_postgresql.dbapi.connect(POSTGRESQL_URI)
-    _run_query(f"CREATE TABLE clusters_{suffix} AS SELECT * FROM clusters")
-    _save_to_postgresql(
-        table=clusters,
-        conn=conn,
-        schema="",
-        table_name=f"clusters_{suffix}",
-    )
-    _run_query(f"CREATE TABLE contains_{suffix} AS SELECT * FROM contains")
-    _save_to_postgresql(
-        table=contains,
-        conn=conn,
-        schema="",
-        table_name=f"contains_{suffix}",
-    )
-    _run_query(f"CREATE TABLE probabilities_{suffix} AS SELECT * FROM probabilities")
-    _save_to_postgresql(
-        table=probabilities,
-        conn=conn,
-        schema="",
-        table_name=f"probabilities_{suffix}",
-    )
-    conn.commit()
+    ], conn)
     return True
 
-def _run_query(query: str) -> None:
-    conn = get_engine()
+def _adbc_insert_data(clusters:pa.Table, contains:pa.Table, probabilities:pa.Table, suffix:str, alchemy_conn:Connection, resolution_id:int) -> bool:
+    # TODO: try except proper exception type from adbc
+    with adbc_driver_postgresql.dbapi.connect(POSTGRESQL_URI) as conn:
+        try:
+            _run_query(f"CREATE TABLE clusters_{suffix} AS SELECT * FROM clusters", alchemy_conn)
+            _save_to_postgresql(
+                table=clusters,
+                conn=conn,
+                schema=MB__POSTGRES__SCHEMA,
+                table_name=f"clusters_{suffix}",
+            )
+            _run_query(f"CREATE TABLE contains_{suffix} AS SELECT * FROM contains", alchemy_conn)
+            _save_to_postgresql(
+                table=contains,
+                conn=conn,
+                schema=MB__POSTGRES__SCHEMA,
+                table_name=f"contains_{suffix}",
+            )
+            _run_query(f"CREATE TABLE probabilities_{suffix} AS SELECT * FROM probabilities WHERE resolution != {resolution_id}", alchemy_conn)
+            _save_to_postgresql(
+                table=probabilities,
+                conn=conn,
+                schema=MB__POSTGRES__SCHEMA,
+                table_name=f"probabilities_{suffix}",
+            )
+            conn.commit()
+            return True
+        except ADBCDatabaseError:
+            return False
+        except AlchemyDatabaseError:
+
return False + +def _run_query(query: str,conn:Connection) -> None: conn.execute(text(query)) conn.commit() -def _run_queries(queries: list[str]) -> None: - conn = get_engine() +def _run_queries(queries: list[str], conn:Connection) -> None: conn.begin() for query in queries: conn.execute(text(query)) diff --git a/src/matchbox/server/postgresql/utils/insert.py b/src/matchbox/server/postgresql/utils/insert.py index 773f5be7..5b9c800e 100644 --- a/src/matchbox/server/postgresql/utils/insert.py +++ b/src/matchbox/server/postgresql/utils/insert.py @@ -25,7 +25,7 @@ Resolutions, Sources, ) -from matchbox.server.postgresql.utils.db import batch_ingest, hash_to_hex_decode +from matchbox.server.postgresql.utils.db import batch_ingest, hash_to_hex_decode , adbc_ingest_data logic_logger = logging.getLogger("mb_logic") @@ -493,69 +493,6 @@ def insert_results( resolution=resolution, results=results, engine=engine ) - with Session(engine) as session: - try: - # Clear existing probabilities for this resolution - session.execute( - delete(Probabilities).where( - Probabilities.resolution == resolution.resolution_id - ) - ) - - session.commit() - logic_logger.info(f"[{resolution.name}] Removed old probabilities") - - except SQLAlchemyError as e: - session.rollback() - logic_logger.error( - f"[{resolution.name}] Failed to clear old probabilities: {str(e)}" - ) - raise - - with engine.connect() as conn: - try: - logic_logger.info( - f"[{resolution.name}] Inserting {clusters.shape[0]:,} results objects" - ) - - batch_ingest( - records=[tuple(c.values()) for c in clusters.to_pylist()], - table=Clusters, - conn=conn, - batch_size=batch_size, - ) - - logic_logger.info( - f"[{resolution.name}] Successfully inserted {clusters.shape[0]} " - "objects into Clusters table" - ) - - batch_ingest( - records=[tuple(c.values()) for c in contains.to_pylist()], - table=Contains, - conn=conn, - batch_size=batch_size, - ) - - logic_logger.info( - f"[{resolution.name}] Successfully inserted {contains.shape[0]} " - "objects into Contains table" - ) - - batch_ingest( - records=[tuple(c.values()) for c in probabilities.to_pylist()], - table=Probabilities, - conn=conn, - batch_size=batch_size, - ) - - logic_logger.info( - f"[{resolution.name}] Successfully inserted " - f"{probabilities.shape[0]} objects into Probabilities table" - ) - - except SQLAlchemyError as e: - logic_logger.error(f"[{resolution.name}] Failed to insert data: {str(e)}") - raise + adbc_ingest_data(clusters=clusters, contains=contains, probabilities=probabilities, engine=engine, resolution_id=resolution.resolution_id) logic_logger.info(f"[{resolution.name}] Insert operation complete!") diff --git a/uv.lock b/uv.lock index 2a33a846..6e30c485 100644 --- a/uv.lock +++ b/uv.lock @@ -7,6 +7,49 @@ resolution-markers = [ "python_full_version < '3.12' and platform_python_implementation == 'PyPy'", ] +[[package]] +name = "adbc-driver-manager" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/2c/580f0e2024ce0c6330c5e990eaeffd2b206355b5929d19889b3eac008064/adbc_driver_manager-1.4.0.tar.gz", hash = "sha256:fb8437f3e3aad9aa119ccbdfe05b83418ae552ca7e1e3f6d46d53e1f316610b8", size = 107712 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/07/72cfaec3fb1e5090e4495bb310e4ae62d4262ea2330ee7f7395909b3505d/adbc_driver_manager-1.4.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = 
"sha256:7bd274386c3decd52212d225cc46f934ce3503a3f9e0e823e4f8a40162d89b2b", size = 381818 }, + { url = "https://files.pythonhosted.org/packages/20/80/efb076dd9148f903a4e96f969dd7a0cdafeeabba8e14c1db9bf21452ce31/adbc_driver_manager-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:90d1055f2557d703fa2e7073d7b26cc6394159ff4b1422d2dae05d08b774f687", size = 368704 }, + { url = "https://files.pythonhosted.org/packages/0f/29/ed9525e46d474230a0fb310ab077a681b49c45c6e4e5a305e0c704702256/adbc_driver_manager-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db899b18caeb9e26b99c0baf17e239f35274d6c3dc3aa0e3afc4d278ef3c6747", size = 2150204 }, + { url = "https://files.pythonhosted.org/packages/a5/32/c00c7b5dd4c187003f0f6799090d17db42effc3396b5a6971228e0d1cbb4/adbc_driver_manager-1.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c1a7079c7f254c7010afe08872c4bfab6e716a507fc343c32204e8ce6bfd898", size = 2164967 }, + { url = "https://files.pythonhosted.org/packages/37/30/3b62800f5f7aad8c51e2e71fc8e9a87dadb007588289fddab09d180ea1ae/adbc_driver_manager-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:27a8943ba838a4038b4ec6466e11eafe336ca5d382adb7c5d2cc9c00dd44bd10", size = 538168 }, + { url = "https://files.pythonhosted.org/packages/59/30/e76d5bdb044b4126b4deed32ff5cf02b62d9e7eba4eec5a06c6005a3952f/adbc_driver_manager-1.4.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:3770180aa2cccc6c18ffd189d623ebbf35dccad75895f0fd4275b73d39297849", size = 381431 }, + { url = "https://files.pythonhosted.org/packages/c8/25/a96b04c0162253ff9e014b774789cc76e84e536f9ef874c9d2af240bfa42/adbc_driver_manager-1.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6ba26a0510d1e7596c827d1ef58185d32fb4f97837e463608801ec3b4850ce74", size = 365687 }, + { url = "https://files.pythonhosted.org/packages/1e/8d/caae84fceed7e2cff414ce9a17f62eee0ceca98058bb8b1fbde1a1941904/adbc_driver_manager-1.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50eedc6b173a80a0a8f46ef6352761a2c53063ac94327b1f45603db33176010d", size = 2129076 }, + { url = "https://files.pythonhosted.org/packages/fb/8b/7d4ce1622b2930205261c5b7dae7ded7f3328033fdfe02f213f2bb41720f/adbc_driver_manager-1.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:168a07ce237902dd9aa0e079d7e8c131ad6f61fb20f3b43020597b2a3c29d3ea", size = 2161148 }, + { url = "https://files.pythonhosted.org/packages/59/ab/991d5dc4d8b65adab2990f949524ce5ca504aa84727bc501fa6ba919288f/adbc_driver_manager-1.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1c165069fe3bcffc25e593cb933958b4a0563aff317cb6a37fc390c8dd0ed26f", size = 535674 }, + { url = "https://files.pythonhosted.org/packages/80/97/e33d558177e8dcbdaec2191bc37970e5068e46095a21dc157aaa65530e58/adbc_driver_manager-1.4.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:acfe7ca9c227a0835abc52289601337bde7a96c20befc443c9ba4eb78d673214", size = 379612 }, + { url = "https://files.pythonhosted.org/packages/1e/4f/11aacce2421902b9bed07970d5f4565c5948f58788392962ffeceadbac21/adbc_driver_manager-1.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b5fbf822e90fc6df5321067a25564cae11b404d4c9ba56fe4471c73c5b54e38f", size = 363412 }, + { url = "https://files.pythonhosted.org/packages/0d/bd/68672ab658dbcb154500cb668e2fe630861b3ac3348c5cdb6bf501ae10ab/adbc_driver_manager-1.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e1d255116003514088990cc5af5995cc7f5d2ebea96b00e5b2a1f4d922d7137", size = 
2123358 }, + { url = "https://files.pythonhosted.org/packages/f7/4a/5a966873541d580bf27711355ed9fd40cd46bea702eb092c6825306957a6/adbc_driver_manager-1.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682364a252de1029fa12a2f7526d4cb9e8e03e009d8d0f64bc3530a7539a74f6", size = 2156251 }, + { url = "https://files.pythonhosted.org/packages/ca/4b/19d32beccfcb647451db25cc2e40dbeb6b763bf274cdc85d22e68511baa4/adbc_driver_manager-1.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:f01441fb8cc7859037ae0c39692bffa26f2fa0da68664589ced5b3794540f402", size = 533846 }, +] + +[[package]] +name = "adbc-driver-postgresql" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "adbc-driver-manager" }, + { name = "importlib-resources" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2a/8b/5d088a3c4d739a3a62df081a819117df924318b5044ab3214d72723591ad/adbc_driver_postgresql-1.4.0.tar.gz", hash = "sha256:0bc9c34911b8d1a2e333ee5ef3d284a6517a8f20d5509b2c183cb4cf8337efc7", size = 18402 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/69/165a7155a9514aa472beae1e16fc93c51a363c7deed0db7d2d470c28788d/adbc_driver_postgresql-1.4.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:ca62087f1496192c3131a30fc3e4d2710fa3b31b1982bf8374b6a05fdc446513", size = 2683068 }, + { url = "https://files.pythonhosted.org/packages/7c/37/d991e047b931a3f005156a43c8be92ffbfd234589d5969b38adee03faa80/adbc_driver_postgresql-1.4.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c75827a5cfdc8ea429c21292ac6bd598a64a42af53797c7ace3ad2b3044ddc71", size = 2996305 }, + { url = "https://files.pythonhosted.org/packages/2d/5e/dc25c82cf2055e500b2f8e47093cf6037fd636866e0e6c54ccadccda1e6a/adbc_driver_postgresql-1.4.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ce4e30ced25802acd6d0baad70e7b9c7394869043c1349206218d51d27613c1", size = 3190063 }, + { url = "https://files.pythonhosted.org/packages/42/bd/910e7b8e0d6e91f94348fe23a2f8d3dfc4e86a0b933b26d41a4298f14772/adbc_driver_postgresql-1.4.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d04b2ea0353bacca814fb15c5da0d407888d27e86b4f3f58d66ebe71a2e8d3da", size = 2846350 }, + { url = "https://files.pythonhosted.org/packages/7d/fc/19fb644ab228040a01971e21bb42c98c48a95d3701e68dd38fa1bfb0bc61/adbc_driver_postgresql-1.4.0-py3-none-win_amd64.whl", hash = "sha256:10ae2584e47928e15da8ca541bbf594de13d6c1afc8c1adad6af3f832cf0a5fc", size = 2656378 }, +] + [[package]] name = "altair" version = "5.5.0" @@ -822,6 +865,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/17/621d3a59430851a327421fdbec9ec8494d7fadaffc6dfdd42d4a95accbf2/igraph-0.11.8-cp39-abi3-win_amd64.whl", hash = "sha256:248831a6162130f16909c1f776cc246b48f692339ea4baca489cad4ed8dc0e13", size = 1976778 }, ] +[[package]] +name = "importlib-resources" +version = "6.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/8c/f834fbf984f691b4f7ff60f50b514cc3de5cc08abfc3295564dd89c5e2e7/importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c", size = 44693 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461 }, +] + [[package]] name = "iniconfig" version = 
"2.0.0" @@ -1131,6 +1183,7 @@ name = "matchbox" version = "0.2.1" source = { editable = "." } dependencies = [ + { name = "adbc-driver-postgresql" }, { name = "click" }, { name = "connectorx" }, { name = "duckdb" }, @@ -1184,6 +1237,7 @@ typing = [ [package.metadata] requires-dist = [ + { name = "adbc-driver-postgresql", specifier = ">=1.4.0" }, { name = "boto3", marker = "extra == 'server'", specifier = ">=1.35.99" }, { name = "click", specifier = ">=8.1.7" }, { name = "connectorx", specifier = ">=0.3.3" }, @@ -2480,27 +2534,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.9.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1e/7f/60fda2eec81f23f8aa7cbbfdf6ec2ca11eb11c273827933fb2541c2ce9d8/ruff-0.9.3.tar.gz", hash = "sha256:8293f89985a090ebc3ed1064df31f3b4b56320cdfcec8b60d3295bddb955c22a", size = 3586740 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/77/4fb790596d5d52c87fd55b7160c557c400e90f6116a56d82d76e95d9374a/ruff-0.9.3-py3-none-linux_armv6l.whl", hash = "sha256:7f39b879064c7d9670197d91124a75d118d00b0990586549949aae80cdc16624", size = 11656815 }, - { url = "https://files.pythonhosted.org/packages/a2/a8/3338ecb97573eafe74505f28431df3842c1933c5f8eae615427c1de32858/ruff-0.9.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a187171e7c09efa4b4cc30ee5d0d55a8d6c5311b3e1b74ac5cb96cc89bafc43c", size = 11594821 }, - { url = "https://files.pythonhosted.org/packages/8e/89/320223c3421962762531a6b2dd58579b858ca9916fb2674874df5e97d628/ruff-0.9.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c59ab92f8e92d6725b7ded9d4a31be3ef42688a115c6d3da9457a5bda140e2b4", size = 11040475 }, - { url = "https://files.pythonhosted.org/packages/b2/bd/1d775eac5e51409535804a3a888a9623e87a8f4b53e2491580858a083692/ruff-0.9.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dc153c25e715be41bb228bc651c1e9b1a88d5c6e5ed0194fa0dfea02b026439", size = 11856207 }, - { url = "https://files.pythonhosted.org/packages/7f/c6/3e14e09be29587393d188454064a4aa85174910d16644051a80444e4fd88/ruff-0.9.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:646909a1e25e0dc28fbc529eab8eb7bb583079628e8cbe738192853dbbe43af5", size = 11420460 }, - { url = "https://files.pythonhosted.org/packages/ef/42/b7ca38ffd568ae9b128a2fa76353e9a9a3c80ef19746408d4ce99217ecc1/ruff-0.9.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a5a46e09355695fbdbb30ed9889d6cf1c61b77b700a9fafc21b41f097bfbba4", size = 12605472 }, - { url = "https://files.pythonhosted.org/packages/a6/a1/3167023f23e3530fde899497ccfe239e4523854cb874458ac082992d206c/ruff-0.9.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c4bb09d2bbb394e3730d0918c00276e79b2de70ec2a5231cd4ebb51a57df9ba1", size = 13243123 }, - { url = "https://files.pythonhosted.org/packages/d0/b4/3c600758e320f5bf7de16858502e849f4216cb0151f819fa0d1154874802/ruff-0.9.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96a87ec31dc1044d8c2da2ebbed1c456d9b561e7d087734336518181b26b3aa5", size = 12744650 }, - { url = "https://files.pythonhosted.org/packages/be/38/266fbcbb3d0088862c9bafa8b1b99486691d2945a90b9a7316336a0d9a1b/ruff-0.9.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb7554aca6f842645022fe2d301c264e6925baa708b392867b7a62645304df4", size = 14458585 }, - { url = 
"https://files.pythonhosted.org/packages/63/a6/47fd0e96990ee9b7a4abda62de26d291bd3f7647218d05b7d6d38af47c30/ruff-0.9.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cabc332b7075a914ecea912cd1f3d4370489c8018f2c945a30bcc934e3bc06a6", size = 12419624 }, - { url = "https://files.pythonhosted.org/packages/84/5d/de0b7652e09f7dda49e1a3825a164a65f4998175b6486603c7601279baad/ruff-0.9.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:33866c3cc2a575cbd546f2cd02bdd466fed65118e4365ee538a3deffd6fcb730", size = 11843238 }, - { url = "https://files.pythonhosted.org/packages/9e/be/3f341ceb1c62b565ec1fb6fd2139cc40b60ae6eff4b6fb8f94b1bb37c7a9/ruff-0.9.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:006e5de2621304c8810bcd2ee101587712fa93b4f955ed0985907a36c427e0c2", size = 11484012 }, - { url = "https://files.pythonhosted.org/packages/a3/c8/ff8acbd33addc7e797e702cf00bfde352ab469723720c5607b964491d5cf/ruff-0.9.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:ba6eea4459dbd6b1be4e6bfc766079fb9b8dd2e5a35aff6baee4d9b1514ea519", size = 12038494 }, - { url = "https://files.pythonhosted.org/packages/73/b1/8d9a2c0efbbabe848b55f877bc10c5001a37ab10aca13c711431673414e5/ruff-0.9.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:90230a6b8055ad47d3325e9ee8f8a9ae7e273078a66401ac66df68943ced029b", size = 12473639 }, - { url = "https://files.pythonhosted.org/packages/cb/44/a673647105b1ba6da9824a928634fe23186ab19f9d526d7bdf278cd27bc3/ruff-0.9.3-py3-none-win32.whl", hash = "sha256:eabe5eb2c19a42f4808c03b82bd313fc84d4e395133fb3fc1b1516170a31213c", size = 9834353 }, - { url = "https://files.pythonhosted.org/packages/c3/01/65cadb59bf8d4fbe33d1a750103e6883d9ef302f60c28b73b773092fbde5/ruff-0.9.3-py3-none-win_amd64.whl", hash = "sha256:040ceb7f20791dfa0e78b4230ee9dce23da3b64dd5848e40e3bf3ab76468dcf4", size = 10821444 }, - { url = "https://files.pythonhosted.org/packages/69/cb/b3fe58a136a27d981911cba2f18e4b29f15010623b79f0f2510fd0d31fd3/ruff-0.9.3-py3-none-win_arm64.whl", hash = "sha256:800d773f6d4d33b0a3c60e2c6ae8f4c202ea2de056365acfa519aa48acf28e0b", size = 10038168 }, +version = "0.9.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/17/529e78f49fc6f8076f50d985edd9a2cf011d1dbadb1cdeacc1d12afc1d26/ruff-0.9.4.tar.gz", hash = "sha256:6907ee3529244bb0ed066683e075f09285b38dd5b4039370df6ff06041ca19e7", size = 3599458 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/f8/3fafb7804d82e0699a122101b5bee5f0d6e17c3a806dcbc527bb7d3f5b7a/ruff-0.9.4-py3-none-linux_armv6l.whl", hash = "sha256:64e73d25b954f71ff100bb70f39f1ee09e880728efb4250c632ceed4e4cdf706", size = 11668400 }, + { url = "https://files.pythonhosted.org/packages/2e/a6/2efa772d335da48a70ab2c6bb41a096c8517ca43c086ea672d51079e3d1f/ruff-0.9.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6ce6743ed64d9afab4fafeaea70d3631b4d4b28b592db21a5c2d1f0ef52934bf", size = 11628395 }, + { url = "https://files.pythonhosted.org/packages/dc/d7/cd822437561082f1c9d7225cc0d0fbb4bad117ad7ac3c41cd5d7f0fa948c/ruff-0.9.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:54499fb08408e32b57360f6f9de7157a5fec24ad79cb3f42ef2c3f3f728dfe2b", size = 11090052 }, + { url = "https://files.pythonhosted.org/packages/9e/67/3660d58e893d470abb9a13f679223368ff1684a4ef40f254a0157f51b448/ruff-0.9.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37c892540108314a6f01f105040b5106aeb829fa5fb0561d2dcaf71485021137", size = 11882221 }, + { url = 
"https://files.pythonhosted.org/packages/79/d1/757559995c8ba5f14dfec4459ef2dd3fcea82ac43bc4e7c7bf47484180c0/ruff-0.9.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:de9edf2ce4b9ddf43fd93e20ef635a900e25f622f87ed6e3047a664d0e8f810e", size = 11424862 }, + { url = "https://files.pythonhosted.org/packages/c0/96/7915a7c6877bb734caa6a2af424045baf6419f685632469643dbd8eb2958/ruff-0.9.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87c90c32357c74f11deb7fbb065126d91771b207bf9bfaaee01277ca59b574ec", size = 12626735 }, + { url = "https://files.pythonhosted.org/packages/0e/cc/dadb9b35473d7cb17c7ffe4737b4377aeec519a446ee8514123ff4a26091/ruff-0.9.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:56acd6c694da3695a7461cc55775f3a409c3815ac467279dfa126061d84b314b", size = 13255976 }, + { url = "https://files.pythonhosted.org/packages/5f/c3/ad2dd59d3cabbc12df308cced780f9c14367f0321e7800ca0fe52849da4c/ruff-0.9.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0c93e7d47ed951b9394cf352d6695b31498e68fd5782d6cbc282425655f687a", size = 12752262 }, + { url = "https://files.pythonhosted.org/packages/c7/17/5f1971e54bd71604da6788efd84d66d789362b1105e17e5ccc53bba0289b/ruff-0.9.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4c8772670aecf037d1bf7a07c39106574d143b26cfe5ed1787d2f31e800214", size = 14401648 }, + { url = "https://files.pythonhosted.org/packages/30/24/6200b13ea611b83260501b6955b764bb320e23b2b75884c60ee7d3f0b68e/ruff-0.9.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfc5f1d7afeda8d5d37660eeca6d389b142d7f2b5a1ab659d9214ebd0e025231", size = 12414702 }, + { url = "https://files.pythonhosted.org/packages/34/cb/f5d50d0c4ecdcc7670e348bd0b11878154bc4617f3fdd1e8ad5297c0d0ba/ruff-0.9.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faa935fc00ae854d8b638c16a5f1ce881bc3f67446957dd6f2af440a5fc8526b", size = 11859608 }, + { url = "https://files.pythonhosted.org/packages/d6/f4/9c8499ae8426da48363bbb78d081b817b0f64a9305f9b7f87eab2a8fb2c1/ruff-0.9.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a6c634fc6f5a0ceae1ab3e13c58183978185d131a29c425e4eaa9f40afe1e6d6", size = 11485702 }, + { url = "https://files.pythonhosted.org/packages/18/59/30490e483e804ccaa8147dd78c52e44ff96e1c30b5a95d69a63163cdb15b/ruff-0.9.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:433dedf6ddfdec7f1ac7575ec1eb9844fa60c4c8c2f8887a070672b8d353d34c", size = 12067782 }, + { url = "https://files.pythonhosted.org/packages/3d/8c/893fa9551760b2f8eb2a351b603e96f15af167ceaf27e27ad873570bc04c/ruff-0.9.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d612dbd0f3a919a8cc1d12037168bfa536862066808960e0cc901404b77968f0", size = 12483087 }, + { url = "https://files.pythonhosted.org/packages/23/15/f6751c07c21ca10e3f4a51ea495ca975ad936d780c347d9808bcedbd7182/ruff-0.9.4-py3-none-win32.whl", hash = "sha256:db1192ddda2200671f9ef61d9597fcef89d934f5d1705e571a93a67fb13a4402", size = 9852302 }, + { url = "https://files.pythonhosted.org/packages/12/41/2d2d2c6a72e62566f730e49254f602dfed23019c33b5b21ea8f8917315a1/ruff-0.9.4-py3-none-win_amd64.whl", hash = "sha256:05bebf4cdbe3ef75430d26c375773978950bbf4ee3c95ccb5448940dc092408e", size = 10850051 }, + { url = "https://files.pythonhosted.org/packages/c6/e6/3d6ec3bc3d254e7f005c543a661a41c3e788976d0e52a1ada195bd664344/ruff-0.9.4-py3-none-win_arm64.whl", hash = "sha256:585792f1e81509e38ac5123492f8875fbc36f3ede8185af0a26df348e5154f41", size = 10078251 }, ] [[package]] From 
3cb7937dfa1410f916790c68bc013a4b7e1597a3 Mon Sep 17 00:00:00 2001 From: Dejan Lozanovic Date: Mon, 10 Feb 2025 08:25:13 +0000 Subject: [PATCH 3/5] tests --- src/matchbox/server/postgresql/utils/db.py | 1 - test/unit/__init__.py | 0 test/unit/test_adbcingest.py | 130 +++++++++++++++++++++ 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 test/unit/__init__.py create mode 100644 test/unit/test_adbcingest.py diff --git a/src/matchbox/server/postgresql/utils/db.py b/src/matchbox/server/postgresql/utils/db.py index 5bef6999..e66abd98 100644 --- a/src/matchbox/server/postgresql/utils/db.py +++ b/src/matchbox/server/postgresql/utils/db.py @@ -229,7 +229,6 @@ def _create_adbc_table_constraints(db_schema:str, sufix:str, conn:Connection) -> return True def _adbc_insert_data(clusters:pa.Table, contains:pa.Table, probabilities:pa.Table, suffix:str, alchemy_conn:Connection, resolution_id:int) -> bool: - # TODO: try except proper exception type from adbc with adbc_driver_postgresql.dbapi.connect(POSTGRESQL_URI) as conn: try: _run_query(f"CREATE TABLE clusters_{suffix} AS SELECT * FROM clusters", alchemy_conn) diff --git a/test/unit/__init__.py b/test/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/test_adbcingest.py b/test/unit/test_adbcingest.py new file mode 100644 index 00000000..0129c8ba --- /dev/null +++ b/test/unit/test_adbcingest.py @@ -0,0 +1,130 @@ +import unittest +from unittest.mock import patch, MagicMock +import pyarrow as pa +import pandas as pd +from sqlalchemy.engine import Engine, Connection + + +from matchbox.server.postgresql.utils.db import adbc_ingest_data, _adbc_insert_data, _save_to_postgresql, _run_query, _run_queries + + +class TestAdbcIngestData(unittest.TestCase): + + @patch('my_module._create_adbc_table_constraints') + @patch('my_module._adbc_insert_data') + @patch('my_module.datetime') + def test_adbc_ingest_data(self, mock_datetime, mock_adbc_insert_data, mock_create_adbc_table_constraints): + # Mock datetime + mock_datetime.now.return_value.strftime.return_value = "20250101123045" + + # Create mock arguments + clusters = pa.Table.from_pandas(pd.DataFrame({"column1": [1, 2], "column2": [3, 4]})) + contains = pa.Table.from_pandas(pd.DataFrame({"column1": [1, 2], "column2": [3, 4]})) + probabilities = pa.Table.from_pandas(pd.DataFrame({"column1": [1, 2], "column2": [3, 4]})) + engine = MagicMock(spec=Engine) + resolution_id = 1 + + # Mock the engine connection context manager + mock_connection = engine.connect.return_value.__enter__.return_value + + # Test when _adbc_insert_data returns True + mock_adbc_insert_data.return_value = True + mock_create_adbc_table_constraints.return_value = True + result = adbc_ingest_data(clusters, contains, probabilities, engine, resolution_id) + self.assertTrue(result) + + # Test when _adbc_insert_data returns False + mock_adbc_insert_data.return_value = False + result = adbc_ingest_data(clusters, contains, probabilities, engine, resolution_id) + self.assertFalse(result) + + +@patch('my_module._save_to_postgresql') +@patch('my_module._run_query') +@patch('my_module.adbc_driver_postgresql.dbapi.connect') +def test_adbc_insert_data(self, mock_connect, mock_run_query, mock_save_to_postgresql): + # Mock the connect method + mock_conn = mock_connect.return_value.__enter__.return_value + + # Create mock arguments + clusters = pa.Table.from_pandas(pd.DataFrame({"column1": [1, 2], "column2": [3, 4]})) + contains = pa.Table.from_pandas(pd.DataFrame({"column1": [1, 2], "column2": [3, 4]})) + 
probabilities = pa.Table.from_pandas(pd.DataFrame({"column1": [1, 2], "column2": [3, 4]})) + suffix = "20250101123045" + alchemy_conn = MagicMock() + resolution_id = 1 + + # Test when all queries and saves succeed + mock_run_query.side_effect = [None, None, None] + mock_save_to_postgresql.side_effect = [None, None, None] + result = _adbc_insert_data(clusters, contains, probabilities, suffix, alchemy_conn, resolution_id) + self.assertTrue(result) + + # Test when a query fails + mock_run_query.side_effect = Exception("Query failed") + result = _adbc_insert_data(clusters, contains, probabilities, suffix, alchemy_conn, resolution_id) + self.assertFalse(result) + + # Test when save_to_postgresql fails + mock_run_query.side_effect = [None, None, None] + mock_save_to_postgresql.side_effect = [None, Exception("Save failed"), None] + result = _adbc_insert_data(clusters, contains, probabilities, suffix, alchemy_conn, resolution_id) + self.assertFalse(result) + + @patch('my_module.pa.RecordBatchReader.from_batches') + def test_save_to_postgresql(self, mock_from_batches): + # Mock the from_batches method + mock_batch_reader = MagicMock() + mock_from_batches.return_value = mock_batch_reader + + # Create mock arguments + table = pa.Table.from_pandas(pd.DataFrame({"column1": [1, 2], "column2": [3, 4]})) + conn = MagicMock() + schema = "test_schema" + table_name = "test_table" + + # Mock the cursor context manager + mock_cursor = conn.cursor.return_value.__enter__.return_value + + # Call the function + _save_to_postgresql(table, conn, schema, table_name) + + # Verify the cursor method was called correctly + mock_cursor.adbc_ingest.assert_called_once_with( + table_name=table_name, + data=mock_batch_reader, + mode="append", + db_schema_name=schema, + ) + + @patch('my_module.text') + def test_run_query(self, mock_text): + # Create mock arguments + query = "SELECT * FROM test_table" + conn = MagicMock(spec=Connection) + + # Call the function + _run_query(query, conn) + + # Verify the execute method was called correctly + conn.execute.assert_called_once_with(mock_text(query)) + conn.commit.assert_called_once() + + @patch('my_module.text') + def test_run_queries(self, mock_text): + # Create mock arguments + queries = ["SELECT * FROM test_table", "DELETE FROM test_table"] + conn = MagicMock(spec=Connection) + + # Call the function + _run_queries(queries, conn) + + # Verify the execute method was called correctly for each query + conn.begin.assert_called_once() + self.assertEqual(conn.execute.call_count, len(queries)) + for query in queries: + conn.execute.assert_any_call(mock_text(query)) + + +if __name__ == '__main__': + unittest.main() From 75268ee77c466d6d9c79a9b9b8b028187f549911 Mon Sep 17 00:00:00 2001 From: Dejan Lozanovic Date: Thu, 13 Feb 2025 09:23:50 +0000 Subject: [PATCH 4/5] sql alchemy changes --- src/matchbox/server/postgresql/orm.py | 308 +++++++++++---------- src/matchbox/server/postgresql/utils/db.py | 17 -- test/unit/test_adbcingest.py | 18 +- uv.lock | 42 +-- 4 files changed, 197 insertions(+), 188 deletions(-) diff --git a/src/matchbox/server/postgresql/orm.py b/src/matchbox/server/postgresql/orm.py index 93a1f336..c38897c2 100644 --- a/src/matchbox/server/postgresql/orm.py +++ b/src/matchbox/server/postgresql/orm.py @@ -22,27 +22,28 @@ class ResolutionFrom(CountMixin, MBDB.MatchboxBase): """Resolution lineage closure table with cached truth values.""" - __tablename__ = "resolution_from" - - # Columns - parent = Column( - BIGINT, - ForeignKey("resolutions.resolution_id", ondelete="CASCADE"), 
- primary_key=True, - ) - child = Column( - BIGINT, - ForeignKey("resolutions.resolution_id", ondelete="CASCADE"), - primary_key=True, - ) - level = Column(INTEGER, nullable=False) - truth_cache = Column(FLOAT, nullable=True) - - # Constraints - __table_args__ = ( - CheckConstraint("parent != child", name="no_self_reference"), - CheckConstraint("level > 0", name="positive_level"), - ) + def __init__(self, suffix=""): + self.__tablename__ = f"resolution_from{suffix}" + + # Columns + self.parent = Column( + BIGINT, + ForeignKey(f"resolutions{suffix}.resolution_id", ondelete="CASCADE"), + primary_key=True, + ) + self.child = Column( + BIGINT, + ForeignKey(f"resolutions{suffix}.resolution_id", ondelete="CASCADE"), + primary_key=True, + ) + self.level = Column(INTEGER, nullable=False) + self.truth_cache = Column(FLOAT, nullable=True) + + # Constraints + self.__table_args__ = ( + CheckConstraint("parent != child", name="no_self_reference"), + CheckConstraint("level > 0", name="positive_level"), + ) class Resolutions(CountMixin, MBDB.MatchboxBase): @@ -51,38 +52,39 @@ class Resolutions(CountMixin, MBDB.MatchboxBase): Resolutions produce probabilities or own data in the clusters table. """ - __tablename__ = "resolutions" - - # Columns - resolution_id = Column(BIGINT, primary_key=True) - resolution_hash = Column(BYTEA, nullable=False) - type = Column(TEXT, nullable=False) - name = Column(TEXT, nullable=False) - description = Column(TEXT) - truth = Column(FLOAT) - - # Relationships - source = relationship("Sources", back_populates="dataset_resolution", uselist=False) - probabilities = relationship( - "Probabilities", back_populates="proposed_by", cascade="all, delete-orphan" - ) - children = relationship( - "Resolutions", - secondary=ResolutionFrom.__table__, - primaryjoin="Resolutions.resolution_id == ResolutionFrom.parent", - secondaryjoin="Resolutions.resolution_id == ResolutionFrom.child", - backref="parents", - ) - - # Constraints - __table_args__ = ( - CheckConstraint( - "type IN ('model', 'dataset', 'human')", - name="resolution_type_constraints", - ), - UniqueConstraint("resolution_hash", name="resolutions_hash_key"), - UniqueConstraint("name", name="resolutions_name_key"), - ) + def __init__(self, suffix=""): + self.__tablename__ = f"resolutions{suffix}" + + # Columns + self.resolution_id = Column(BIGINT, primary_key=True) + self.resolution_hash = Column(BYTEA, nullable=False) + self.type = Column(TEXT, nullable=False) + self.name = Column(TEXT, nullable=False) + self.description = Column(TEXT) + self.truth = Column(FLOAT) + + # Relationships + self.source = relationship("Sources", back_populates="dataset_resolution", uselist=False) + self.probabilities = relationship( + "Probabilities", back_populates="proposed_by", cascade="all, delete-orphan" + ) + self.children = relationship( + "Resolutions", + secondary=ResolutionFrom.__table__, + primaryjoin="Resolutions.resolution_id == ResolutionFrom.parent", + secondaryjoin="Resolutions.resolution_id == ResolutionFrom.child", + backref="parents", + ) + + # Constraints + self.__table_args__ = ( + CheckConstraint( + "type IN ('model', 'dataset', 'human')", + name="resolution_type_constraints", + ), + UniqueConstraint("resolution_hash", name="resolutions_hash_key"), + UniqueConstraint("name", name="resolutions_name_key"), + ) @property def ancestors(self) -> set["Resolutions"]: @@ -171,30 +173,31 @@ def next_id(cls) -> int: class Sources(CountMixin, MBDB.MatchboxBase): """Table of sources of data for Matchbox.""" - __tablename__ = "sources" - - # 
Columns
-    resolution_id = Column(
-        BIGINT,
-        ForeignKey("resolutions.resolution_id", ondelete="CASCADE"),
-        primary_key=True,
-    )
-    alias = Column(TEXT, nullable=False)
-    full_name = Column(TEXT, nullable=False)
-    warehouse_hash = Column(BYTEA, nullable=False)
-    id = Column(TEXT, nullable=False)
-    column_names = Column(ARRAY(TEXT), nullable=False)
-    column_aliases = Column(ARRAY(TEXT), nullable=False)
-    column_types = Column(ARRAY(TEXT), nullable=False)
-
-    # Relationships
-    dataset_resolution = relationship("Resolutions", back_populates="source")
-    clusters = relationship("Clusters", back_populates="source")
-
-    # Constraints
-    __table_args__ = (
-        UniqueConstraint("full_name", "warehouse_hash", name="unique_source_address"),
-    )
+    def __init__(self, suffix="", contains_temporary=False):
+        self.__tablename__ = "sources"
+
+        # Columns
+        self.resolution_id = Column(
+            BIGINT,
+            ForeignKey("resolutions.resolution_id", ondelete="CASCADE"),
+            primary_key=True,
+        )
+        self.alias = Column(TEXT, nullable=False)
+        self.full_name = Column(TEXT, nullable=False)
+        self.warehouse_hash = Column(BYTEA, nullable=False)
+        self.id = Column(TEXT, nullable=False)
+        self.column_names = Column(ARRAY(TEXT), nullable=False)
+        self.column_aliases = Column(ARRAY(TEXT), nullable=False)
+        self.column_types = Column(ARRAY(TEXT), nullable=False)
+
+        # Relationships
+        self.dataset_resolution = relationship("Resolutions", back_populates="source")
+        self.clusters = relationship("Clusters", back_populates="source")
+
+        # Constraints
+        self.__table_args__ = (
+            UniqueConstraint("full_name", "warehouse_hash", name="unique_source_address"),
+        )
 
     @classmethod
     def list(cls) -> list["Sources"]:
@@ -205,56 +208,69 @@ def list(cls) -> list["Sources"]:
 class Contains(CountMixin, MBDB.MatchboxBase):
     """Cluster lineage table."""
 
-    __tablename__ = "contains"
+    def __init__(self, suffix="", clusters_temporary=False):
+        self.__tablename__ = f"contains{suffix}"
 
-    # Columns
-    parent = Column(
-        BIGINT, ForeignKey("clusters.cluster_id", ondelete="CASCADE"), primary_key=True
-    )
-    child = Column(
-        BIGINT, ForeignKey("clusters.cluster_id", ondelete="CASCADE"), primary_key=True
-    )
+        # Columns
+        if clusters_temporary:
+            clusters_name = f"clusters{suffix}"
+        else:
+            clusters_name = "clusters"
 
-    # Constraints and indices
-    __table_args__ = (
-        CheckConstraint("parent != child", name="no_self_containment"),
-        Index("ix_contains_parent_child", "parent", "child"),
-        Index("ix_contains_child_parent", "child", "parent"),
-    )
+        self.parent = Column(
+            BIGINT, ForeignKey(f"{clusters_name}.cluster_id", ondelete="CASCADE"), primary_key=True
+        )
+        self.child = Column(
+            BIGINT, ForeignKey(f"{clusters_name}.cluster_id", ondelete="CASCADE"), primary_key=True
+        )
+
+        # Constraints and indices
+        self.__table_args__ = (
+            CheckConstraint("parent != child", name="no_self_containment"),
+            Index(f"ix_contains_parent_child{suffix}", "parent", "child"),
+            Index(f"ix_contains_child_parent{suffix}", "child", "parent"),
+        )
 
 
 class Clusters(CountMixin, MBDB.MatchboxBase):
     """Table of indexed data and clusters that match it."""
 
-    __tablename__ = "clusters"
-
-    # Columns
-    cluster_id = Column(BIGINT, primary_key=True)
-    cluster_hash = Column(BYTEA, nullable=False)
-    dataset = Column(BIGINT, ForeignKey("sources.resolution_id"), nullable=True)
-    # Uses array as source data may have identical rows.
We can't control this - # Must be indexed or PostgreSQL incorrectly tries to use nested joins - # when retrieving small datasets in query() -- extremely slow - source_pk = Column(ARRAY(TEXT), index=True, nullable=True) - - # Relationships - source = relationship("Sources", back_populates="clusters") - probabilities = relationship( - "Probabilities", back_populates="proposes", cascade="all, delete-orphan" - ) - children = relationship( - "Clusters", - secondary=Contains.__table__, - primaryjoin="Clusters.cluster_id == Contains.parent", - secondaryjoin="Clusters.cluster_id == Contains.child", - backref="parents", - ) - - # Constraints and indices - __table_args__ = ( - Index("ix_clusters_id_gin", source_pk, postgresql_using="gin"), - UniqueConstraint("cluster_hash", name="clusters_hash_key"), - ) + def __init__(self, suffix="", contains_temporary=False): + self.__tablename__ = f"clusters{suffix}" + + # Columns + self.cluster_id = Column(BIGINT, primary_key=True) + self.cluster_hash = Column(BYTEA, nullable=False) + + self.dataset = Column(BIGINT, ForeignKey("sources.resolution_id"), nullable=True) + # Uses array as source data may have identical rows. We can't control this + # Must be indexed or PostgreSQL incorrectly tries to use nested joins + # when retrieving small datasets in query() -- extremely slow + self.source_pk = Column(ARRAY(TEXT), index=True, nullable=True) + + # Relationships + self.source = relationship("Sources", back_populates="clusters") + self.probabilities = relationship( + "Probabilities", back_populates="proposes", cascade="all, delete-orphan" + ) + + if contains_temporary: + contains_name = f"{Contains.__table__}{suffix}" + else: + contains_name = Contains.__table__ + self.children = relationship( + "Clusters", + secondary=contains_name, + primaryjoin="Clusters.cluster_id == Contains.parent", + secondaryjoin="Clusters.cluster_id == Contains.child", + backref="parents", + ) + + # Constraints and indices + self.__table_args__ = ( + Index(f"ix_clusters_id_gin{suffix}", self.source_pk, postgresql_using="gin"), + UniqueConstraint("cluster_hash", name=f"clusters_hash_key{suffix}"), + ) @classmethod def next_id(cls) -> int: @@ -269,24 +285,34 @@ def next_id(cls) -> int: class Probabilities(CountMixin, MBDB.MatchboxBase): """Table of probabilities that a cluster is correct, according to a resolution.""" - __tablename__ = "probabilities" - - # Columns - resolution = Column( - BIGINT, - ForeignKey("resolutions.resolution_id", ondelete="CASCADE"), - primary_key=True, - ) - cluster = Column( - BIGINT, ForeignKey("clusters.cluster_id", ondelete="CASCADE"), primary_key=True - ) - probability = Column(SMALLINT, nullable=False) - - # Relationships - proposed_by = relationship("Resolutions", back_populates="probabilities") - proposes = relationship("Clusters", back_populates="probabilities") - - # Constraints - __table_args__ = ( - CheckConstraint("probability BETWEEN 0 AND 100", name="valid_probability"), - ) + def __init__(self, suffix="", resolutions_temporary=False, clusters_temporary=False): + self.__tablename__ = f"probabilities{suffix}" + + # Columns + if resolutions_temporary: + resolutions_name = f"resolutions{suffix}" + else: + resolutions_name = "resolutions" + if clusters_temporary: + clusters_name = f"clusters{suffix}" + else: + clusters_name = "clusters" + + self.resolution = Column( + BIGINT, + ForeignKey(f"{resolutions_name}.resolution_id", ondelete="CASCADE"), + primary_key=True, + ) + self.cluster = Column( + BIGINT, ForeignKey(f"{clusters_name}.cluster_id", 
ondelete="CASCADE"), primary_key=True + ) + self.probability = Column(SMALLINT, nullable=False) + + # Relationships + self.proposed_by = relationship("Resolutions", back_populates="probabilities") + self.proposes = relationship("Clusters", back_populates="probabilities") + + # Constraints + self.__table_args__ = ( + CheckConstraint("probability BETWEEN 0 AND 100", name="valid_probability"), + ) diff --git a/src/matchbox/server/postgresql/utils/db.py b/src/matchbox/server/postgresql/utils/db.py index e66abd98..71c4144a 100644 --- a/src/matchbox/server/postgresql/utils/db.py +++ b/src/matchbox/server/postgresql/utils/db.py @@ -199,23 +199,6 @@ def _create_adbc_table_constraints(db_schema:str, sufix:str, conn:Connection) -> Args: db_schema: str, the name of the schema """ # Cluster - _run_query(f"ALTER TABLE {db_schema}.clusters_{sufix} ADD PRIMARY KEY (cluster_id)", conn) - _run_query(f"""ALTER TABLE {db_schema}.probabilities_{sufix} ADD PRIMARY KEY (resolution, "cluster")""", conn) - _run_query(f"CREATE UNIQUE INDEX cluster_hash_index_{sufix} ON {db_schema}.clusters_{sufix} USING btree (cluster_hash)", conn) - # _run_query(f"CREATE UNIQUE INDEX clusters_adbc_clusters_is_{sufix} ON {db_schema}.clusters_{sufix} USING btree (cluster_id)", conn) - _run_query(f"CREATE INDEX ix_clusters_id_gin_{sufix} ON {db_schema}.clusters_{sufix} USING gin (source_pk)", conn) - _run_query(f"CREATE INDEX ix_mb_clusters_source_pk_{sufix} ON {db_schema}.clusters_{sufix} USING btree (source_pk)", conn) - - # Contains - _run_query(f"CREATE UNIQUE INDEX ix_contains_child_parent_{sufix} ON {db_schema}.contains_{sufix} USING btree (child, parent)", conn) - _run_query(f"CREATE UNIQUE INDEX ix_contains_parent_child_{sufix} ON {db_schema}.contains_{sufix} USING btree (parent, child)", conn) - - # Foreign keys - _run_query(f"ALTER TABLE {db_schema}.clusters_{sufix} ADD CONSTRAINT clusters_dataset_fkey FOREIGN KEY (dataset) REFERENCES {db_schema}.sources(resolution_id)", conn) - _run_query(f"""ALTER TABLE {db_schema}."contains_{sufix}" ADD CONSTRAINT contains_child_fkey FOREIGN KEY (child) REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""", conn) - _run_query(f"""ALTER TABLE {db_schema}."contains_{sufix}" ADD CONSTRAINT contains_parent_fkey FOREIGN KEY (parent) REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""", conn) - _run_query(f"""ALTER TABLE {db_schema}.probabilities_{sufix} ADD CONSTRAINT probabilities_cluster_fkey FOREIGN KEY ("cluster") REFERENCES {db_schema}.clusters_{sufix}(cluster_id) ON DELETE CASCADE""", conn) - _run_query(f"ALTER TABLE {db_schema}.probabilities_{sufix} ADD CONSTRAINT probabilities_resolution_fkey FOREIGN KEY (resolution) REFERENCES {db_schema}.resolutions(resolution_id) ON DELETE CASCADE", conn) _run_queries([ f"""DROP TABLE IF EXISTS {db_schema}.clusters""", diff --git a/test/unit/test_adbcingest.py b/test/unit/test_adbcingest.py index 0129c8ba..c01b8b5f 100644 --- a/test/unit/test_adbcingest.py +++ b/test/unit/test_adbcingest.py @@ -10,9 +10,9 @@ class TestAdbcIngestData(unittest.TestCase): - @patch('my_module._create_adbc_table_constraints') - @patch('my_module._adbc_insert_data') - @patch('my_module.datetime') + @patch('matchbox.server.postgresql.utils.db._create_adbc_table_constraints') + @patch('matchbox.server.postgresql.utils.db._adbc_insert_data') + @patch('matchbox.server.postgresql.utils.db.datetime') def test_adbc_ingest_data(self, mock_datetime, mock_adbc_insert_data, mock_create_adbc_table_constraints): # Mock datetime 
mock_datetime.now.return_value.strftime.return_value = "20250101123045" @@ -39,9 +39,9 @@ def test_adbc_ingest_data(self, mock_datetime, mock_adbc_insert_data, mock_creat self.assertFalse(result) -@patch('my_module._save_to_postgresql') -@patch('my_module._run_query') -@patch('my_module.adbc_driver_postgresql.dbapi.connect') +@patch('matchbox.server.postgresql.utils.db._save_to_postgresql') +@patch('matchbox.server.postgresql.utils.db._run_query') +@patch('matchbox.server.postgresql.utils.db.adbc_driver_postgresql.dbapi.connect') def test_adbc_insert_data(self, mock_connect, mock_run_query, mock_save_to_postgresql): # Mock the connect method mock_conn = mock_connect.return_value.__enter__.return_value @@ -71,7 +71,7 @@ def test_adbc_insert_data(self, mock_connect, mock_run_query, mock_save_to_postg result = _adbc_insert_data(clusters, contains, probabilities, suffix, alchemy_conn, resolution_id) self.assertFalse(result) - @patch('my_module.pa.RecordBatchReader.from_batches') + @patch('matchbox.server.postgresql.utils.db.pa.RecordBatchReader.from_batches') def test_save_to_postgresql(self, mock_from_batches): # Mock the from_batches method mock_batch_reader = MagicMock() @@ -97,7 +97,7 @@ def test_save_to_postgresql(self, mock_from_batches): db_schema_name=schema, ) - @patch('my_module.text') + @patch('matchbox.server.postgresql.utils.db.text') def test_run_query(self, mock_text): # Create mock arguments query = "SELECT * FROM test_table" @@ -110,7 +110,7 @@ def test_run_query(self, mock_text): conn.execute.assert_called_once_with(mock_text(query)) conn.commit.assert_called_once() - @patch('my_module.text') + @patch('matchbox.server.postgresql.utils.db.text') def test_run_queries(self, mock_text): # Create mock arguments queries = ["SELECT * FROM test_table", "DELETE FROM test_table"] diff --git a/uv.lock b/uv.lock index 6e30c485..52cc2f04 100644 --- a/uv.lock +++ b/uv.lock @@ -2534,27 +2534,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.9.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c0/17/529e78f49fc6f8076f50d985edd9a2cf011d1dbadb1cdeacc1d12afc1d26/ruff-0.9.4.tar.gz", hash = "sha256:6907ee3529244bb0ed066683e075f09285b38dd5b4039370df6ff06041ca19e7", size = 3599458 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/f8/3fafb7804d82e0699a122101b5bee5f0d6e17c3a806dcbc527bb7d3f5b7a/ruff-0.9.4-py3-none-linux_armv6l.whl", hash = "sha256:64e73d25b954f71ff100bb70f39f1ee09e880728efb4250c632ceed4e4cdf706", size = 11668400 }, - { url = "https://files.pythonhosted.org/packages/2e/a6/2efa772d335da48a70ab2c6bb41a096c8517ca43c086ea672d51079e3d1f/ruff-0.9.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6ce6743ed64d9afab4fafeaea70d3631b4d4b28b592db21a5c2d1f0ef52934bf", size = 11628395 }, - { url = "https://files.pythonhosted.org/packages/dc/d7/cd822437561082f1c9d7225cc0d0fbb4bad117ad7ac3c41cd5d7f0fa948c/ruff-0.9.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:54499fb08408e32b57360f6f9de7157a5fec24ad79cb3f42ef2c3f3f728dfe2b", size = 11090052 }, - { url = "https://files.pythonhosted.org/packages/9e/67/3660d58e893d470abb9a13f679223368ff1684a4ef40f254a0157f51b448/ruff-0.9.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37c892540108314a6f01f105040b5106aeb829fa5fb0561d2dcaf71485021137", size = 11882221 }, - { url = 
"https://files.pythonhosted.org/packages/79/d1/757559995c8ba5f14dfec4459ef2dd3fcea82ac43bc4e7c7bf47484180c0/ruff-0.9.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:de9edf2ce4b9ddf43fd93e20ef635a900e25f622f87ed6e3047a664d0e8f810e", size = 11424862 }, - { url = "https://files.pythonhosted.org/packages/c0/96/7915a7c6877bb734caa6a2af424045baf6419f685632469643dbd8eb2958/ruff-0.9.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87c90c32357c74f11deb7fbb065126d91771b207bf9bfaaee01277ca59b574ec", size = 12626735 }, - { url = "https://files.pythonhosted.org/packages/0e/cc/dadb9b35473d7cb17c7ffe4737b4377aeec519a446ee8514123ff4a26091/ruff-0.9.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:56acd6c694da3695a7461cc55775f3a409c3815ac467279dfa126061d84b314b", size = 13255976 }, - { url = "https://files.pythonhosted.org/packages/5f/c3/ad2dd59d3cabbc12df308cced780f9c14367f0321e7800ca0fe52849da4c/ruff-0.9.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0c93e7d47ed951b9394cf352d6695b31498e68fd5782d6cbc282425655f687a", size = 12752262 }, - { url = "https://files.pythonhosted.org/packages/c7/17/5f1971e54bd71604da6788efd84d66d789362b1105e17e5ccc53bba0289b/ruff-0.9.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4c8772670aecf037d1bf7a07c39106574d143b26cfe5ed1787d2f31e800214", size = 14401648 }, - { url = "https://files.pythonhosted.org/packages/30/24/6200b13ea611b83260501b6955b764bb320e23b2b75884c60ee7d3f0b68e/ruff-0.9.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfc5f1d7afeda8d5d37660eeca6d389b142d7f2b5a1ab659d9214ebd0e025231", size = 12414702 }, - { url = "https://files.pythonhosted.org/packages/34/cb/f5d50d0c4ecdcc7670e348bd0b11878154bc4617f3fdd1e8ad5297c0d0ba/ruff-0.9.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faa935fc00ae854d8b638c16a5f1ce881bc3f67446957dd6f2af440a5fc8526b", size = 11859608 }, - { url = "https://files.pythonhosted.org/packages/d6/f4/9c8499ae8426da48363bbb78d081b817b0f64a9305f9b7f87eab2a8fb2c1/ruff-0.9.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a6c634fc6f5a0ceae1ab3e13c58183978185d131a29c425e4eaa9f40afe1e6d6", size = 11485702 }, - { url = "https://files.pythonhosted.org/packages/18/59/30490e483e804ccaa8147dd78c52e44ff96e1c30b5a95d69a63163cdb15b/ruff-0.9.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:433dedf6ddfdec7f1ac7575ec1eb9844fa60c4c8c2f8887a070672b8d353d34c", size = 12067782 }, - { url = "https://files.pythonhosted.org/packages/3d/8c/893fa9551760b2f8eb2a351b603e96f15af167ceaf27e27ad873570bc04c/ruff-0.9.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d612dbd0f3a919a8cc1d12037168bfa536862066808960e0cc901404b77968f0", size = 12483087 }, - { url = "https://files.pythonhosted.org/packages/23/15/f6751c07c21ca10e3f4a51ea495ca975ad936d780c347d9808bcedbd7182/ruff-0.9.4-py3-none-win32.whl", hash = "sha256:db1192ddda2200671f9ef61d9597fcef89d934f5d1705e571a93a67fb13a4402", size = 9852302 }, - { url = "https://files.pythonhosted.org/packages/12/41/2d2d2c6a72e62566f730e49254f602dfed23019c33b5b21ea8f8917315a1/ruff-0.9.4-py3-none-win_amd64.whl", hash = "sha256:05bebf4cdbe3ef75430d26c375773978950bbf4ee3c95ccb5448940dc092408e", size = 10850051 }, - { url = "https://files.pythonhosted.org/packages/c6/e6/3d6ec3bc3d254e7f005c543a661a41c3e788976d0e52a1ada195bd664344/ruff-0.9.4-py3-none-win_arm64.whl", hash = "sha256:585792f1e81509e38ac5123492f8875fbc36f3ede8185af0a26df348e5154f41", size = 10078251 }, +version = "0.9.5" 
+source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/74/6c359f6b9ed85b88df6ef31febce18faeb852f6c9855651dfb1184a46845/ruff-0.9.5.tar.gz", hash = "sha256:11aecd7a633932875ab3cb05a484c99970b9d52606ce9ea912b690b02653d56c", size = 3634177 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/4b/82b7c9ac874e72b82b19fd7eab57d122e2df44d2478d90825854f9232d02/ruff-0.9.5-py3-none-linux_armv6l.whl", hash = "sha256:d466d2abc05f39018d53f681fa1c0ffe9570e6d73cde1b65d23bb557c846f442", size = 11681264 }, + { url = "https://files.pythonhosted.org/packages/27/5c/f5ae0a9564e04108c132e1139d60491c0abc621397fe79a50b3dc0bd704b/ruff-0.9.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:38840dbcef63948657fa7605ca363194d2fe8c26ce8f9ae12eee7f098c85ac8a", size = 11657554 }, + { url = "https://files.pythonhosted.org/packages/2a/83/c6926fa3ccb97cdb3c438bb56a490b395770c750bf59f9bc1fe57ae88264/ruff-0.9.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d56ba06da53536b575fbd2b56517f6f95774ff7be0f62c80b9e67430391eeb36", size = 11088959 }, + { url = "https://files.pythonhosted.org/packages/af/a7/42d1832b752fe969ffdbfcb1b4cb477cb271bed5835110fb0a16ef31ab81/ruff-0.9.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f7cb2a01da08244c50b20ccfaeb5972e4228c3c3a1989d3ece2bc4b1f996001", size = 11902041 }, + { url = "https://files.pythonhosted.org/packages/53/cf/1fffa09fb518d646f560ccfba59f91b23c731e461d6a4dedd21a393a1ff1/ruff-0.9.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:96d5c76358419bc63a671caac70c18732d4fd0341646ecd01641ddda5c39ca0b", size = 11421069 }, + { url = "https://files.pythonhosted.org/packages/09/27/bb8f1b7304e2a9431f631ae7eadc35550fe0cf620a2a6a0fc4aa3d736f94/ruff-0.9.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:deb8304636ed394211f3a6d46c0e7d9535b016f53adaa8340139859b2359a070", size = 12625095 }, + { url = "https://files.pythonhosted.org/packages/d7/ce/ab00bc9d3df35a5f1b64f5117458160a009f93ae5caf65894ebb63a1842d/ruff-0.9.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df455000bf59e62b3e8c7ba5ed88a4a2bc64896f900f311dc23ff2dc38156440", size = 13257797 }, + { url = "https://files.pythonhosted.org/packages/88/81/c639a082ae6d8392bc52256058ec60f493c6a4d06d5505bccface3767e61/ruff-0.9.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de92170dfa50c32a2b8206a647949590e752aca8100a0f6b8cefa02ae29dce80", size = 12763793 }, + { url = "https://files.pythonhosted.org/packages/b3/d0/0a3d8f56d1e49af466dc770eeec5c125977ba9479af92e484b5b0251ce9c/ruff-0.9.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d28532d73b1f3f627ba88e1456f50748b37f3a345d2be76e4c653bec6c3e393", size = 14386234 }, + { url = "https://files.pythonhosted.org/packages/04/70/e59c192a3ad476355e7f45fb3a87326f5219cc7c472e6b040c6c6595c8f0/ruff-0.9.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c746d7d1df64f31d90503ece5cc34d7007c06751a7a3bbeee10e5f2463d52d2", size = 12437505 }, + { url = "https://files.pythonhosted.org/packages/55/4e/3abba60a259d79c391713e7a6ccabf7e2c96e5e0a19100bc4204f1a43a51/ruff-0.9.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:11417521d6f2d121fda376f0d2169fb529976c544d653d1d6044f4c5562516ee", size = 11884799 }, + { url = "https://files.pythonhosted.org/packages/a3/db/b0183a01a9f25b4efcae919c18fb41d32f985676c917008620ad692b9d5f/ruff-0.9.5-py3-none-musllinux_1_2_armv7l.whl", hash = 
"sha256:5b9d71c3879eb32de700f2f6fac3d46566f644a91d3130119a6378f9312a38e1", size = 11527411 }, + { url = "https://files.pythonhosted.org/packages/0a/e4/3ebfcebca3dff1559a74c6becff76e0b64689cea02b7aab15b8b32ea245d/ruff-0.9.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2e36c61145e70febcb78483903c43444c6b9d40f6d2f800b5552fec6e4a7bb9a", size = 12078868 }, + { url = "https://files.pythonhosted.org/packages/ec/b2/5ab808833e06c0a1b0d046a51c06ec5687b73c78b116e8d77687dc0cd515/ruff-0.9.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:2f71d09aeba026c922aa7aa19a08d7bd27c867aedb2f74285a2639644c1c12f5", size = 12524374 }, + { url = "https://files.pythonhosted.org/packages/e0/51/1432afcc3b7aa6586c480142caae5323d59750925c3559688f2a9867343f/ruff-0.9.5-py3-none-win32.whl", hash = "sha256:134f958d52aa6fdec3b294b8ebe2320a950d10c041473c4316d2e7d7c2544723", size = 9853682 }, + { url = "https://files.pythonhosted.org/packages/b7/ad/c7a900591bd152bb47fc4882a27654ea55c7973e6d5d6396298ad3fd6638/ruff-0.9.5-py3-none-win_amd64.whl", hash = "sha256:78cc6067f6d80b6745b67498fb84e87d32c6fc34992b52bffefbdae3442967d6", size = 10865744 }, + { url = "https://files.pythonhosted.org/packages/75/d9/fde7610abd53c0c76b6af72fc679cb377b27c617ba704e25da834e0a0608/ruff-0.9.5-py3-none-win_arm64.whl", hash = "sha256:18a29f1a005bddb229e580795627d297dfa99f16b30c7039e73278cf6b5f9fa9", size = 10064595 }, ] [[package]] From 3b049244e008a80c9e072efac92f49aefb98ca30 Mon Sep 17 00:00:00 2001 From: Dejan Lozanovic Date: Fri, 14 Feb 2025 10:00:59 +0000 Subject: [PATCH 5/5] sql alchemy creating constrains --- src/matchbox/server/postgresql/orm.py | 308 ++++++++++----------- src/matchbox/server/postgresql/utils/db.py | 16 +- 2 files changed, 155 insertions(+), 169 deletions(-) diff --git a/src/matchbox/server/postgresql/orm.py b/src/matchbox/server/postgresql/orm.py index c38897c2..93a1f336 100644 --- a/src/matchbox/server/postgresql/orm.py +++ b/src/matchbox/server/postgresql/orm.py @@ -22,28 +22,27 @@ class ResolutionFrom(CountMixin, MBDB.MatchboxBase): """Resolution lineage closure table with cached truth values.""" - def __init__(self, suffix=""): - self.__tablename__ = f"resolution_from{suffix}" - - # Columns - self.parent = Column( - BIGINT, - ForeignKey(f"resolutions{suffix}.resolution_id", ondelete="CASCADE"), - primary_key=True, - ) - self.child = Column( - BIGINT, - ForeignKey(f"resolutions{suffix}.resolution_id", ondelete="CASCADE"), - primary_key=True, - ) - self.level = Column(INTEGER, nullable=False) - self.truth_cache = Column(FLOAT, nullable=True) - - # Constraints - self.__table_args__ = ( - CheckConstraint("parent != child", name="no_self_reference"), - CheckConstraint("level > 0", name="positive_level"), - ) + __tablename__ = "resolution_from" + + # Columns + parent = Column( + BIGINT, + ForeignKey("resolutions.resolution_id", ondelete="CASCADE"), + primary_key=True, + ) + child = Column( + BIGINT, + ForeignKey("resolutions.resolution_id", ondelete="CASCADE"), + primary_key=True, + ) + level = Column(INTEGER, nullable=False) + truth_cache = Column(FLOAT, nullable=True) + + # Constraints + __table_args__ = ( + CheckConstraint("parent != child", name="no_self_reference"), + CheckConstraint("level > 0", name="positive_level"), + ) class Resolutions(CountMixin, MBDB.MatchboxBase): @@ -52,39 +51,38 @@ class Resolutions(CountMixin, MBDB.MatchboxBase): Resolutions produce probabilities or own data in the clusters table. 
""" - def __init__(self, suffix=""): - self.__tablename__ = f"resolutions{suffix}" - - # Columns - self.resolution_id = Column(BIGINT, primary_key=True) - self.resolution_hash = Column(BYTEA, nullable=False) - self.type = Column(TEXT, nullable=False) - self.name = Column(TEXT, nullable=False) - self.description = Column(TEXT) - self.truth = Column(FLOAT) - - # Relationships - self.source = relationship("Sources", back_populates="dataset_resolution", uselist=False) - self.probabilities = relationship( - "Probabilities", back_populates="proposed_by", cascade="all, delete-orphan" - ) - self.children = relationship( - "Resolutions", - secondary=ResolutionFrom.__table__, - primaryjoin="Resolutions.resolution_id == ResolutionFrom.parent", - secondaryjoin="Resolutions.resolution_id == ResolutionFrom.child", - backref="parents", - ) - - # Constraints - self.__table_args__ = ( - CheckConstraint( - "type IN ('model', 'dataset', 'human')", - name="resolution_type_constraints", - ), - UniqueConstraint("resolution_hash", name="resolutions_hash_key"), - UniqueConstraint("name", name="resolutions_name_key"), - ) + __tablename__ = "resolutions" + + # Columns + resolution_id = Column(BIGINT, primary_key=True) + resolution_hash = Column(BYTEA, nullable=False) + type = Column(TEXT, nullable=False) + name = Column(TEXT, nullable=False) + description = Column(TEXT) + truth = Column(FLOAT) + + # Relationships + source = relationship("Sources", back_populates="dataset_resolution", uselist=False) + probabilities = relationship( + "Probabilities", back_populates="proposed_by", cascade="all, delete-orphan" + ) + children = relationship( + "Resolutions", + secondary=ResolutionFrom.__table__, + primaryjoin="Resolutions.resolution_id == ResolutionFrom.parent", + secondaryjoin="Resolutions.resolution_id == ResolutionFrom.child", + backref="parents", + ) + + # Constraints + __table_args__ = ( + CheckConstraint( + "type IN ('model', 'dataset', 'human')", + name="resolution_type_constraints", + ), + UniqueConstraint("resolution_hash", name="resolutions_hash_key"), + UniqueConstraint("name", name="resolutions_name_key"), + ) @property def ancestors(self) -> set["Resolutions"]: @@ -173,31 +171,30 @@ def next_id(cls) -> int: class Sources(CountMixin, MBDB.MatchboxBase): """Table of sources of data for Matchbox.""" - def __init__(self, suffix="", contains_temporary=False): - self.__tablename__ = "sources" - - # Columns - self.resolution_id = Column( - BIGINT, - ForeignKey("resolutions.resolution_id", ondelete="CASCADE"), - primary_key=True, - ) - self.alias = Column(TEXT, nullable=False) - self.full_name = Column(TEXT, nullable=False) - self.warehouse_hash = Column(BYTEA, nullable=False) - self.id = Column(TEXT, nullable=False) - self.column_names = Column(ARRAY(TEXT), nullable=False) - self.column_aliases = Column(ARRAY(TEXT), nullable=False) - self.column_types = Column(ARRAY(TEXT), nullable=False) - - # Relationships - self.dataset_resolution = relationship("Resolutions", back_populates="source") - self.clusters = relationship("Clusters", back_populates="source") - - # Constraints - self.__table_args__ = ( - UniqueConstraint("full_name", "warehouse_hash", name="unique_source_address"), - ) + __tablename__ = "sources" + + # Columns + resolution_id = Column( + BIGINT, + ForeignKey("resolutions.resolution_id", ondelete="CASCADE"), + primary_key=True, + ) + alias = Column(TEXT, nullable=False) + full_name = Column(TEXT, nullable=False) + warehouse_hash = Column(BYTEA, nullable=False) + id = Column(TEXT, nullable=False) + 
column_names = Column(ARRAY(TEXT), nullable=False) + column_aliases = Column(ARRAY(TEXT), nullable=False) + column_types = Column(ARRAY(TEXT), nullable=False) + + # Relationships + dataset_resolution = relationship("Resolutions", back_populates="source") + clusters = relationship("Clusters", back_populates="source") + + # Constraints + __table_args__ = ( + UniqueConstraint("full_name", "warehouse_hash", name="unique_source_address"), + ) @classmethod def list(cls) -> list["Sources"]: @@ -208,69 +205,56 @@ def list(cls) -> list["Sources"]: class Contains(CountMixin, MBDB.MatchboxBase): """Cluster lineage table.""" - def __init__(self, suffix="", clusters_temporary=False): - self.__tablename__ = f"contains{suffix}" + __tablename__ = "contains" - # Columns - if clusters_temporary: - clusters_name = f"clusters{suffix}" - else: - clusters_name = "clusters" + # Columns + parent = Column( + BIGINT, ForeignKey("clusters.cluster_id", ondelete="CASCADE"), primary_key=True + ) + child = Column( + BIGINT, ForeignKey("clusters.cluster_id", ondelete="CASCADE"), primary_key=True + ) - self.parent = Column( - BIGINT, ForeignKey(f"{clusters_name}.cluster_id", ondelete="CASCADE"), primary_key=True - ) - self.child = Column( - BIGINT, ForeignKey(f"{clusters_name}.cluster_id", ondelete="CASCADE"), primary_key=True - ) - - # Constraints and indices - self.__table_args__ = ( - CheckConstraint("parent != child", name="no_self_containment"), - Index(f"ix_contains_parent_child{suffix}", "parent", "child"), - Index(f"ix_contains_child_parent", "child", "parent"), - ) + # Constraints and indices + __table_args__ = ( + CheckConstraint("parent != child", name="no_self_containment"), + Index("ix_contains_parent_child", "parent", "child"), + Index("ix_contains_child_parent", "child", "parent"), + ) class Clusters(CountMixin, MBDB.MatchboxBase): """Table of indexed data and clusters that match it.""" - def __init__(self, suffix="", contains_temporary=False): - self.__tablename__ = f"clusters{suffix}" - - # Columns - self.cluster_id = Column(BIGINT, primary_key=True) - self.cluster_hash = Column(BYTEA, nullable=False) - - self.dataset = Column(BIGINT, ForeignKey("sources.resolution_id"), nullable=True) - # Uses array as source data may have identical rows. We can't control this - # Must be indexed or PostgreSQL incorrectly tries to use nested joins - # when retrieving small datasets in query() -- extremely slow - self.source_pk = Column(ARRAY(TEXT), index=True, nullable=True) - - # Relationships - self.source = relationship("Sources", back_populates="clusters") - self.probabilities = relationship( - "Probabilities", back_populates="proposes", cascade="all, delete-orphan" - ) - - if contains_temporary: - contains_name = f"{Contains.__table__}{suffix}" - else: - contains_name = Contains.__table__ - self.children = relationship( - "Clusters", - secondary=contains_name, - primaryjoin="Clusters.cluster_id == Contains.parent", - secondaryjoin="Clusters.cluster_id == Contains.child", - backref="parents", - ) - - # Constraints and indices - self.__table_args__ = ( - Index(f"ix_clusters_id_gin{suffix}", self.source_pk, postgresql_using="gin"), - UniqueConstraint("cluster_hash", name=f"clusters_hash_key{suffix}"), - ) + __tablename__ = "clusters" + + # Columns + cluster_id = Column(BIGINT, primary_key=True) + cluster_hash = Column(BYTEA, nullable=False) + dataset = Column(BIGINT, ForeignKey("sources.resolution_id"), nullable=True) + # Uses array as source data may have identical rows. 
We can't control this + # Must be indexed or PostgreSQL incorrectly tries to use nested joins + # when retrieving small datasets in query() -- extremely slow + source_pk = Column(ARRAY(TEXT), index=True, nullable=True) + + # Relationships + source = relationship("Sources", back_populates="clusters") + probabilities = relationship( + "Probabilities", back_populates="proposes", cascade="all, delete-orphan" + ) + children = relationship( + "Clusters", + secondary=Contains.__table__, + primaryjoin="Clusters.cluster_id == Contains.parent", + secondaryjoin="Clusters.cluster_id == Contains.child", + backref="parents", + ) + + # Constraints and indices + __table_args__ = ( + Index("ix_clusters_id_gin", source_pk, postgresql_using="gin"), + UniqueConstraint("cluster_hash", name="clusters_hash_key"), + ) @classmethod def next_id(cls) -> int: @@ -285,34 +269,24 @@ def next_id(cls) -> int: class Probabilities(CountMixin, MBDB.MatchboxBase): """Table of probabilities that a cluster is correct, according to a resolution.""" - def __init__(self, suffix="", resolutions_temporary=False, clusters_temporary=False): - self.__tablename__ = f"probabilities{suffix}" - - # Columns - if resolutions_temporary: - resolutions_name = f"resolutions{suffix}" - else: - resolutions_name = "resolutions" - if clusters_temporary: - clusters_name = f"clusters{suffix}" - else: - clusters_name = "clusters" - - self.resolution = Column( - BIGINT, - ForeignKey(f"{resolutions_name}.resolution_id", ondelete="CASCADE"), - primary_key=True, - ) - self.cluster = Column( - BIGINT, ForeignKey(f"{clusters_name}.cluster_id", ondelete="CASCADE"), primary_key=True - ) - self.probability = Column(SMALLINT, nullable=False) - - # Relationships - self.proposed_by = relationship("Resolutions", back_populates="probabilities") - self.proposes = relationship("Clusters", back_populates="probabilities") - - # Constraints - self.__table_args__ = ( - CheckConstraint("probability BETWEEN 0 AND 100", name="valid_probability"), - ) + __tablename__ = "probabilities" + + # Columns + resolution = Column( + BIGINT, + ForeignKey("resolutions.resolution_id", ondelete="CASCADE"), + primary_key=True, + ) + cluster = Column( + BIGINT, ForeignKey("clusters.cluster_id", ondelete="CASCADE"), primary_key=True + ) + probability = Column(SMALLINT, nullable=False) + + # Relationships + proposed_by = relationship("Resolutions", back_populates="probabilities") + proposes = relationship("Clusters", back_populates="probabilities") + + # Constraints + __table_args__ = ( + CheckConstraint("probability BETWEEN 0 AND 100", name="valid_probability"), + ) diff --git a/src/matchbox/server/postgresql/utils/db.py b/src/matchbox/server/postgresql/utils/db.py index 71c4144a..1db04c12 100644 --- a/src/matchbox/server/postgresql/utils/db.py +++ b/src/matchbox/server/postgresql/utils/db.py @@ -18,6 +18,8 @@ from sqlalchemy.orm import DeclarativeMeta, Session from sqlalchemy.exc import DatabaseError as AlchemyDatabaseError +from matchbox.server.postgresql.db import MBDB + from matchbox.common.graph import ( ResolutionEdge, ResolutionGraph, @@ -200,7 +202,7 @@ def _create_adbc_table_constraints(db_schema:str, sufix:str, conn:Connection) -> """ # Cluster - _run_queries([ + statements = [ f"""DROP TABLE IF EXISTS {db_schema}.clusters""", f"""DROP TABLE IF EXISTS {db_schema}.contains""", f"""DROP TABLE IF EXISTS {db_schema}.probabilities""", @@ -208,7 +210,17 @@ def _create_adbc_table_constraints(db_schema:str, sufix:str, conn:Connection) -> f"""ALTER TABLE {db_schema}.clusters_{sufix} RENAME 
TO clusters""",
         f"""ALTER TABLE {db_schema}.contains_{sufix} RENAME TO contains""",
         f"""ALTER TABLE {db_schema}.probabilities_{sufix} RENAME TO probabilities"""
-    ], conn)
+    ]
+    # Start the transaction: the drops, renames and constraint creation
+    # below commit together or not at all
+    conn.begin()
+    for query in statements:
+        conn.execute(text(query))
+
+    MBDB.MatchboxBase.metadata.create_all(conn)
+
+    conn.commit()
+
+    return True
 
 def _adbc_insert_data(clusters:pa.Table, contains:pa.Table, probabilities:pa.Table, suffix:str, alchemy_conn:Connection, resolution_id:int) -> bool:
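
The rewritten tail of _create_adbc_table_constraints above runs the DROP and RENAME statements and the metadata.create_all() call on one connection between begin() and commit(), so the staged tables replace the live ones atomically. A minimal sketch of that swap in isolation, assuming a SQLAlchemy 2.x Engine and staged tables named <table>_<suffix> as in the earlier patches:

from sqlalchemy import Engine, text

TABLES = ("clusters", "contains", "probabilities")


def swap_staged_tables(engine: Engine, suffix: str) -> bool:
    # engine.begin() opens a transaction that commits on clean exit and
    # rolls back if any statement fails, so the live tables are never
    # left half-replaced.
    with engine.begin() as conn:
        for name in TABLES:
            conn.execute(text(f"DROP TABLE IF EXISTS {name}"))
            conn.execute(text(f"ALTER TABLE {name}_{suffix} RENAME TO {name}"))
    return True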
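The point of the PATCH 5/5 ORM changes is that constraints and indexes live on the declarative models again, so MBDB.MatchboxBase.metadata.create_all(conn) can emit them instead of the hand-written ALTER TABLE / CREATE INDEX statements removed earlier in the series. A toy demonstration of that mechanism with an in-memory SQLite engine and an invented model; generic types stand in for the postgres-specific BIGINT/BYTEA:

from sqlalchemy import (BigInteger, CheckConstraint, Column, LargeBinary,
                        UniqueConstraint, create_engine)
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class DemoClusters(Base):
    __tablename__ = "demo_clusters"

    cluster_id = Column(BigInteger, primary_key=True)
    cluster_hash = Column(LargeBinary, nullable=False)

    # Declared once on the model; emitted as DDL by create_all().
    __table_args__ = (
        UniqueConstraint("cluster_hash", name="demo_clusters_hash_key"),
        CheckConstraint("cluster_id >= 0", name="demo_positive_id"),
    )


engine = create_engine("sqlite://", echo=True)
# Emits CREATE TABLE with the primary key, UNIQUE and CHECK constraints
# attached, so no hand-written ADD CONSTRAINT statements are needed.
Base.metadata.create_all(engine)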
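On the ingestion side exercised by test_save_to_postgresql, the data travels as Arrow record batches over ADBC. A rough sketch of that path, assuming adbc-driver-postgresql is installed; the URI mirrors the POSTGRESQL_URI constant from the first patch, and the function name and batching details are illustrative rather than the real helper:

import adbc_driver_postgresql.dbapi
import pyarrow as pa

URI = "postgresql://postgres:postgres@localhost:5432/postgres"


def stream_table(table: pa.Table, table_name: str) -> None:
    # Wrap the table's batches in a reader so rows stream to the server
    # instead of being materialised in a single client-side buffer.
    reader = pa.RecordBatchReader.from_batches(table.schema, table.to_batches())
    conn = adbc_driver_postgresql.dbapi.connect(URI)
    try:
        cursor = conn.cursor()
        # mode="append" expects the target table to already exist,
        # matching the CREATE TABLE ... AS SELECT staging step.
        cursor.adbc_ingest(table_name, reader, mode="append")
        conn.commit()
    finally:
        conn.close()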
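Finally, on the test changes in test_adbcingest.py: unittest.mock.patch replaces a name in the namespace where the code under test looks it up, which is why the placeholder 'my_module' targets had to become 'matchbox.server.postgresql.utils.db'. A self-contained sketch of that rule; the module and helper below are invented for illustration:

# patch_target_sketch.py -- run directly. Patching must target the
# namespace where the name is looked up at call time, mirroring the
# switch from 'my_module' to 'matchbox.server.postgresql.utils.db'.
import datetime as real_datetime
from unittest.mock import patch


def make_suffix() -> str:
    # Resolves `real_datetime` in this module's globals when called.
    return real_datetime.datetime.now().strftime("%Y%m%d%H%M%S")


if __name__ == "__main__":
    with patch("__main__.real_datetime") as fake:
        fake.datetime.now.return_value.strftime.return_value = "20250101123045"
        assert make_suffix() == "20250101123045"  # the fake is seen
    assert make_suffix() != "20250101123045"  # the real datetime is back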