From 730840a98dc9e587c9bd93e0579b2e20a7ecc4c1 Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sat, 13 Apr 2024 15:09:12 +0100 Subject: [PATCH 01/12] possible pagination-fix using skip --- stac_fastapi/mongo/database_logic.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/database_logic.py index c7357d0..c0e3b2c 100644 --- a/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/database_logic.py @@ -554,28 +554,32 @@ async def execute_search( query = {"$and": search.filters} if search and search.filters else {} print("Query: ", query) + if collection_ids: query["collection"] = {"$in": collection_ids} sort_criteria = sort if sort else [("id", 1)] # Default sort + try: if token: last_id = decode_token(token) - query["id"] = {"$gt": last_id} + skip_count = int(last_id) + else: + skip_count = 0 - cursor = collection.find(query).sort(sort_criteria).limit(limit + 1) + cursor = collection.find(query).sort(sort_criteria).skip(skip_count).limit(limit + 1) items = await cursor.to_list(length=limit + 1) next_token = None if len(items) > limit: - next_token = base64.urlsafe_b64encode( - str(items[-1]["id"]).encode() - ).decode() + next_skip = skip_count + limit + next_token = base64.urlsafe_b64encode(str(next_skip).encode()).decode() items = items[:-1] maybe_count = None if not token: maybe_count = await collection.count_documents(query) + return items, maybe_count, next_token except PyMongoError as e: print(f"Database operation failed: {e}") From ef8f08d90460187e01345f042eda4def0f613c17 Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sat, 13 Apr 2024 17:56:11 +0100 Subject: [PATCH 02/12] changelog updates; pre-commit ran successsfully; unskip pagination pytests --- CHANGELOG.md | 12 ++++++++++-- stac_fastapi/mongo/database_logic.py | 7 ++++++- stac_fastapi/tests/resources/test_item.py | 5 ----- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1097805..546be98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0/). ## [Unreleased] @@ -27,5 +27,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Fixed -[Unreleased]: +## [v3.0.1] - 2024-04-13 + +### Fixed + +- Fixed pagination issue with MongoDB. Fixes #1. + + +[Unreleased]: [v3.0.0]: +[v3.0.1]: diff --git a/stac_fastapi/mongo/database_logic.py b/stac_fastapi/mongo/database_logic.py index c0e3b2c..0444013 100644 --- a/stac_fastapi/mongo/database_logic.py +++ b/stac_fastapi/mongo/database_logic.py @@ -567,7 +567,12 @@ async def execute_search( else: skip_count = 0 - cursor = collection.find(query).sort(sort_criteria).skip(skip_count).limit(limit + 1) + cursor = ( + collection.find(query) + .sort(sort_criteria) + .skip(skip_count) + .limit(limit + 1) + ) items = await cursor.to_list(length=limit + 1) next_token = None diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index 4fae107..ee4fc96 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -553,9 +553,6 @@ async def test_get_missing_item_collection(app_client): assert resp.status_code == 404 -@pytest.mark.skip( - reason="Pagination is not working in mongo, setting the limit doesn't limit the number of results. You can keep going to the next result." -) @pytest.mark.asyncio async def test_pagination_item_collection(app_client, ctx, txn_client): """Test item collection pagination links (paging extension)""" @@ -593,7 +590,6 @@ async def test_pagination_item_collection(app_client, ctx, txn_client): assert not set(item_ids) - set(ids) -@pytest.mark.skip(reason="fix pagination in mongo") @pytest.mark.asyncio async def test_pagination_post(app_client, ctx, txn_client): """Test POST pagination (paging extension)""" @@ -630,7 +626,6 @@ async def test_pagination_post(app_client, ctx, txn_client): assert not set(item_ids) - set(ids) -@pytest.mark.skip(reason="fix pagination in mongo") @pytest.mark.asyncio async def test_pagination_token_idempotent(app_client, ctx, txn_client): """Test that pagination tokens are idempotent (paging extension)""" From c5f282d965dde3c711c346266014256273241d94 Mon Sep 17 00:00:00 2001 From: Jonathan Healy Date: Sun, 14 Apr 2024 01:05:58 +0800 Subject: [PATCH 03/12] Update CHANGELOG.md --- CHANGELOG.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 546be98..2f54353 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0/ ### Fixed - Removed bulk transactions extension from app.py -- Fixed pagination bug so pagination functions +- Fixed pagination issue with MongoDB. Fixes [#1](https://github.com/Healy-Hyperspatial/stac-fastapi-mongo/issues/1) ## [v3.0.0] @@ -27,13 +27,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0/ ### Fixed -## [v3.0.1] - 2024-04-13 - -### Fixed - -- Fixed pagination issue with MongoDB. Fixes #1. - [Unreleased]: [v3.0.0]: -[v3.0.1]: From a32d1fe4adf89ed0f77412f39f6060dbff1af98b Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 10:42:28 +0100 Subject: [PATCH 04/12] pytests fixes --- stac_fastapi/tests/resources/test_item.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index ee4fc96..de26006 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -570,12 +570,12 @@ async def test_pagination_item_collection(app_client, ctx, txn_client): ) item_ids = [] - idx = 0 + idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default for idx in range(100): page_data = page.json() next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) if not next_link: - assert not page_data["features"] + assert page_data["features"] break assert len(page_data["features"]) == 1 @@ -598,13 +598,13 @@ async def test_pagination_post(app_client, ctx, txn_client): # Ingest 5 items for _ in range(5): ctx.item["id"] = str(uuid.uuid4()) - await create_item(txn_client, ctx.item) - ids.append(ctx.item["id"]) + await create_item(txn_client, ctx.item) + ids.append(ctx.item["id"]) # Paginate through all 5 items with a limit of 1 (expecting 5 requests) request_body = {"ids": ids, "limit": 1} page = await app_client.post("/search", json=request_body) - idx = 0 + idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default item_ids = [] for _ in range(100): idx += 1 @@ -634,8 +634,8 @@ async def test_pagination_token_idempotent(app_client, ctx, txn_client): # Ingest 5 items for _ in range(5): ctx.item["id"] = str(uuid.uuid4()) - await create_item(txn_client, ctx.item) - ids.append(ctx.item["id"]) + await create_item(txn_client, ctx.item) + ids.append(ctx.item["id"]) page = await app_client.get("/search", params={"ids": ",".join(ids), "limit": 3}) page_data = page.json() From a11e805bf41f1b41af02f9a1c166ed0b355e9cf5 Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 11:45:08 +0100 Subject: [PATCH 05/12] pre-commit run --all-files; test_pagination_item_collection fixes --- stac_fastapi/tests/resources/test_item.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index de26006..be29932 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -570,8 +570,9 @@ async def test_pagination_item_collection(app_client, ctx, txn_client): ) item_ids = [] - idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default - for idx in range(100): + idx = 0 + for _ in range(100): + idx += 1 page_data = page.json() next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) if not next_link: @@ -604,7 +605,7 @@ async def test_pagination_post(app_client, ctx, txn_client): # Paginate through all 5 items with a limit of 1 (expecting 5 requests) request_body = {"ids": ids, "limit": 1} page = await app_client.post("/search", json=request_body) - idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default + idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default item_ids = [] for _ in range(100): idx += 1 From 0328b13bb75a9f2ffec5c253fe62981ab090208f Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 11:53:33 +0100 Subject: [PATCH 06/12] cleaner idx incrementing --- stac_fastapi/tests/resources/test_item.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index be29932..f5f1990 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -570,9 +570,8 @@ async def test_pagination_item_collection(app_client, ctx, txn_client): ) item_ids = [] - idx = 0 - for _ in range(100): - idx += 1 + # idx starts at 1 because of conftest.py > ctx adds test_item by default + for idx in range(1, 100): page_data = page.json() next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) if not next_link: @@ -605,10 +604,9 @@ async def test_pagination_post(app_client, ctx, txn_client): # Paginate through all 5 items with a limit of 1 (expecting 5 requests) request_body = {"ids": ids, "limit": 1} page = await app_client.post("/search", json=request_body) - idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default item_ids = [] - for _ in range(100): - idx += 1 + # idx starts at 1 because of conftest.py > ctx adds test_item by default + for idx in range(1, 100): page_data = page.json() next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) if not next_link: From 4f6a4bbeda593c6aed436d0665b3fc9adff9c137 Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 12:09:06 +0100 Subject: [PATCH 07/12] wip --- stac_fastapi/tests/resources/test_item.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index f5f1990..e7464cb 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -570,8 +570,9 @@ async def test_pagination_item_collection(app_client, ctx, txn_client): ) item_ids = [] - # idx starts at 1 because of conftest.py > ctx adds test_item by default - for idx in range(1, 100): + idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default + for _ in range(100): + idx += 1 page_data = page.json() next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) if not next_link: @@ -604,9 +605,10 @@ async def test_pagination_post(app_client, ctx, txn_client): # Paginate through all 5 items with a limit of 1 (expecting 5 requests) request_body = {"ids": ids, "limit": 1} page = await app_client.post("/search", json=request_body) + idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default item_ids = [] - # idx starts at 1 because of conftest.py > ctx adds test_item by default - for idx in range(1, 100): + for _ in range(100): + idx += 1 page_data = page.json() next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) if not next_link: From 99c70cb5014aec2eaea450f2c87545a98df64619 Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 12:12:38 +0100 Subject: [PATCH 08/12] test_pagination_item_collection temporary fix --- stac_fastapi/tests/resources/test_item.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index e7464cb..be29932 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -570,7 +570,7 @@ async def test_pagination_item_collection(app_client, ctx, txn_client): ) item_ids = [] - idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default + idx = 0 for _ in range(100): idx += 1 page_data = page.json() From 095ead4b7e932d3c9cb1e895502f21e089c48d26 Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 12:28:26 +0100 Subject: [PATCH 09/12] improved tests: better variable naming; comment clarity; code improvement --- stac_fastapi/tests/resources/test_item.py | 94 ++++++++++++++--------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index be29932..c71f2c1 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -556,99 +556,123 @@ async def test_get_missing_item_collection(app_client): @pytest.mark.asyncio async def test_pagination_item_collection(app_client, ctx, txn_client): """Test item collection pagination links (paging extension)""" - ids = [ctx.item["id"]] + # Initialize a list to store the expected item IDs + expected_item_ids = [ctx.item["id"]] - # Ingest 5 items + # Ingest 5 items in addition to the default test-item for _ in range(5): ctx.item["id"] = str(uuid.uuid4()) await create_item(txn_client, item=ctx.item) - ids.append(ctx.item["id"]) + expected_item_ids.append(ctx.item["id"]) - # Paginate through all 6 items with a limit of 1 (expecting 7 requests) + # Paginate through all items with a limit of 1 (expecting 6 requests) page = await app_client.get( f"/collections/{ctx.item['collection']}/items", params={"limit": 1} ) - item_ids = [] - idx = 0 + # Initialize a list to store the retrieved item IDs + retrieved_item_ids = [] + request_count = 0 for _ in range(100): - idx += 1 + request_count += 1 page_data = page.json() next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) if not next_link: + # Ensure that the last page contains features assert page_data["features"] break + # Assert that each page contains only one feature assert len(page_data["features"]) == 1 - item_ids.append(page_data["features"][0]["id"]) + retrieved_item_ids.append(page_data["features"][0]["id"]) + # Extract the next page URL href = next_link[0]["href"][len("http://test-server") :] page = await app_client.get(href) - assert idx == len(ids) + # Assert that the number of requests made is equal to the total number of items ingested + assert request_count == len(expected_item_ids) - # Confirm we have paginated through all items - assert not set(item_ids) - set(ids) + # Confirm we have paginated through all items by comparing the expected and retrieved item IDs + assert not set(retrieved_item_ids) - set(expected_item_ids) @pytest.mark.asyncio async def test_pagination_post(app_client, ctx, txn_client): """Test POST pagination (paging extension)""" - ids = [ctx.item["id"]] + # Initialize a list to store the expected item IDs + expected_item_ids = [ctx.item["id"]] - # Ingest 5 items + # Ingest 5 items in addition to the default test-item for _ in range(5): ctx.item["id"] = str(uuid.uuid4()) await create_item(txn_client, ctx.item) - ids.append(ctx.item["id"]) + expected_item_ids.append(ctx.item["id"]) - # Paginate through all 5 items with a limit of 1 (expecting 5 requests) - request_body = {"ids": ids, "limit": 1} + # Prepare the initial request body with item IDs and a limit of 1 + request_body = {"ids": expected_item_ids, "limit": 1} + + # Perform the initial POST request to start pagination page = await app_client.post("/search", json=request_body) - idx = 1 # start at 1 because of conftest.py > ctx adds test_item by default - item_ids = [] + + # Initialize variables to keep track of request count and retrieved item IDs + request_count = ( + 1 # Start at 1 because of conftest.py > ctx adds test_item by default + ) + retrieved_item_ids = [] + for _ in range(100): - idx += 1 + request_count += 1 page_data = page.json() + + # Extract the next link from the page data next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) + + # If there is no next link, exit the loop if not next_link: break - item_ids.append(page_data["features"][0]["id"]) + # Retrieve the ID of the first item on the current page and add it to the list + retrieved_item_ids.append(page_data["features"][0]["id"]) - # Merge request bodies + # Update the request body with the parameters from the next link request_body.update(next_link[0]["body"]) + + # Perform the next POST request using the updated request body page = await app_client.post("/search", json=request_body) - # Our limit is 1, so we expect len(ids) number of requests before we run out of pages - assert idx == len(ids) + 1 + # Our limit is 1, so we expect len(ids) + 1 number of requests before we run out of pages + assert request_count == len(expected_item_ids) + 1 - # Confirm we have paginated through all items - assert not set(item_ids) - set(ids) + # Confirm we have paginated through all items by comparing the expected and retrieved item IDs + assert not set(retrieved_item_ids) - set(expected_item_ids) @pytest.mark.asyncio async def test_pagination_token_idempotent(app_client, ctx, txn_client): """Test that pagination tokens are idempotent (paging extension)""" - ids = [ctx.item["id"]] + # Initialize a list to store the expected item IDs + expected_item_ids = [ctx.item["id"]] - # Ingest 5 items + # Ingest 5 items in addition to the default test-item for _ in range(5): ctx.item["id"] = str(uuid.uuid4()) await create_item(txn_client, ctx.item) - ids.append(ctx.item["id"]) + expected_item_ids.append(ctx.item["id"]) - page = await app_client.get("/search", params={"ids": ",".join(ids), "limit": 3}) + # Perform the initial GET request to start pagination with a limit of 3 + page = await app_client.get( + "/search", params={"ids": ",".join(expected_item_ids), "limit": 3} + ) page_data = page.json() next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"])) + # Extract the pagination token from the next link + pagination_token = parse_qs(urlparse(next_link[0]["href"]).query) + # Confirm token is idempotent - resp1 = await app_client.get( - "/search", params=parse_qs(urlparse(next_link[0]["href"]).query) - ) - resp2 = await app_client.get( - "/search", params=parse_qs(urlparse(next_link[0]["href"]).query) - ) + resp1 = await app_client.get("/search", params=pagination_token) + resp2 = await app_client.get("/search", params=pagination_token) resp1_data = resp1.json() resp2_data = resp2.json() From 01a0cec410961eb78abda130eba7bb679a73cf33 Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 12:48:27 +0100 Subject: [PATCH 10/12] small fix --- stac_fastapi/tests/resources/test_item.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index c71f2c1..01944e3 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -616,9 +616,7 @@ async def test_pagination_post(app_client, ctx, txn_client): page = await app_client.post("/search", json=request_body) # Initialize variables to keep track of request count and retrieved item IDs - request_count = ( - 1 # Start at 1 because of conftest.py > ctx adds test_item by default - ) + request_count = 1 retrieved_item_ids = [] for _ in range(100): From de3228234596a2b5fecfd2538297d41f4ea024f6 Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 13:20:03 +0100 Subject: [PATCH 11/12] additions test fixes --- stac_fastapi/tests/resources/test_item.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index 01944e3..087fdc4 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -570,7 +570,6 @@ async def test_pagination_item_collection(app_client, ctx, txn_client): f"/collections/{ctx.item['collection']}/items", params={"limit": 1} ) - # Initialize a list to store the retrieved item IDs retrieved_item_ids = [] request_count = 0 for _ in range(100): @@ -615,10 +614,8 @@ async def test_pagination_post(app_client, ctx, txn_client): # Perform the initial POST request to start pagination page = await app_client.post("/search", json=request_body) - # Initialize variables to keep track of request count and retrieved item IDs - request_count = 1 retrieved_item_ids = [] - + request_count = 0 for _ in range(100): request_count += 1 page_data = page.json() @@ -640,7 +637,7 @@ async def test_pagination_post(app_client, ctx, txn_client): page = await app_client.post("/search", json=request_body) # Our limit is 1, so we expect len(ids) + 1 number of requests before we run out of pages - assert request_count == len(expected_item_ids) + 1 + assert request_count == len(expected_item_ids) # Confirm we have paginated through all items by comparing the expected and retrieved item IDs assert not set(retrieved_item_ids) - set(expected_item_ids) From a94938ce21d40acf3cdca78111b1179bcc2cd20e Mon Sep 17 00:00:00 2001 From: pedro-cf Date: Sun, 14 Apr 2024 13:22:07 +0100 Subject: [PATCH 12/12] small fixes --- stac_fastapi/tests/resources/test_item.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index 087fdc4..17eab57 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -636,7 +636,7 @@ async def test_pagination_post(app_client, ctx, txn_client): # Perform the next POST request using the updated request body page = await app_client.post("/search", json=request_body) - # Our limit is 1, so we expect len(ids) + 1 number of requests before we run out of pages + # Our limit is 1, so we expect len(expected_item_ids) number of requests before we run out of pages assert request_count == len(expected_item_ids) # Confirm we have paginated through all items by comparing the expected and retrieved item IDs