Skip to content

Commit

Permalink
Merge pull request #9 from pedro-cf/pagination-fix
Browse files Browse the repository at this point in the history
possible pagination-fix using mongo skip
  • Loading branch information
jonhealy1 authored Apr 14, 2024
2 parents e59c421 + a94938c commit 0dd85f4
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 51 deletions.
7 changes: 4 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).


## [Unreleased]
Expand All @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Fixed

- Removed bulk transactions extension from app.py
- Fixed pagination bug so pagination functions
- Fixed pagination issue with MongoDB. Fixes [#1](https://github.com/Healy-Hyperspatial/stac-fastapi-mongo/issues/1)


## [v3.0.0]
Expand All @@ -27,5 +27,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Fixed


[Unreleased]: <https://github.com/Healy-Hyperspatial/stac-fastapi-mongo/tree/v3.0.0...main>

[Unreleased]: <https://github.com/Healy-Hyperspatial/stac-fastapi-mongo/compare/v3.0.0...main>
[v3.0.0]: <https://github.com/Healy-Hyperspatial/stac-fastapi-mongo/tree/v3.0.0>
19 changes: 14 additions & 5 deletions stac_fastapi/mongo/database_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,28 +554,37 @@ async def execute_search(
query = {"$and": search.filters} if search and search.filters else {}

print("Query: ", query)

if collection_ids:
query["collection"] = {"$in": collection_ids}

sort_criteria = sort if sort else [("id", 1)] # Default sort

try:
if token:
last_id = decode_token(token)
query["id"] = {"$gt": last_id}
skip_count = int(last_id)
else:
skip_count = 0

cursor = collection.find(query).sort(sort_criteria).limit(limit + 1)
cursor = (
collection.find(query)
.sort(sort_criteria)
.skip(skip_count)
.limit(limit + 1)
)
items = await cursor.to_list(length=limit + 1)

next_token = None
if len(items) > limit:
next_token = base64.urlsafe_b64encode(
str(items[-1]["id"]).encode()
).decode()
next_skip = skip_count + limit
next_token = base64.urlsafe_b64encode(str(next_skip).encode()).decode()
items = items[:-1]

maybe_count = None
if not token:
maybe_count = await collection.count_documents(query)

return items, maybe_count, next_token
except PyMongoError as e:
print(f"Database operation failed: {e}")
Expand Down
101 changes: 58 additions & 43 deletions stac_fastapi/tests/resources/test_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,106 +553,121 @@ async def test_get_missing_item_collection(app_client):
assert resp.status_code == 404


@pytest.mark.skip(
reason="Pagination is not working in mongo, setting the limit doesn't limit the number of results. You can keep going to the next result."
)
@pytest.mark.asyncio
async def test_pagination_item_collection(app_client, ctx, txn_client):
"""Test item collection pagination links (paging extension)"""
ids = [ctx.item["id"]]
# Initialize a list to store the expected item IDs
expected_item_ids = [ctx.item["id"]]

# Ingest 5 items
# Ingest 5 items in addition to the default test-item
for _ in range(5):
ctx.item["id"] = str(uuid.uuid4())
await create_item(txn_client, item=ctx.item)
ids.append(ctx.item["id"])
expected_item_ids.append(ctx.item["id"])

# Paginate through all 6 items with a limit of 1 (expecting 7 requests)
# Paginate through all items with a limit of 1 (expecting 6 requests)
page = await app_client.get(
f"/collections/{ctx.item['collection']}/items", params={"limit": 1}
)

item_ids = []
idx = 0
for idx in range(100):
retrieved_item_ids = []
request_count = 0
for _ in range(100):
request_count += 1
page_data = page.json()
next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"]))
if not next_link:
assert not page_data["features"]
# Ensure that the last page contains features
assert page_data["features"]
break

# Assert that each page contains only one feature
assert len(page_data["features"]) == 1
item_ids.append(page_data["features"][0]["id"])
retrieved_item_ids.append(page_data["features"][0]["id"])

# Extract the next page URL
href = next_link[0]["href"][len("http://test-server") :]
page = await app_client.get(href)

assert idx == len(ids)
# Assert that the number of requests made is equal to the total number of items ingested
assert request_count == len(expected_item_ids)

# Confirm we have paginated through all items
assert not set(item_ids) - set(ids)
# Confirm we have paginated through all items by comparing the expected and retrieved item IDs
assert not set(retrieved_item_ids) - set(expected_item_ids)


@pytest.mark.skip(reason="fix pagination in mongo")
@pytest.mark.asyncio
async def test_pagination_post(app_client, ctx, txn_client):
"""Test POST pagination (paging extension)"""
ids = [ctx.item["id"]]
# Initialize a list to store the expected item IDs
expected_item_ids = [ctx.item["id"]]

# Ingest 5 items
# Ingest 5 items in addition to the default test-item
for _ in range(5):
ctx.item["id"] = str(uuid.uuid4())
await create_item(txn_client, ctx.item)
ids.append(ctx.item["id"])
await create_item(txn_client, ctx.item)
expected_item_ids.append(ctx.item["id"])

# Paginate through all 5 items with a limit of 1 (expecting 5 requests)
request_body = {"ids": ids, "limit": 1}
# Prepare the initial request body with item IDs and a limit of 1
request_body = {"ids": expected_item_ids, "limit": 1}

# Perform the initial POST request to start pagination
page = await app_client.post("/search", json=request_body)
idx = 0
item_ids = []

retrieved_item_ids = []
request_count = 0
for _ in range(100):
idx += 1
request_count += 1
page_data = page.json()

# Extract the next link from the page data
next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"]))

# If there is no next link, exit the loop
if not next_link:
break

item_ids.append(page_data["features"][0]["id"])
# Retrieve the ID of the first item on the current page and add it to the list
retrieved_item_ids.append(page_data["features"][0]["id"])

# Merge request bodies
# Update the request body with the parameters from the next link
request_body.update(next_link[0]["body"])

# Perform the next POST request using the updated request body
page = await app_client.post("/search", json=request_body)

# Our limit is 1, so we expect len(ids) number of requests before we run out of pages
assert idx == len(ids) + 1
# Our limit is 1, so we expect len(expected_item_ids) number of requests before we run out of pages
assert request_count == len(expected_item_ids)

# Confirm we have paginated through all items
assert not set(item_ids) - set(ids)
# Confirm we have paginated through all items by comparing the expected and retrieved item IDs
assert not set(retrieved_item_ids) - set(expected_item_ids)


@pytest.mark.skip(reason="fix pagination in mongo")
@pytest.mark.asyncio
async def test_pagination_token_idempotent(app_client, ctx, txn_client):
"""Test that pagination tokens are idempotent (paging extension)"""
ids = [ctx.item["id"]]
# Initialize a list to store the expected item IDs
expected_item_ids = [ctx.item["id"]]

# Ingest 5 items
# Ingest 5 items in addition to the default test-item
for _ in range(5):
ctx.item["id"] = str(uuid.uuid4())
await create_item(txn_client, ctx.item)
ids.append(ctx.item["id"])
await create_item(txn_client, ctx.item)
expected_item_ids.append(ctx.item["id"])

page = await app_client.get("/search", params={"ids": ",".join(ids), "limit": 3})
# Perform the initial GET request to start pagination with a limit of 3
page = await app_client.get(
"/search", params={"ids": ",".join(expected_item_ids), "limit": 3}
)
page_data = page.json()
next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"]))

# Extract the pagination token from the next link
pagination_token = parse_qs(urlparse(next_link[0]["href"]).query)

# Confirm token is idempotent
resp1 = await app_client.get(
"/search", params=parse_qs(urlparse(next_link[0]["href"]).query)
)
resp2 = await app_client.get(
"/search", params=parse_qs(urlparse(next_link[0]["href"]).query)
)
resp1 = await app_client.get("/search", params=pagination_token)
resp2 = await app_client.get("/search", params=pagination_token)
resp1_data = resp1.json()
resp2_data = resp2.json()

Expand Down

0 comments on commit 0dd85f4

Please sign in to comment.