Skip to content

Commit

Permalink
Add datasets/status/update endpoint
Browse files Browse the repository at this point in the history
There is currently no migration test because the test infra-
structure makes this hard. Since the PHP calls directly
affect the database, we need to ensure we can recover the old
database state after the test. We can not generally do this
for the dataset status update call, as for that we need to know
the initial state of the database. While we do know that tech-
nically, we would need to hard-code that information into the
tests. A better approach would be to start up a new database
container for PHP. A second issue that arises it that if we
call an update on the status table from both Python and PHP
then the transaction from the first will block the call of
the other. This too is mitigated by introducing a second
database. The only potential risk you introduce is to
be working on different databases. Overall though, the effect
of this should be rather minimal as the database would be
effectively reset after every individual test.
  • Loading branch information
PGijsbers committed Nov 30, 2023
1 parent 405e0f3 commit a8fa27a
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 4 deletions.
35 changes: 35 additions & 0 deletions src/database/datasets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Translation from https://github.com/openml/OpenML/blob/c19c9b99568c0fabb001e639ff6724b9a754bbc9/openml_OS/models/api/v1/Api_data.php#L707"""
import datetime
from collections import defaultdict
from typing import Any, Iterable

Expand Down Expand Up @@ -198,3 +199,37 @@ def get_feature_values(dataset_id: int, feature_index: int, connection: Connecti
parameters={"dataset_id": dataset_id, "feature_index": feature_index},
)
return [row.value for row in rows]


def insert_status_for_dataset(
dataset_id: int,
user_id: int,
status: str,
connection: Connection,
) -> None:
connection.execute(
text(
"""
INSERT INTO dataset_status(`did`,`status`,`status_date`,`user_id`)
VALUES (:dataset, :status, :date, :user)
""",
),
parameters={
"dataset": dataset_id,
"status": status,
"date": datetime.datetime.now(),
"user": user_id,
},
)


def remove_deactivated_status(dataset_id: int, connection: Connection) -> None:
connection.execute(
text(
"""
DELETE FROM dataset_status
WHERE `did` = :data AND `status`='deactivated'
""",
),
parameters={"data": dataset_id},
)
59 changes: 58 additions & 1 deletion src/routers/openml/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re
from datetime import datetime
from enum import StrEnum
from typing import Annotated, Any, NamedTuple
from typing import Annotated, Any, Literal, NamedTuple

from core.access import _user_has_access
from core.errors import DatasetError
Expand All @@ -27,6 +27,8 @@
get_latest_status_update,
get_qualities_for_datasets,
get_tags,
insert_status_for_dataset,
remove_deactivated_status,
)
from database.datasets import tag_dataset as db_tag_dataset
from database.users import User, UserGroup
Expand Down Expand Up @@ -317,6 +319,61 @@ def get_dataset_features(
return features


@router.post(
path="/status/update",
)
def update_dataset_status(
dataset_id: Annotated[int, Body()],
status: Annotated[Literal[DatasetStatus.ACTIVE] | Literal[DatasetStatus.DEACTIVATED], Body()],
user: Annotated[User | None, Depends(fetch_user)],
expdb: Annotated[Connection, Depends(expdb_connection)],
) -> dict[str, str | int]:
if user is None:
raise HTTPException(
status_code=http.client.UNAUTHORIZED,
detail="Updating dataset status required authorization",
)

dataset = _get_dataset_raise_otherwise(dataset_id, user, expdb)

can_deactivate = dataset["uploader"] == user.user_id or UserGroup.ADMIN in user.groups
if status == DatasetStatus.DEACTIVATED and not can_deactivate:
raise HTTPException(
status_code=http.client.FORBIDDEN,
detail={"code": 693, "message": "Dataset is not owned by you"},
)
if status == DatasetStatus.ACTIVE and UserGroup.ADMIN not in user.groups:
raise HTTPException(
status_code=http.client.FORBIDDEN,
detail={"code": 696, "message": "Only administrators can activate datasets."},
)

current_status = get_latest_status_update(dataset_id, expdb)
if current_status and current_status["status"] == status:
raise HTTPException(
status_code=http.client.PRECONDITION_FAILED,
detail={"code": 694, "message": "Illegal status transition."},
)

# If current status is unknown, it is effectively "in preparation",
# So the following transitions are allowed (first 3 transitions are first clause)
# - in preparation => active (add a row)
# - in preparation => deactivated (add a row)
# - active => deactivated (add a row)
# - deactivated => active (delete a row)
if current_status is None or status == DatasetStatus.DEACTIVATED:
insert_status_for_dataset(dataset_id, user.user_id, status, expdb)
elif current_status["status"] == DatasetStatus.DEACTIVATED:
remove_deactivated_status(dataset_id, expdb)
else:
raise HTTPException(
status_code=http.client.INTERNAL_SERVER_ERROR,
detail={"message": f"Unknown status transition: {current_status} -> {status}"},
)

return {"dataset_id": dataset_id, "status": status}


@router.get(
path="/{dataset_id}",
description="Get meta-data for dataset with ID `dataset_id`.",
Expand Down
6 changes: 3 additions & 3 deletions tests/constants.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
NUMBER_OF_DATASETS = 131
NUMBER_OF_DEACTIVATED_DATASETS = 1
NUMBER_OF_DATASETS_IN_PREPARATION = 1
NUMBER_OF_DATASETS_IN_PREPARATION = 2
NUMBER_OF_PRIVATE_DATASETS = 1
NUMBER_OF_ACTIVE_DATASETS = 128
NUMBER_OF_ACTIVE_DATASETS = 127

PRIVATE_DATASET_ID = 130
IN_PREPARATION_ID = 1
IN_PREPARATION_ID = [1, 33]
85 changes: 85 additions & 0 deletions tests/routers/openml/datasets_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import httpx
import pytest
from schemas.datasets.openml import DatasetStatus
from starlette.testclient import TestClient

from tests.conftest import ApiKey
Expand Down Expand Up @@ -144,3 +145,87 @@ def test_dataset_features_with_processing_error(py_api: TestClient) -> None:
def test_dataset_features_dataset_does_not_exist(py_api: TestClient) -> None:
resource = py_api.get("/datasets/features/1000")
assert resource.status_code == http.client.NOT_FOUND


def _assert_status_update_is_successful(
apikey: ApiKey,
dataset_id: int,
status: str,
py_api: TestClient,
) -> None:
response = py_api.post(
f"/datasets/status/update?api_key={apikey}",
json={"dataset_id": dataset_id, "status": status},
)
assert response.status_code == http.client.OK
assert response.json() == {
"dataset_id": dataset_id,
"status": status,
}


@pytest.mark.mut()
@pytest.mark.parametrize(
"dataset_id",
[2, 3],
)
def test_dataset_status_update_active_to_deactivated(dataset_id: int, py_api: TestClient) -> None:
_assert_status_update_is_successful(
apikey=ApiKey.ADMIN,
dataset_id=dataset_id,
status=DatasetStatus.DEACTIVATED,
py_api=py_api,
)


@pytest.mark.mut()
def test_dataset_status_update_in_preparation_to_active(py_api: TestClient) -> None:
_assert_status_update_is_successful(
apikey=ApiKey.ADMIN,
dataset_id=1,
status=DatasetStatus.ACTIVE,
py_api=py_api,
)


@pytest.mark.mut()
def test_dataset_status_update_in_preparation_to_deactivated(py_api: TestClient) -> None:
_assert_status_update_is_successful(
apikey=ApiKey.ADMIN,
dataset_id=1,
status=DatasetStatus.DEACTIVATED,
py_api=py_api,
)


@pytest.mark.mut()
def test_dataset_status_update_deactivated_to_active(py_api: TestClient) -> None:
_assert_status_update_is_successful(
apikey=ApiKey.ADMIN,
dataset_id=131,
status=DatasetStatus.ACTIVE,
py_api=py_api,
)


@pytest.mark.parametrize(
("dataset_id", "api_key", "status"),
[
(1, ApiKey.REGULAR_USER, DatasetStatus.ACTIVE),
(1, ApiKey.REGULAR_USER, DatasetStatus.DEACTIVATED),
(2, ApiKey.REGULAR_USER, DatasetStatus.DEACTIVATED),
(33, ApiKey.REGULAR_USER, DatasetStatus.ACTIVE),
(131, ApiKey.REGULAR_USER, DatasetStatus.ACTIVE),
],
)
def test_dataset_status_unauthorized(
dataset_id: int,
api_key: ApiKey,
status: str,
py_api: TestClient,
) -> None:
response = py_api.post(
f"/datasets/status/update?api_key={api_key}",
json={"dataset_id": dataset_id, "status": status},
)
assert response.status_code == http.client.FORBIDDEN

0 comments on commit a8fa27a

Please sign in to comment.