Skip to content

Commit

Permalink
Add/get flow (#129)
Browse files Browse the repository at this point in the history
* Start adding GET FLOW endpoint

* Return static flow

* Fetch flow information from database

* Load tags and parameters from database

* Add migration tests for get flow

* Add logic for including subflows (components) in response

* Add a test case for flow with subflow in new-style api

* Move database interactions to database submodule

* Ignore numeric type changes in migration test
  • Loading branch information
PGijsbers committed Dec 15, 2023
1 parent 5b3818d commit 5e95710
Show file tree
Hide file tree
Showing 11 changed files with 575 additions and 34 deletions.
67 changes: 67 additions & 0 deletions src/core/conversions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from typing import Any


def _str_to_num(string: str) -> int | float | str:
"""Tries to convert the string to integer, otherwise float, otherwise returns the input."""
if string.isdigit():
return int(string)
try:
return float(string)
except ValueError:
return string


def nested_str_to_num(obj: Any) -> Any:
"""Recursively tries to convert all strings in the object to numbers.
For dictionaries, only the values will be converted."""
if isinstance(obj, dict):
return {key: nested_str_to_num(val) for key, val in obj.items()}
if isinstance(obj, list):
return [nested_str_to_num(val) for val in obj]
if isinstance(obj, str):
return _str_to_num(obj)
return obj


def nested_num_to_str(obj: Any) -> Any:
"""Recursively tries to convert all numbers in the object to strings.
For dictionaries, only the values will be converted."""
if isinstance(obj, dict):
return {key: nested_num_to_str(val) for key, val in obj.items()}
if isinstance(obj, list):
return [nested_num_to_str(val) for val in obj]
if isinstance(obj, (int, float)):
return str(obj)
return obj


def nested_int_to_str(obj: Any) -> Any:
if isinstance(obj, dict):
return {key: nested_int_to_str(val) for key, val in obj.items()}
if isinstance(obj, list):
return [nested_int_to_str(val) for val in obj]
if isinstance(obj, int):
return str(obj)
return obj


def nested_remove_nones(obj: Any) -> Any:
if isinstance(obj, dict):
return {
key: nested_remove_nones(val)
for key, val in obj.items()
if val is not None and nested_remove_nones(val) is not None
}
if isinstance(obj, list):
return [nested_remove_nones(val) for val in obj if nested_remove_nones(val) is not None]
return obj


def nested_remove_single_element_list(obj: Any) -> Any:
if isinstance(obj, dict):
return {key: nested_remove_single_element_list(val) for key, val in obj.items()}
if isinstance(obj, list):
if len(obj) == 1:
return nested_remove_single_element_list(obj[0])
return [nested_remove_single_element_list(val) for val in obj]
return obj
56 changes: 56 additions & 0 deletions src/database/flows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from typing import Any

from sqlalchemy import Connection, CursorResult, text


def get_flow_subflows(flow_id: int, expdb: Connection) -> CursorResult[Any]:
return expdb.execute(
text(
"""
SELECT child as child_id, identifier
FROM implementation_component
WHERE parent = :flow_id
""",
),
parameters={"flow_id": flow_id},
)


def get_flow_tags(flow_id: int, expdb: Connection) -> CursorResult[Any]:
tag_rows = expdb.execute(
text(
"""
SELECT tag
FROM implementation_tag
WHERE id = :flow_id
""",
),
parameters={"flow_id": flow_id},
)
return [tag.tag for tag in tag_rows]


def get_flow_parameters(flow_id: int, expdb: Connection) -> CursorResult[Any]:
return expdb.execute(
text(
"""
SELECT *, defaultValue as default_value, dataType as data_type
FROM input
WHERE implementation_id = :flow_id
""",
),
parameters={"flow_id": flow_id},
)


def get_flow(flow_id: int, expdb: Connection) -> CursorResult[Any]:
return expdb.execute(
text(
"""
SELECT *, uploadDate as upload_date
FROM implementation
WHERE id = :flow_id
""",
),
parameters={"flow_id": flow_id},
)
2 changes: 2 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from routers.openml.datasets import router as datasets_router
from routers.openml.estimation_procedure import router as estimationprocedure_router
from routers.openml.evaluations import router as evaluationmeasures_router
from routers.openml.flows import router as flows_router
from routers.openml.qualities import router as qualities_router
from routers.openml.tasks import router as task_router
from routers.openml.tasktype import router as ttype_router
Expand Down Expand Up @@ -47,6 +48,7 @@ def create_api() -> FastAPI:
app.include_router(evaluationmeasures_router)
app.include_router(estimationprocedure_router)
app.include_router(task_router)
app.include_router(flows_router)
return app


Expand Down
61 changes: 61 additions & 0 deletions src/routers/openml/flows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import http.client
from typing import Annotated

from core.conversions import _str_to_num
from database.flows import get_flow as db_get_flow
from database.flows import get_flow_parameters, get_flow_subflows, get_flow_tags
from fastapi import APIRouter, Depends, HTTPException
from schemas.flows import Flow, Parameter
from sqlalchemy import Connection

from routers.dependencies import expdb_connection

router = APIRouter(prefix="/flows", tags=["flows"])


@router.get("/{flow_id}")
def get_flow(flow_id: int, expdb: Annotated[Connection, Depends(expdb_connection)] = None) -> Flow:
flow_rows = db_get_flow(flow_id, expdb)
if not (flow := next(flow_rows, None)):
raise HTTPException(status_code=http.client.NOT_FOUND, detail="Flow not found")

parameter_rows = get_flow_parameters(flow_id, expdb)
parameters = [
Parameter(
name=parameter.name,
# PHP sets the default value to [], not sure where that comes from.
# In the modern interface, `None` is used instead for now, but I think it might
# make more sense to omit it if there is none.
default_value=_str_to_num(parameter.default_value) if parameter.default_value else None,
data_type=parameter.data_type,
description=parameter.description,
)
for parameter in parameter_rows
]

tags = get_flow_tags(flow_id, expdb)

flow_rows = get_flow_subflows(flow_id, expdb)
subflows = [
{
"identifier": flow.identifier,
"flow": get_flow(flow_id=flow.child_id, expdb=expdb),
}
for flow in flow_rows
]

return Flow(
id_=flow.id,
uploader=flow.uploader,
name=flow.name,
class_name=flow.class_name,
version=flow.version,
external_version=flow.external_version,
description=flow.description,
upload_date=flow.upload_date,
language=flow.language,
dependencies=flow.dependencies,
parameter=parameters,
subflows=subflows,
tag=tags,
)
29 changes: 29 additions & 0 deletions src/schemas/flows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from datetime import datetime
from typing import Any, Self

from pydantic import BaseModel, ConfigDict, Field


class Parameter(BaseModel):
name: str
default_value: Any
data_type: str
description: str


class Flow(BaseModel):
id_: int = Field(serialization_alias="id")
uploader: int | None
name: str = Field(max_length=1024)
class_name: str | None = Field(max_length=256)
version: int
external_version: str = Field(max_length=128)
description: str | None
upload_date: datetime
language: str | None = Field(max_length=128)
dependencies: str | None
parameter: list[Parameter]
subflows: list[Self]
tag: list[str]

model_config = ConfigDict(arbitrary_types_allowed=True)
Empty file added tests/routers/__init__.py
Empty file.
Empty file.
Loading

0 comments on commit 5e95710

Please sign in to comment.