diff --git a/doc/examples.rst b/doc/examples.rst index 2c6c6aa..687924c 100644 --- a/doc/examples.rst +++ b/doc/examples.rst @@ -1,12 +1,19 @@ Examples ======== -Various example notebooks. +There are currently notebooks aimed at users and developers. + +Users that are working with PEtab problems should consult the model selection documentation of the calibration tool they wish to use. In this case, there is no need to use the PEtab Select package directly to perform model selection. After model selection, results can be analyzed with PEtab Select. See the API documentation for :mod:`petab_select.analyze`, or the "Visualization gallery" notebook. + +Users who wish to apply model selection methods using a calibration tool that doesn't support PEtab can refer to the "Model selection with non-SBML models" notebook for a demonstration using statsmodels. + +Developers wishing to implement model selection in their tool via PEtab Select can consult the notebooks on workflows ("Example usage with the CLI" and "Example usage with Python 3") or "FAMoS in PEtab Select". .. toctree:: :maxdepth: 1 - examples/example_cli_famos.ipynb examples/visualization.ipynb + examples/other_model_types.ipynb + examples/example_cli_famos.ipynb examples/workflow_cli.ipynb examples/workflow_python.ipynb diff --git a/doc/examples/README.md b/doc/examples/README.md index 85a3b2d..e32d9cc 100644 --- a/doc/examples/README.md +++ b/doc/examples/README.md @@ -1 +1 @@ -These notebooks need to be run to see the output. Pre-computed output can be viewed in the documentation, at https://petab-select.readthedocs.io/en/stable/examples.html +These notebooks need to be run to see the output. Pre-computed output and further information about the notebooks can be viewed in the documentation, at https://petab-select.readthedocs.io/en/stable/examples.html diff --git a/doc/examples/other_model_types.ipynb b/doc/examples/other_model_types.ipynb new file mode 100644 index 0000000..57b34c8 --- /dev/null +++ b/doc/examples/other_model_types.ipynb @@ -0,0 +1,300 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2e9d5661-b193-4eb1-905c-9b54387933c1", + "metadata": {}, + "source": [ + "# Model selection with non-SBML models\n", + "\n", + "PEtab Select is currently designed for use with PEtab v1, which only supports SBML for model specification. However, the SBML model is irrelevant to PEtab Select, which only reads the parameter table of a model's PEtab problem to determine the estimated parameters (e.g. to accurately compute criteria values).\n", + "\n", + "In this notebook, we demonstrate model selection with a modeling framework that currently does not support PEtab problems (specifically, [statsmodels](https://www.statsmodels.org/stable/index.html)). Since statsmodels is a Python package, we use the Python interface of PEtab Select. Non-Python frameworks can be used with the PEtab Select command-line interface instead. Synthetic data was generated from the model\n", + "$$y(t) = 20t + 10t^2 + 5t^3 + \\epsilon$$\n", + "where $\\epsilon$ is drawn from the standard normal distribution and represents measurement noise. The hypothetical scenario is that you are provided $(t,y)$ data pairs and asked to identify the powers of $t$ that are evidenced by the data (i.e., that were used to generate the data). Here, we only consider linear regression models involving terms up to $t^5$, i.e., the superset model is\n", + "$$y(t) = \\sum_{i=0}^5 k_i t^i,$$\n", + "where $k_i$ is the $i$th coefficient in the linear regression model." + ] + }, + { + "cell_type": "markdown", + "id": "b2a20814-e11d-4d65-9987-b84a50abf59d", + "metadata": {}, + "source": [ + "## \"True\" model\n", + "\n", + "We start by loading the data and demonstrating model calibration of the true model using statsmodels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6889e520-e13f-4b1c-8dc4-43226e37d165", + "metadata": {}, + "outputs": [], + "source": [ + "# Based on the statsmodels \"Ordinary Least Squares\" example\n", + "# https://www.statsmodels.org/stable/examples/notebooks/generated/ols.html\n", + "\n", + "import numpy as np\n", + "import petab\n", + "import statsmodels.api as sm\n", + "from petab import MEASUREMENT, TIME\n", + "\n", + "import petab_select\n", + "from petab_select.constants import (\n", + " CANDIDATE_SPACE,\n", + " ESTIMATE,\n", + " TERMINATE,\n", + " UNCALIBRATED_MODELS,\n", + " Criterion,\n", + ")\n", + "\n", + "# Data was generated with the formula (see next cell to regenerate data)\n", + "# y(t) = 20*t + 10*t^2 + 5*t^3\n", + "df = petab.get_measurement_df(\n", + " \"other_model_types_problem/petab/measurements.tsv\"\n", + ")\n", + "t = df[TIME].to_numpy()\n", + "y = df[MEASUREMENT].to_numpy()\n", + "\n", + "# Show statsmodels fit for the \"true\" model\n", + "true_T = np.column_stack([t**1, t**2, t**3])\n", + "print(sm.OLS(y, true_T).fit().summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d14b4c55-2494-4fdc-a45e-e2983e6bf838", + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment to regenerate data\n", + "\n", + "# import numpy as np\n", + "# rng = np.random.default_rng(seed=0)\n", + "\n", + "# # Measurement time points\n", + "# t = np.linspace(0, 10, 101)\n", + "# # The first three powers of x are used to generate the data\n", + "# T = np.column_stack([t**1, t**2, t**3])\n", + "# K = np.array([20, 10, 5])\n", + "# noise = rng.normal(size=t.size)\n", + "# y = np.dot(T, K) + noise\n", + "\n", + "# # Write PEtab measurement table\n", + "# lines = [\"observableId\\tsimulationConditionId\\ttime\\tmeasurement\"]\n", + "# for t_, y_ in zip(t, y, strict=True):\n", + "# lines.append(f\"y\\tdefault\\t{round(t_,1)}\\t{round(y_,5)}\")\n", + "# content = \"\\n\".join(lines)\n", + "# with open(\"other_model_types_problem/petab/measurements.tsv\", \"w\") as f:\n", + "# f.write(content)" + ] + }, + { + "cell_type": "markdown", + "id": "c566b1b7-d01b-4964-b7f6-31cb9f4e4acc", + "metadata": {}, + "source": [ + "## Model selection\n", + "\n", + "To perform model selection, we define three methods to:\n", + "\n", + "1. calibrate a single model\n", + "\n", + " This is where we convert a PEtab Select model into a statsmodels model, fit the model with statsmodels, then save the log-likelihood value in the PEtab Select model.\n", + "\n", + "2. perform a single iteration of a model selection method involving calibration of multiple models\n", + "\n", + " This is generic code that executes the required PEtab Select commands.\n", + "\n", + "3. perform a full model selection run, involving all iterations of a model selection method\n", + "\n", + " This loads the PEtab Select problem from disk and performs all iterations of a model selection method.\n", + "\n", + "In this case, the PEtab Select problem is setup to perform backward selection ([see the specification files for this example](https://github.com/PEtab-dev/petab_select/tree/main/doc/examples/other_model_types_problem/petab_select))." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89fb34cb-3b78-4b89-b58f-78e3f90414c1", + "metadata": {}, + "outputs": [], + "source": [ + "def calibrate_model(model: petab_select.Model, t=t, y=y):\n", + " # Convert the PEtab Select model into a statsmodel model\n", + " powers = [\n", + " int(parameter_id[1:])\n", + " for parameter_id, parameter_value in model.parameters.items()\n", + " if parameter_value == ESTIMATE\n", + " ]\n", + " if not powers:\n", + " return\n", + " T = np.column_stack([t**i for i in powers])\n", + " sm_model = sm.OLS(y, T)\n", + "\n", + " # Calibrate\n", + " results = sm_model.fit()\n", + "\n", + " # Save the log-likelihood\n", + " model.set_criterion(criterion=Criterion.LLH, value=results.llf)\n", + "\n", + "\n", + "def perform_iteration(\n", + " problem: petab_select.Problem, candidate_space: petab_select.CandidateSpace\n", + "):\n", + " # Start the iteration, request models according to the model selection method.\n", + " iteration = petab_select.ui.start_iteration(\n", + " problem=problem, candidate_space=candidate_space\n", + " )\n", + "\n", + " # Calibrate each model.\n", + " models = iteration[UNCALIBRATED_MODELS]\n", + " for model in models:\n", + " if all(\n", + " parameter_value != ESTIMATE\n", + " for parameter_value in model.parameters.values()\n", + " ):\n", + " del models[model.hash]\n", + " continue\n", + " calibrate_model(model=model)\n", + " # Compute the criterion based on the log-likelihood computed by statsmodels.\n", + " model.compute_criterion(criterion=problem.criterion)\n", + "\n", + " # End the iteration and return the results.\n", + " return petab_select.ui.end_iteration(\n", + " problem=problem,\n", + " candidate_space=iteration[CANDIDATE_SPACE],\n", + " calibrated_models=models,\n", + " )\n", + "\n", + "\n", + "def run_model_selection():\n", + " # Load the PEtab Select problem.\n", + " problem = petab_select.Problem.from_yaml(\n", + " \"other_model_types_problem/petab_select/problem.yaml\"\n", + " )\n", + " candidate_space = None\n", + "\n", + " # Perform all iterations of the model selection method until the termination signal is emitted.\n", + " while True:\n", + " iteration_results = perform_iteration(\n", + " problem=problem, candidate_space=candidate_space\n", + " )\n", + " if iteration_results[TERMINATE]:\n", + " break\n", + " candidate_space = iteration_results[CANDIDATE_SPACE]\n", + "\n", + " # Return the problem, which contains the calibrated models.\n", + " return problem" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93ffcec5-3143-4906-87a8-acf60c541ce4", + "metadata": {}, + "outputs": [], + "source": [ + "# Run the model selection method.\n", + "problem = run_model_selection()" + ] + }, + { + "cell_type": "markdown", + "id": "a93ab68f-7831-40bd-82b0-d365cca8008c", + "metadata": {}, + "source": [ + "## Analysis\n", + "\n", + "As when using a PEtab-compatible calibration tool, the results from calibration with statsmodels can also be analyzed with PEtab Select.\n", + "\n", + "Firstly, we get the best model identified during model selection. It matches the \"true\" model, with the same AIC as computed by statsmodels at the top of this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba3a58af-8b13-4525-812a-9c7708292bc7", + "metadata": {}, + "outputs": [], + "source": [ + "best_model = petab_select.analyze.get_best(\n", + " models=problem.state.models, criterion=problem.criterion\n", + ")\n", + "print(f\"\"\"\n", + "Best model information\n", + "----------------------\n", + "PEtab Select model hash: {best_model.hash}\n", + "Selected powers of t: {[int(parameter_id[1:]) for parameter_id, parameter_value in best_model.parameters.items() if parameter_value == ESTIMATE]}\n", + "Log-Likelihood: {best_model.get_criterion(criterion=Criterion.LLH)}\n", + "AIC: {best_model.get_criterion(criterion=Criterion.AIC)}\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92857455-d8be-4bde-adc6-e6ab8de27748", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot AIC of all models that were calibrated.\n", + "\n", + "import petab_select.plot\n", + "\n", + "plot_data = petab_select.plot.PlotData(\n", + " models=problem.state.models, criterion=problem.criterion\n", + ")\n", + "\n", + "petab_select.plot.scatter_criterion_vs_n_estimated(plot_data=plot_data);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c0a4f6f-9099-4e22-a589-80cebaf466cc", + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the history of model selection iterations.\n", + "import matplotlib.pyplot as plt\n", + "\n", + "draw_networkx_kwargs = {\n", + " \"arrowstyle\": \"-|>\",\n", + " \"node_shape\": \"s\",\n", + " \"edgecolors\": \"k\",\n", + "}\n", + "fig, ax = plt.subplots(figsize=(20, 20))\n", + "petab_select.plot.graph_iteration_layers(\n", + " plot_data=plot_data,\n", + " draw_networkx_kwargs=draw_networkx_kwargs,\n", + " ax=ax,\n", + ");" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/examples/other_model_types_problem/petab/conditions.tsv b/doc/examples/other_model_types_problem/petab/conditions.tsv new file mode 100644 index 0000000..5d45658 --- /dev/null +++ b/doc/examples/other_model_types_problem/petab/conditions.tsv @@ -0,0 +1,2 @@ +conditionId +default diff --git a/doc/examples/other_model_types_problem/petab/dummy.xml b/doc/examples/other_model_types_problem/petab/dummy.xml new file mode 100644 index 0000000..377893d --- /dev/null +++ b/doc/examples/other_model_types_problem/petab/dummy.xml @@ -0,0 +1,4 @@ + + + + diff --git a/doc/examples/other_model_types_problem/petab/measurements.tsv b/doc/examples/other_model_types_problem/petab/measurements.tsv new file mode 100644 index 0000000..638e864 --- /dev/null +++ b/doc/examples/other_model_types_problem/petab/measurements.tsv @@ -0,0 +1,102 @@ +observableId simulationConditionId time measurement +y default 0.0 0.12573 +y default 0.1 1.9729 +y default 0.2 5.08042 +y default 0.3 7.1399 +y default 0.4 9.38433 +y default 0.5 13.4866 +y default 0.6 17.984 +y default 0.7 21.56208 +y default 0.8 24.25626 +y default 0.9 28.47958 +y default 1.0 34.37673 +y default 1.1 40.79633 +y default 1.2 44.71497 +y default 1.3 53.66621 +y default 1.4 60.07409 +y default 1.5 68.64273 +y default 1.6 77.53574 +y default 1.7 87.1487 +y default 1.8 97.97163 +y default 1.9 109.43751 +y default 2.0 119.87147 +y default 2.1 133.77146 +y default 2.2 144.97481 +y default 2.3 160.08651 +y default 2.4 175.62347 +y default 2.5 190.71901 +y default 2.6 206.7365 +y default 2.7 224.39327 +y default 2.8 243.70227 +y default 2.9 264.2652 +y default 3.0 283.99038 +y default 3.1 306.84582 +y default 3.2 330.08077 +y default 3.3 355.12585 +y default 3.4 380.33466 +y default 3.5 407.23037 +y default 3.6 434.22617 +y default 3.7 464.03539 +y default 3.8 495.54398 +y default 3.9 528.18843 +y default 4.0 558.74093 +y default 4.1 596.21892 +y default 4.2 632.18588 +y default 4.3 669.21631 +y default 4.4 707.78446 +y default 4.5 747.81108 +y default 4.6 791.73802 +y default 4.7 835.97526 +y default 4.8 881.16163 +y default 4.9 927.6601 +y default 5.0 975.35738 +y default 5.1 1024.14668 +y default 5.2 1077.43555 +y default 5.3 1131.94147 +y default 5.4 1185.63164 +y default 5.5 1244.77012 +y default 5.6 1304.10986 +y default 5.7 1365.56104 +y default 5.8 1426.77588 +y default 5.9 1492.3333 +y default 6.0 1559.56356 +y default 6.1 1627.8352 +y default 6.2 1701.77937 +y default 6.3 1772.63909 +y default 6.4 1848.64897 +y default 6.5 1925.36643 +y default 6.6 2006.66347 +y default 6.7 2088.03536 +y default 6.8 2171.19335 +y default 6.9 2254.44149 +y default 7.0 2345.05203 +y default 7.1 2436.33869 +y default 7.2 2529.64396 +y default 7.3 2623.36709 +y default 7.4 2723.54201 +y default 7.5 2820.55457 +y default 7.6 2923.81847 +y default 7.7 3030.50005 +y default 7.8 3137.20905 +y default 7.9 3249.29739 +y default 8.0 3360.18852 +y default 8.1 3474.67181 +y default 8.2 3592.86244 +y default 8.3 3712.74385 +y default 8.4 3835.84232 +y default 8.5 3963.75541 +y default 8.6 4092.46117 +y default 8.7 4224.70956 +y default 8.8 4357.00539 +y default 8.9 4496.63411 +y default 9.0 4634.71261 +y default 9.1 4779.52941 +y default 9.2 4923.40721 +y default 9.3 5071.94952 +y default 9.4 5224.76979 +y default 9.5 5380.40645 +y default 9.6 5537.44101 +y default 9.7 5697.67947 +y default 9.8 5861.01878 +y default 9.9 6028.19348 +y default 10.0 6200.50268 diff --git a/doc/examples/other_model_types_problem/petab/observables.tsv b/doc/examples/other_model_types_problem/petab/observables.tsv new file mode 100644 index 0000000..627c5bc --- /dev/null +++ b/doc/examples/other_model_types_problem/petab/observables.tsv @@ -0,0 +1,2 @@ +observableId observableFormula noiseFormula +y k0*time^0 + k1*time^1 + k2*time^2 + k3*time^3 + k4*time^4 + k5*time^5 1 diff --git a/doc/examples/other_model_types_problem/petab/parameters.tsv b/doc/examples/other_model_types_problem/petab/parameters.tsv new file mode 100644 index 0000000..348fc89 --- /dev/null +++ b/doc/examples/other_model_types_problem/petab/parameters.tsv @@ -0,0 +1,7 @@ +parameterId parameterScale lowerBound upperBound estimate +k0 lin 0 100 1 +k1 lin 0 100 1 +k2 lin 0 100 1 +k3 lin 0 100 1 +k4 lin 0 100 1 +k5 lin 0 100 1 diff --git a/doc/examples/other_model_types_problem/petab/problem.yaml b/doc/examples/other_model_types_problem/petab/problem.yaml new file mode 100644 index 0000000..f8de5fc --- /dev/null +++ b/doc/examples/other_model_types_problem/petab/problem.yaml @@ -0,0 +1,11 @@ +format_version: 1 +parameter_file: parameters.tsv +problems: +- condition_files: + - conditions.tsv + measurement_files: + - measurements.tsv + sbml_files: + - dummy.xml + observable_files: + - observables.tsv diff --git a/doc/examples/other_model_types_problem/petab_select/model_space.tsv b/doc/examples/other_model_types_problem/petab_select/model_space.tsv new file mode 100644 index 0000000..1f3e830 --- /dev/null +++ b/doc/examples/other_model_types_problem/petab_select/model_space.tsv @@ -0,0 +1,2 @@ +model_subspace_id petab_yaml k0 k1 k2 k3 k4 k5 +M ../petab/problem.yaml 0;estimate 0;estimate 0;estimate 0;estimate 0;estimate 0;estimate diff --git a/doc/examples/other_model_types_problem/petab_select/problem.yaml b/doc/examples/other_model_types_problem/petab_select/problem.yaml new file mode 100644 index 0000000..4bb1c85 --- /dev/null +++ b/doc/examples/other_model_types_problem/petab_select/problem.yaml @@ -0,0 +1,5 @@ +format_version: 1 +criterion: AIC +method: backward +model_space_files: +- model_space.tsv diff --git a/doc/index.rst b/doc/index.rst index 13a2626..e02b27e 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -34,6 +34,8 @@ PEtab Select is well-integrated with: * `BasiCO `_ (`example `__) +* `Data2Dynamics `_ + (`example `__) * `PEtab.jl `_ (`example `__) * `pyPESTO `_ @@ -42,6 +44,15 @@ PEtab Select is well-integrated with: Other model calibration tools can easily be integrated using the provided Python library or command line interface. +This documentation provides examples aimed at both users and developers. +However, if you are a user with PEtab problem(s) and simply want to perform +model selection, then check the file formats documented here, and the model +selection documentation of one of the calibration tools listed above. In most +cases, model selection is performed entirely via one of the calibration tools, +rather than the PEtab Select package directly. After model selection, the +analysis and visualization methods in the PEtab Select package can be used +directly with the results from your calibration tool. + Installation ------------ diff --git a/petab_select/plot.py b/petab_select/plot.py index 04b6e69..9361acc 100644 --- a/petab_select/plot.py +++ b/petab_select/plot.py @@ -28,6 +28,10 @@ """The font size of axis tick labels.""" DEFAULT_NODE_COLOR = "darkgrey" """The default color of nodes in graph plots.""" +FONT_HEIGHT_WIDTH_RATIO = 2 +"""The ratio of the font height to font width. Used for graph node sizes.""" +NODE_SIZE_LABEL_SIZE_RATIO = 500 +"""The ratio of node size to label size. Used for graph node sizes.""" __all__ = [ @@ -100,17 +104,16 @@ def __init__( def get_default_labels(self) -> dict[ModelHash, str]: """Get default model labels.""" - return ( - { - model.hash: model.model_label or model.model_id or model.hash - for model in self.models - } - | { - model.predecessor_model_hash: model.predecessor_model_hash - for model in self.models - } - | {VIRTUAL_INITIAL_MODEL.hash: "Virtual\nInitial\nModel"} - ) + labels = {} + for model in self.models: + labels[model.hash] = ( + model.model_label or model.model_id or model.hash + ) + labels[model.predecessor_model_hash] = labels.get( + model.predecessor_model_hash, model.predecessor_model_hash + ) + labels[VIRTUAL_INITIAL_MODEL.hash] = "Virtual\nInitial\nModel" + return labels def augment_labels( self, @@ -511,6 +514,22 @@ def scatter_criterion_vs_n_estimated( return ax +def labels_to_node_sizes(G: nx.Graph) -> list[float]: + """Compute reasonable node sizes to scale with node label sizes.""" + node_sizes = [] + for label in G.nodes: + height = 1 + width = len(label) + if "\n" in label: + height = len(label.split("\n")) + width = len(sorted(label.split("\n"), key=lambda x: len(x))[-1]) + label_size = ( + width if width * FONT_HEIGHT_WIDTH_RATIO > height else height + ) + node_sizes.append(label_size * NODE_SIZE_LABEL_SIZE_RATIO) + return node_sizes + + def graph_iteration_layers( plot_data: PlotData, ax: plt.Axes = None, @@ -542,7 +561,6 @@ def graph_iteration_layers( default_draw_networkx_kwargs = { "arrowstyle": "-|>", "node_shape": "s", - "node_size": 250, "edgecolors": "k", } if draw_networkx_kwargs is None: @@ -608,8 +626,13 @@ def graph_iteration_layers( for model_hash in G.nodes ] - # Apply custom labels - nx.relabel_nodes(G, mapping=plot_data.labels, copy=False) + # Apply custom labels. Need `copy=True` to preserve node ordering. + G = nx.relabel_nodes(G, mapping=plot_data.labels, copy=True) + + draw_networkx_kwargs["node_size"] = draw_networkx_kwargs.get( + "node_size", + labels_to_node_sizes(G=G), + ) nx.draw_networkx( G, pos, ax=ax, node_color=node_colors, **draw_networkx_kwargs diff --git a/petab_select/problem.py b/petab_select/problem.py index a64e62a..4ba467c 100644 --- a/petab_select/problem.py +++ b/petab_select/problem.py @@ -73,7 +73,7 @@ def reset(self) -> None: class Problem(BaseModel): """The model selection problem.""" - format_version: str = Field(default="1.0.0") + format_version: str = Field(default="1.0.0", coerce_numbers_to_str=True) """The file format version.""" criterion: Annotated[ Criterion, PlainSerializer(lambda x: x.value, return_type=str) diff --git a/pyproject.toml b/pyproject.toml index 5462e7a..92f7d9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ doc = [ "readthedocs-sphinx-ext>=2.2.5", "sphinx-autodoc-typehints", "petab_select[plot]", + "statsmodels>=0.14.4", ] [project.scripts]