From 04f9034b2be17830c39458ca8e07004773dec6bb Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Tue, 26 Mar 2024 18:56:45 +0100 Subject: [PATCH 01/13] make model hashes nicer; add model hash to model method --- petab_select/constants.py | 7 ++++ petab_select/model.py | 75 ++++++++++++++++++++------------------- petab_select/problem.py | 9 ++++- 3 files changed, 54 insertions(+), 37 deletions(-) diff --git a/petab_select/constants.py b/petab_select/constants.py index ac0b2a73..359c7f64 100644 --- a/petab_select/constants.py +++ b/petab_select/constants.py @@ -2,6 +2,7 @@ import sys from enum import Enum from pathlib import Path +import string from typing import Dict, List, Literal, Union # Zero-indexed column/row indices @@ -32,6 +33,12 @@ MODEL_CODE = 'model_code' MODEL_HASH = 'model_hash' MODEL_HASHES = 'model_hashes' +MODEL_SUBSPACE_INDICES_HASH_MAP = ( # [0-9]+[A-Z]+[a-z] + ''.join(str(i) for i in range(10)) + + string.ascii_uppercase + string.ascii_lowercase +) +MODEL_HASH_DELIMITER = '.' +HASHED_MODEL_SUBSPACE_INDICES_DELIMITER = '-' # If `predecessor_model_hash` is defined for a model, it is the ID of the model that the # current model was/is to be compared to. This is part of the result and is # only (optionally) set by the PEtab calibration tool. It is not defined by the diff --git a/petab_select/model.py b/petab_select/model.py index 55ddf56f..8f9682d9 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -2,6 +2,7 @@ import warnings from os.path import relpath from pathlib import Path +import string from typing import Any, Dict, List, Optional, Tuple, Union import petab @@ -12,10 +13,13 @@ from .constants import ( CRITERIA, ESTIMATED_PARAMETERS, + HASHED_MODEL_SUBSPACE_INDICES_DELIMITER, MODEL_HASH, + MODEL_HASH_DELIMITER, MODEL_ID, MODEL_SUBSPACE_ID, MODEL_SUBSPACE_INDICES, + MODEL_SUBSPACE_INDICES_HASH_MAP, PARAMETERS, PETAB_ESTIMATE_TRUE, PETAB_PROBLEM, @@ -58,14 +62,6 @@ class Model(PetabMixin): Functions to convert attributes from :class:`Model` to YAML. criteria: The criteria values of the calibrated model (e.g. AIC). - hash_attributes: - This attribute is currently not used. - Attributes that will be used to calculate the hash of the - :class:`Model` instance. NB: this hash is used during pairwise comparison - to determine whether any two :class:`Model` instances are unique. The - model instances are compared by their parameter estimation - problems, as opposed to parameter estimation results, which may - differ due to e.g. floating-point arithmetic. model_id: The model ID. petab_yaml: @@ -130,28 +126,6 @@ class Model(PetabMixin): for criterion_id, criterion_value in x.items() }, } - hash_attributes = { - # MODEL_ID: lambda x: hash(x), # possible circular dependency on hash - # MODEL_SUBSPACE_ID: lambda x: hash(x), - # MODEL_SUBSPACE_INDICES: hash_list, - # TODO replace `YAML` with `PETAB_PROBLEM_HASH`, as YAML could refer to - # different problems if used on different filesystems or sometimes - # absolute and other times relative. Better to check whether the - # PEtab problem itself is unique. - # TODO replace `PARAMETERS` with `PARAMETERS_ALL`, which should be al - # parameters in the PEtab problem. This avoids treating the PEtab problem - # differently to the model (in a subspace with the PEtab problem) that has - # all nominal values defined in the subspace. - # TODO add `estimated_parameters`? Needs to be clarified whether this hash - # should be unique amongst only not-yet-calibrated models, or may also - # return the same value between differently parameterized models that ended - # up being calibrated to be the same... probably should be the former. - # Currently, the hash is stored, hence will "persist" after calibration - # if the same `Model` instance is used. - # PETAB_YAML: lambda x: hash(x), - PETAB_YAML: hash_str, - PARAMETERS: hash_parameter_dict, - } def __init__( self, @@ -523,12 +497,7 @@ def get_hash(self) -> int: The hash. """ if self.model_hash is None: - self.model_hash = hash_list( - [ - method(getattr(self, attribute)) - for attribute, method in Model.hash_attributes.items() - ] - ) + self.model_hash = hash_model(model=self) return self.model_hash def __hash__(self) -> None: @@ -772,3 +741,37 @@ def models_to_yaml_list( model_dicts = None if not model_dicts else model_dicts with open(output_yaml, 'w') as f: yaml.dump(model_dicts, f) + + +def unhash_model(model_hash: str): + model_subspace_id, hashed_model_subspace_indices = model_hash.split(MODEL_HASH_DELIMITER) + + if HASHED_MODEL_SUBSPACE_INDICES_DELIMITER in hashed_model_subspace_indices: + model_subspace_indices = [ + int[s] + for s in hashed_model_subspace_indices.split( + HASHED_MODEL_SUBSPACE_INDICES_DELIMITER + ) + ] + else: + model_subspace_indices = [ + MODEL_SUBSPACE_INDICES_HASH_MAP.index(s) + for s in hashed_model_subspace_indices + ] + + return model_subspace_id, model_subspace_indices + + +def hash_model(model: Model): + try: + hashed_model_subspace_indices = ''.join( + MODEL_SUBSPACE_INDICES_HASH_MAP[index] + for index in model.model_subspace_indices + ) + except: + hashed_model_subspace_indices = '_'.join( + str(i) for i in model.model_subspace_indices + ) + + model_hash = model.model_subspace_id + MODEL_HASH_DELIMITER + hashed_model_subspace_indices + return model_hash diff --git a/petab_select/problem.py b/petab_select/problem.py index 7b97fb19..25f907ae 100644 --- a/petab_select/problem.py +++ b/petab_select/problem.py @@ -17,7 +17,7 @@ Criterion, Method, ) -from .model import Model, default_compare +from .model import Model, default_compare, unhash_model from .model_space import ModelSpace __all__ = [ @@ -239,6 +239,13 @@ def get_best( ) return best_model + def model_hash_to_model(self, model_hash: str): + model_subspace_id, model_subspace_indices = unhash_model(model_hash) + model = self.model_space.model_subspaces[model_subspace_id].indices_to_model( + indices=model_subspace_indices, + ) + return model + def new_candidate_space( self, *args, From 1a6d881fd3a4d308fd35e721ab572cd5c4a3027e Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Tue, 26 Mar 2024 19:23:14 +0100 Subject: [PATCH 02/13] clean --- petab_select/constants.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/petab_select/constants.py b/petab_select/constants.py index 359c7f64..ab2e3b33 100644 --- a/petab_select/constants.py +++ b/petab_select/constants.py @@ -1,8 +1,8 @@ """Constants for the PEtab Select package.""" +import string import sys from enum import Enum from pathlib import Path -import string from typing import Dict, List, Literal, Union # Zero-indexed column/row indices @@ -33,9 +33,11 @@ MODEL_CODE = 'model_code' MODEL_HASH = 'model_hash' MODEL_HASHES = 'model_hashes' -MODEL_SUBSPACE_INDICES_HASH_MAP = ( # [0-9]+[A-Z]+[a-z] - ''.join(str(i) for i in range(10)) - + string.ascii_uppercase + string.ascii_lowercase +MODEL_SUBSPACE_INDICES_HASH_MAP = ( + # [0-9]+[A-Z]+[a-z] + string.digits + + string.ascii_uppercase + + string.ascii_lowercase ) MODEL_HASH_DELIMITER = '.' HASHED_MODEL_SUBSPACE_INDICES_DELIMITER = '-' From 43478a0d1aff418212fc93369bca16702c10060e Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Tue, 26 Mar 2024 19:24:13 +0100 Subject: [PATCH 03/13] black --- petab_select/model.py | 17 +++++++++++++---- petab_select/problem.py | 4 +++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/petab_select/model.py b/petab_select/model.py index 8f9682d9..ad3780cc 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -1,8 +1,8 @@ """The `Model` class.""" +import string import warnings from os.path import relpath from pathlib import Path -import string from typing import Any, Dict, List, Optional, Tuple, Union import petab @@ -744,9 +744,14 @@ def models_to_yaml_list( def unhash_model(model_hash: str): - model_subspace_id, hashed_model_subspace_indices = model_hash.split(MODEL_HASH_DELIMITER) + model_subspace_id, hashed_model_subspace_indices = model_hash.split( + MODEL_HASH_DELIMITER + ) - if HASHED_MODEL_SUBSPACE_INDICES_DELIMITER in hashed_model_subspace_indices: + if ( + HASHED_MODEL_SUBSPACE_INDICES_DELIMITER + in hashed_model_subspace_indices + ): model_subspace_indices = [ int[s] for s in hashed_model_subspace_indices.split( @@ -773,5 +778,9 @@ def hash_model(model: Model): str(i) for i in model.model_subspace_indices ) - model_hash = model.model_subspace_id + MODEL_HASH_DELIMITER + hashed_model_subspace_indices + model_hash = ( + model.model_subspace_id + + MODEL_HASH_DELIMITER + + hashed_model_subspace_indices + ) return model_hash diff --git a/petab_select/problem.py b/petab_select/problem.py index 25f907ae..39dc086e 100644 --- a/petab_select/problem.py +++ b/petab_select/problem.py @@ -241,7 +241,9 @@ def get_best( def model_hash_to_model(self, model_hash: str): model_subspace_id, model_subspace_indices = unhash_model(model_hash) - model = self.model_space.model_subspaces[model_subspace_id].indices_to_model( + model = self.model_space.model_subspaces[ + model_subspace_id + ].indices_to_model( indices=model_subspace_indices, ) return model From bb120cc7c8ed0b976fc7d17f4b858768cd38cf7b Mon Sep 17 00:00:00 2001 From: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> Date: Tue, 26 Mar 2024 23:04:53 +0100 Subject: [PATCH 04/13] Apply suggestions from code review Co-authored-by: Daniel Weindl --- petab_select/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/petab_select/model.py b/petab_select/model.py index ad3780cc..6492570d 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -753,7 +753,7 @@ def unhash_model(model_hash: str): in hashed_model_subspace_indices ): model_subspace_indices = [ - int[s] + int(s) for s in hashed_model_subspace_indices.split( HASHED_MODEL_SUBSPACE_INDICES_DELIMITER ) @@ -774,7 +774,7 @@ def hash_model(model: Model): for index in model.model_subspace_indices ) except: - hashed_model_subspace_indices = '_'.join( + hashed_model_subspace_indices = HASHED_MODEL_SUBSPACE_INDICES_DELIMITER.join( str(i) for i in model.model_subspace_indices ) From 36e72a2c2f5af225c5ead48d8869a41afda3e530 Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Tue, 26 Mar 2024 23:06:57 +0100 Subject: [PATCH 05/13] review --- petab_select/model.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/petab_select/model.py b/petab_select/model.py index 6492570d..dc1e8bf0 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -773,9 +773,11 @@ def hash_model(model: Model): MODEL_SUBSPACE_INDICES_HASH_MAP[index] for index in model.model_subspace_indices ) - except: - hashed_model_subspace_indices = HASHED_MODEL_SUBSPACE_INDICES_DELIMITER.join( - str(i) for i in model.model_subspace_indices + except KeyError: + hashed_model_subspace_indices = ( + HASHED_MODEL_SUBSPACE_INDICES_DELIMITER.join( + str(i) for i in model.model_subspace_indices + ) ) model_hash = ( From bfd6cb5bc41b83ab9f52516fa985750f9ed49020 Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Tue, 26 Mar 2024 23:09:57 +0100 Subject: [PATCH 06/13] fix delimiters --- petab_select/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/petab_select/constants.py b/petab_select/constants.py index ab2e3b33..24243d80 100644 --- a/petab_select/constants.py +++ b/petab_select/constants.py @@ -39,8 +39,8 @@ + string.ascii_uppercase + string.ascii_lowercase ) -MODEL_HASH_DELIMITER = '.' -HASHED_MODEL_SUBSPACE_INDICES_DELIMITER = '-' +MODEL_HASH_DELIMITER = '-' +HASHED_MODEL_SUBSPACE_INDICES_DELIMITER = '.' # If `predecessor_model_hash` is defined for a model, it is the ID of the model that the # current model was/is to be compared to. This is part of the result and is # only (optionally) set by the PEtab calibration tool. It is not defined by the From bf1bf17122b2cd42f459ef86c154c4143a8e1b22 Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Tue, 26 Mar 2024 23:19:40 +0100 Subject: [PATCH 07/13] doc --- petab_select/model.py | 23 ++++++++++++++++++++++- petab_select/problem.py | 12 +++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/petab_select/model.py b/petab_select/model.py index dc1e8bf0..073d80d3 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -744,6 +744,17 @@ def models_to_yaml_list( def unhash_model(model_hash: str): + """Convert a model hash into model subspace information. + + Args: + model_hash: + The model hash, in the format produced by :func:`hash_model`. + + Returns: + The model subspace ID, and the indices that correspond to a unique + model in the subspace. The indices can be converted to a model with + the `ModelSubspace.indices_to_model` method. + """ model_subspace_id, hashed_model_subspace_indices = model_hash.split( MODEL_HASH_DELIMITER ) @@ -767,7 +778,17 @@ def unhash_model(model_hash: str): return model_subspace_id, model_subspace_indices -def hash_model(model: Model): +def hash_model(model: Model) -> str: + """Create a unique hash for a model. + + Args: + model: + The model. + + Returns: + The hash. The format is the model subspace followed by a representation + of the indices of the model parameters in its subspace. + """ try: hashed_model_subspace_indices = ''.join( MODEL_SUBSPACE_INDICES_HASH_MAP[index] diff --git a/petab_select/problem.py b/petab_select/problem.py index 39dc086e..433ea561 100644 --- a/petab_select/problem.py +++ b/petab_select/problem.py @@ -239,7 +239,17 @@ def get_best( ) return best_model - def model_hash_to_model(self, model_hash: str): + def model_hash_to_model(self, model_hash: str) -> Model: + """Get the model that matches a model hash. + + Args: + model_hash: + The model hash, in the format produced by + :func:`petab_select.model.hash_model`. + + Returns: + The model. + """ model_subspace_id, model_subspace_indices = unhash_model(model_hash) model = self.model_space.model_subspaces[ model_subspace_id From a63b8e7778327b65ead451f27907b4304a3ae96a Mon Sep 17 00:00:00 2001 From: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> Date: Wed, 27 Mar 2024 10:38:39 +0100 Subject: [PATCH 08/13] Update petab_select/model.py Co-authored-by: Daniel Weindl --- petab_select/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/petab_select/model.py b/petab_select/model.py index 073d80d3..8ff9cd4f 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -743,7 +743,7 @@ def models_to_yaml_list( yaml.dump(model_dicts, f) -def unhash_model(model_hash: str): +def unhash_model(model_hash: str) -> tuple[str, list[int]]: """Convert a model hash into model subspace information. Args: From 714c86d25b99c751fc9f3adc12c55df345b106ee Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Wed, 27 Mar 2024 10:57:15 +0100 Subject: [PATCH 09/13] doc hash uniqueness --- petab_select/model.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/petab_select/model.py b/petab_select/model.py index 8ff9cd4f..cee699d3 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -485,12 +485,18 @@ def to_petab( def get_hash(self) -> int: """Get the model hash. - Currently designed to only use pre-calibration information, such that if a model - is calibrated twice and the two calibrated models differ in their parameter - estimates, then they will still have the same hash. - - This is not implemented as ``__hash__`` because Python automatically truncates - values in a system-dependent manner, which reduces interoperability + Hashes are only unique to a specific PEtab Select problem. If the + problem is changed, then an old hash may now refer to a different + model. A hash currently only contains the ID of the model subspace that + the model belongs to, and the location of the model in its subspace. + + Hashes only use pre-calibration information, such that if a model + is calibrated twice and the two calibrated models differ in their + parameter estimates, then they will still have the same hash. + + This is not implemented as ``__hash__`` because Python automatically + truncates values in a system-dependent manner, which reduces + interoperability ( https://docs.python.org/3/reference/datamodel.html#object.__hash__ ). Returns: From 36a1503f75fb29a9e977ab814bc8814698838435 Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Wed, 27 Mar 2024 17:12:43 +0100 Subject: [PATCH 10/13] :class:`ModelHash` --- petab_select/constants.py | 4 +- petab_select/model.py | 299 ++++++++++++++++++++++++++++++-------- petab_select/problem.py | 15 +- 3 files changed, 244 insertions(+), 74 deletions(-) diff --git a/petab_select/constants.py b/petab_select/constants.py index 24243d80..3ddb96b0 100644 --- a/petab_select/constants.py +++ b/petab_select/constants.py @@ -33,14 +33,14 @@ MODEL_CODE = 'model_code' MODEL_HASH = 'model_hash' MODEL_HASHES = 'model_hashes' +MODEL_HASH_DELIMITER = '-' +MODEL_SUBSPACE_INDICES_HASH_DELIMITER = '.' MODEL_SUBSPACE_INDICES_HASH_MAP = ( # [0-9]+[A-Z]+[a-z] string.digits + string.ascii_uppercase + string.ascii_lowercase ) -MODEL_HASH_DELIMITER = '-' -HASHED_MODEL_SUBSPACE_INDICES_DELIMITER = '.' # If `predecessor_model_hash` is defined for a model, it is the ID of the model that the # current model was/is to be compared to. This is part of the result and is # only (optionally) set by the PEtab calibration tool. It is not defined by the diff --git a/petab_select/model.py b/petab_select/model.py index cee699d3..b119303a 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -13,12 +13,12 @@ from .constants import ( CRITERIA, ESTIMATED_PARAMETERS, - HASHED_MODEL_SUBSPACE_INDICES_DELIMITER, MODEL_HASH, MODEL_HASH_DELIMITER, MODEL_ID, MODEL_SUBSPACE_ID, MODEL_SUBSPACE_INDICES, + MODEL_SUBSPACE_INDICES_HASH_DELIMITER, MODEL_SUBSPACE_INDICES_HASH_MAP, PARAMETERS, PETAB_ESTIMATE_TRUE, @@ -45,6 +45,7 @@ 'default_compare', 'models_from_yaml_list', 'models_to_yaml_list', + 'ModelHash', ] @@ -148,9 +149,12 @@ def __init__( self.parameters = parameters self.estimated_parameters = estimated_parameters self.criteria = criteria - self.model_hash = model_hash self.predecessor_model_hash = predecessor_model_hash + if self.predecessor_model_hash is not None: + self.predecessor_model_hash = ModelHash.from_hash( + self.predecessor_model_hash + ) if self.parameters is None: self.parameters = {} @@ -161,6 +165,15 @@ def __init__( super().__init__(petab_yaml=petab_yaml, petab_problem=petab_problem) + self.model_hash = None + self.get_hash() + if model_hash is not None: + model_hash = ModelHash.from_hash(model_hash) + if self.model_hash != model_hash: + raise ValueError( + "The supplied model hash does not match the computed " + "model hash." + ) if self.model_id is None: self.model_id = self.get_hash() @@ -482,17 +495,10 @@ def to_petab( PETAB_YAML: petab_yaml, } - def get_hash(self) -> int: + def get_hash(self) -> str: """Get the model hash. - Hashes are only unique to a specific PEtab Select problem. If the - problem is changed, then an old hash may now refer to a different - model. A hash currently only contains the ID of the model subspace that - the model belongs to, and the location of the model in its subspace. - - Hashes only use pre-calibration information, such that if a model - is calibrated twice and the two calibrated models differ in their - parameter estimates, then they will still have the same hash. + See the documentation for :class:`ModelHash` for more information. This is not implemented as ``__hash__`` because Python automatically truncates values in a system-dependent manner, which reduces @@ -503,7 +509,7 @@ def get_hash(self) -> int: The hash. """ if self.model_hash is None: - self.model_hash = hash_model(model=self) + self.model_hash = ModelHash.from_model(model=self) return self.model_hash def __hash__(self) -> None: @@ -749,67 +755,236 @@ def models_to_yaml_list( yaml.dump(model_dicts, f) -def unhash_model(model_hash: str) -> tuple[str, list[int]]: - """Convert a model hash into model subspace information. +class ModelHash(str): + """A class to handle model hash functionality. - Args: - model_hash: - The model hash, in the format produced by :func:`hash_model`. + The model hash is designed to be: human-readable; able to be converted + back into the corresponding model, and unique up to the same PEtab + problem and parameters. - Returns: - The model subspace ID, and the indices that correspond to a unique - model in the subspace. The indices can be converted to a model with - the `ModelSubspace.indices_to_model` method. + Consider two different models in different model subspaces, with + `ModelHash`s `model_hash0` and `model_hash1`, respectively. Assume that + these two models end up encoding the same PEtab problem (e.g. they set the + same parameters to be estimated). + The string and hash representations will be different, + `str(model_hash0) != str(model_hash1)` and + `hash(model_hash0) != hash(model_hash1)`, but their hashes will pass the + equality check `model_hash0 == model_hash1`. + + This means that different models in different model subspaces that end up + being the same PEtab problem will have different human-readable hashes, + but if these models arise during model selection, then only one of them + will be calibrated. + + Attributes: + model_subspace_id: + The ID of the model subspace of the model. Unique up to a single + PEtab Select problem model space. + model_subspace_indices_hash: + A hash of the location of the model in its model + subspace. Unique up to a single model subspace. + petab_hash: + A hash that is unique up to the same PEtab problem, which is + determined by: the PEtab problem YAML file location, nominal + parameter values, and parameters set to be estimated. This means + that different models may have the same `unique_petab_hash`, + because they are the same estimation problem. """ - model_subspace_id, hashed_model_subspace_indices = model_hash.split( - MODEL_HASH_DELIMITER - ) - if ( - HASHED_MODEL_SUBSPACE_INDICES_DELIMITER - in hashed_model_subspace_indices + def __init__( + self, + model_subspace_id: str, + model_subspace_indices_hash: str, + petab_hash: str, ): - model_subspace_indices = [ - int(s) - for s in hashed_model_subspace_indices.split( - HASHED_MODEL_SUBSPACE_INDICES_DELIMITER - ) - ] - else: - model_subspace_indices = [ - MODEL_SUBSPACE_INDICES_HASH_MAP.index(s) - for s in hashed_model_subspace_indices - ] + self.model_subspace_id = model_subspace_id + self.model_subspace_indices_hash = model_subspace_indices_hash + self.petab_hash = petab_hash + + def __new__( + cls, + model_subspace_id: str, + model_subspace_indices_hash: str, + petab_hash: str, + ): + hash_str = MODEL_HASH_DELIMITER.join( + [ + model_subspace_id, + model_subspace_indices_hash, + petab_hash, + ] + ) + instance = super().__new__(cls, hash_str) + return instance + + def __copy__(self): + return ModelHash( + model_subspace_id=self.model_subspace_id, + model_subspace_indices_hash=self.model_subspace_indices_hash, + petab_hash=self.petab_hash, + ) - return model_subspace_id, model_subspace_indices + def __deepcopy__(self, memo): + return self.__copy__() + @staticmethod + def get_petab_hash(model: Model) -> str: + """Get a hash that is unique up to the same estimation problem. -def hash_model(model: Model) -> str: - """Create a unique hash for a model. + See :attr:`petab_hash` for more information. - Args: - model: - The model. + Args: + model: + The model. - Returns: - The hash. The format is the model subspace followed by a representation - of the indices of the model parameters in its subspace. - """ - try: - hashed_model_subspace_indices = ''.join( - MODEL_SUBSPACE_INDICES_HASH_MAP[index] - for index in model.model_subspace_indices + Returns: + The unique PEtab hash. + """ + petab_yaml = str(model.petab_yaml.resolve()) + model_parameter_df = model.to_petab(set_estimated_parameters=False)[ + PETAB_PROBLEM + ].parameter_df + nominal_parameter_hash = hash_parameter_dict( + model_parameter_df[NOMINAL_VALUE].to_dict() ) - except KeyError: - hashed_model_subspace_indices = ( - HASHED_MODEL_SUBSPACE_INDICES_DELIMITER.join( - str(i) for i in model.model_subspace_indices + estimate_parameter_hash = hash_parameter_dict( + model_parameter_df[ESTIMATE].to_dict() + ) + return hash_str( + petab_yaml + estimate_parameter_hash + nominal_parameter_hash + )[:8] + + @staticmethod + def from_hash( + model_hash: Optional[Union[str, "ModelHash"]] + ) -> "ModelHash": + """Reconstruct a :class:`ModelHash` object from its :func:`__hash__` string. + + Args: + model_hash: + The model hash. + + Returns: + The :class:`ModelHash` object. + """ + if isinstance(model_hash, ModelHash): + return model_hash + + if model_hash == VIRTUAL_INITIAL_MODEL: + return ModelHash( + model_subspace_id='', + model_subspace_indices_hash='', + petab_hash=VIRTUAL_INITIAL_MODEL, ) + + ( + model_subspace_id, + model_subspace_indices_hash, + petab_hash, + ) = model_hash.split(MODEL_HASH_DELIMITER) + return ModelHash( + model_subspace_id=model_subspace_id, + model_subspace_indices_hash=model_subspace_indices_hash, + petab_hash=petab_hash, ) - model_hash = ( - model.model_subspace_id - + MODEL_HASH_DELIMITER - + hashed_model_subspace_indices - ) - return model_hash + @staticmethod + def from_model(model: Model) -> "ModelHash": + """Create a hash for a model. + + Args: + model: + The model. + + Returns: + The model hash. + """ + return ModelHash( + model_subspace_id=model.model_subspace_id, + model_subspace_indices_hash=( + ModelHash.hash_model_subspace_indices( + model.model_subspace_indices, + ) + ), + petab_hash=ModelHash.get_petab_hash(model=model), + ) + + @staticmethod + def hash_model_subspace_indices(model_subspace_indices: list[str]) -> str: + """Hash the location of a model in its subspace. + + Args: + model_subspace_indices: + The location (indices) of the model in its subspace. + + Returns: + The hash. + """ + try: + return ''.join( + MODEL_SUBSPACE_INDICES_HASH_MAP[index] + for index in model_subspace_indices + ) + except KeyError: + return MODEL_SUBSPACE_INDICES_HAS_HASH_DELIMITER.join( + str(i) for i in model_subspace_indices + ) + + def unhash_model_subspace_indices(self) -> list[int]: + """Get the location of a model in its subspace. + + Returns: + The location, as indices of the subspace. + """ + if ( + MODEL_SUBSPACE_INDICES_HASH_DELIMITER + in self.model_subspace_indices_hash + ): + return [ + int(s) + for s in self.model_subspace_indices_hash.split( + MODEL_SUBSPACE_INDICES_HASH_DELIMITER + ) + ] + else: + return [ + MODEL_SUBSPACE_INDICES_HASH_MAP.index(s) + for s in self.model_subspace_indices_hash + ] + + def get_model(self, petab_select_problem=None) -> Model: + """Get the model that a hash corresponds to. + + Args: + petab_select_problem: + The PEtab Select problem. The model will be found in its model + space. + + Returns: + The model. + """ + if self.petab_hash == VIRTUAL_INITIAL_MODEL: + return self.petab_hash + + return petab_select_problem.model_space.model_subspaces[ + self.model_subspace_id + ].indices_to_model( + self.unhash_model_subspace_indices( + self.model_subspace_indices_hash + ) + ) + + def __hash__(self) -> str: + """A string representation of the model hash.""" + return hash(self.petab_hash) + + def __eq__(self, other_hash: "ModelHash") -> bool: + """Check whether two model hashes are equivalent. + + This only checks for equivalence up to the same PEtab problem (see + :attr:`petab_hash`) + + Returns: + Whether the two hashes correspond to equivalent PEtab problems. + """ + return self.petab_hash == other_hash.petab_hash diff --git a/petab_select/problem.py b/petab_select/problem.py index 433ea561..6eee7822 100644 --- a/petab_select/problem.py +++ b/petab_select/problem.py @@ -17,7 +17,7 @@ Criterion, Method, ) -from .model import Model, default_compare, unhash_model +from .model import Model, ModelHash, default_compare from .model_space import ModelSpace __all__ = [ @@ -239,24 +239,19 @@ def get_best( ) return best_model - def model_hash_to_model(self, model_hash: str) -> Model: + def model_hash_to_model(self, model_hash: Union[str, ModelHash]) -> Model: """Get the model that matches a model hash. Args: model_hash: - The model hash, in the format produced by - :func:`petab_select.model.hash_model`. + The model hash. Returns: The model. """ - model_subspace_id, model_subspace_indices = unhash_model(model_hash) - model = self.model_space.model_subspaces[ - model_subspace_id - ].indices_to_model( - indices=model_subspace_indices, + return ModelHash.from_hash(model_hash).get_model( + petab_select_problem=self, ) - return model def new_candidate_space( self, From b693ebfea9b72a7999457ce756808eaa658f130f Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Wed, 27 Mar 2024 17:16:02 +0100 Subject: [PATCH 11/13] typo --- petab_select/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/petab_select/model.py b/petab_select/model.py index b119303a..e9bf0dff 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -766,10 +766,10 @@ class ModelHash(str): `ModelHash`s `model_hash0` and `model_hash1`, respectively. Assume that these two models end up encoding the same PEtab problem (e.g. they set the same parameters to be estimated). - The string and hash representations will be different, - `str(model_hash0) != str(model_hash1)` and - `hash(model_hash0) != hash(model_hash1)`, but their hashes will pass the - equality check `model_hash0 == model_hash1`. + The string representation will be different, + `str(model_hash0) != str(model_hash1)`, but their hashes will pass the + equality check: `model_hash0 == model_hash1` and + `hash(model_hash0) == hash(model_hash1)`. This means that different models in different model subspaces that end up being the same PEtab problem will have different human-readable hashes, From 96f00224d48b0ad62bf6e80500b5ea8da36680d0 Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Wed, 27 Mar 2024 17:19:01 +0100 Subject: [PATCH 12/13] typo --- test/model_space/test_model_space.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/model_space/test_model_space.py b/test/model_space/test_model_space.py index de690713..3f81b5aa 100644 --- a/test/model_space/test_model_space.py +++ b/test/model_space/test_model_space.py @@ -75,9 +75,9 @@ def test_model_space_backward_virtual(model_space): candidate_space = BackwardCandidateSpace() model_space.search(candidate_space) - # The forward candidate space is initialized without a model, so a virtual initial - # model is used. This means the expected models are the "smallest" models (as many - # fixed parameters as possible) in the model space. + # The backward candidate space is initialized without a model, so a virtual + # initial model is used. This means the expected models are the "smallest" + # models (as many fixed parameters as possible) in the model space. expected_models = [ ('model_subspace_1', {f'k{i}': ESTIMATE for i in range(1, 5)}), # This model is not included because it is exactly the same as the From 9ab8a2374b10797291205f9b3114dc29b32563e7 Mon Sep 17 00:00:00 2001 From: Dilan Pathirana Date: Wed, 27 Mar 2024 17:31:24 +0100 Subject: [PATCH 13/13] support user-supplied models with no specified subspace --- petab_select/model.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/petab_select/model.py b/petab_select/model.py index e9bf0dff..a82aa408 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -899,13 +899,19 @@ def from_model(model: Model) -> "ModelHash": Returns: The model hash. """ - return ModelHash( - model_subspace_id=model.model_subspace_id, - model_subspace_indices_hash=( + model_subspace_id = '' + model_subspace_indices_hash = '' + if model.model_subspace_id is not None: + model_subspace_id = model.model_subspace_id + model_subspace_indices_hash = ( ModelHash.hash_model_subspace_indices( - model.model_subspace_indices, + model.model_subspace_indices ) - ), + ) + + return ModelHash( + model_subspace_id=model_subspace_id, + model_subspace_indices_hash=model_subspace_indices_hash, petab_hash=ModelHash.get_petab_hash(model=model), )