diff --git a/petab_select/constants.py b/petab_select/constants.py
index ac0b2a73..3ddb96b0 100644
--- a/petab_select/constants.py
+++ b/petab_select/constants.py
@@ -1,4 +1,5 @@
 """Constants for the PEtab Select package."""
+import string
 import sys
 from enum import Enum
 from pathlib import Path
@@ -32,6 +33,14 @@
 MODEL_CODE = 'model_code'
 MODEL_HASH = 'model_hash'
 MODEL_HASHES = 'model_hashes'
+MODEL_HASH_DELIMITER = '-'
+MODEL_SUBSPACE_INDICES_HASH_DELIMITER = '.'
+MODEL_SUBSPACE_INDICES_HASH_MAP = (
+    # [0-9]+[A-Z]+[a-z]
+    string.digits
+    + string.ascii_uppercase
+    + string.ascii_lowercase
+)
 # If `predecessor_model_hash` is defined for a model, it is the ID of the model that the
 # current model was/is to be compared to. This is part of the result and is
 # only (optionally) set by the PEtab calibration tool. It is not defined by the
diff --git a/petab_select/model.py b/petab_select/model.py
index 55ddf56f..a82aa408 100644
--- a/petab_select/model.py
+++ b/petab_select/model.py
@@ -1,4 +1,5 @@
 """The `Model` class."""
+import string
 import warnings
 from os.path import relpath
 from pathlib import Path
@@ -13,9 +14,12 @@
     CRITERIA,
     ESTIMATED_PARAMETERS,
     MODEL_HASH,
+    MODEL_HASH_DELIMITER,
     MODEL_ID,
     MODEL_SUBSPACE_ID,
     MODEL_SUBSPACE_INDICES,
+    MODEL_SUBSPACE_INDICES_HASH_DELIMITER,
+    MODEL_SUBSPACE_INDICES_HASH_MAP,
     PARAMETERS,
     PETAB_ESTIMATE_TRUE,
     PETAB_PROBLEM,
@@ -41,6 +45,7 @@
     'default_compare',
     'models_from_yaml_list',
     'models_to_yaml_list',
+    'ModelHash',
 ]
 
 
@@ -58,14 +63,6 @@ class Model(PetabMixin):
             Functions to convert attributes from :class:`Model` to YAML.
         criteria:
             The criteria values of the calibrated model (e.g. AIC).
-        hash_attributes:
-            This attribute is currently not used.
-            Attributes that will be used to calculate the hash of the
-            :class:`Model` instance. NB: this hash is used during pairwise comparison
-            to determine whether any two :class:`Model` instances are unique. The
-            model instances are compared by their parameter estimation
-            problems, as opposed to parameter estimation results, which may
-            differ due to e.g. floating-point arithmetic.
         model_id:
            The model ID.
         petab_yaml:
@@ -130,28 +127,6 @@
             for criterion_id, criterion_value in x.items()
         },
     }
-    hash_attributes = {
-        # MODEL_ID: lambda x: hash(x),  # possible circular dependency on hash
-        # MODEL_SUBSPACE_ID: lambda x: hash(x),
-        # MODEL_SUBSPACE_INDICES: hash_list,
-        # TODO replace `YAML` with `PETAB_PROBLEM_HASH`, as YAML could refer to
-        # different problems if used on different filesystems or sometimes
-        # absolute and other times relative. Better to check whether the
-        # PEtab problem itself is unique.
-        # TODO replace `PARAMETERS` with `PARAMETERS_ALL`, which should be al
-        # parameters in the PEtab problem. This avoids treating the PEtab problem
-        # differently to the model (in a subspace with the PEtab problem) that has
-        # all nominal values defined in the subspace.
-        # TODO add `estimated_parameters`? Needs to be clarified whether this hash
-        # should be unique amongst only not-yet-calibrated models, or may also
-        # return the same value between differently parameterized models that ended
-        # up being calibrated to be the same... probably should be the former.
-        # Currently, the hash is stored, hence will "persist" after calibration
-        # if the same `Model` instance is used.
-        # PETAB_YAML: lambda x: hash(x),
-        PETAB_YAML: hash_str,
-        PARAMETERS: hash_parameter_dict,
-    }
 
     def __init__(
         self,
@@ -174,9 +149,12 @@
         self.parameters = parameters
         self.estimated_parameters = estimated_parameters
         self.criteria = criteria
 
-        self.model_hash = model_hash
         self.predecessor_model_hash = predecessor_model_hash
+        if self.predecessor_model_hash is not None:
+            self.predecessor_model_hash = ModelHash.from_hash(
+                self.predecessor_model_hash
+            )
 
         if self.parameters is None:
             self.parameters = {}
@@ -187,6 +165,15 @@
 
         super().__init__(petab_yaml=petab_yaml, petab_problem=petab_problem)
 
+        self.model_hash = None
+        self.get_hash()
+        if model_hash is not None:
+            model_hash = ModelHash.from_hash(model_hash)
+            if self.model_hash != model_hash:
+                raise ValueError(
+                    "The supplied model hash does not match the computed "
+                    "model hash."
+                )
         if self.model_id is None:
             self.model_id = self.get_hash()
 
@@ -508,27 +495,21 @@ def to_petab(
             PETAB_YAML: petab_yaml,
         }
 
-    def get_hash(self) -> int:
+    def get_hash(self) -> str:
         """Get the model hash.
 
-        Currently designed to only use pre-calibration information, such that if a model
-        is calibrated twice and the two calibrated models differ in their parameter
-        estimates, then they will still have the same hash.
+        See the documentation for :class:`ModelHash` for more information.
 
-        This is not implemented as ``__hash__`` because Python automatically truncates
-        values in a system-dependent manner, which reduces interoperability
+        This is not implemented as ``__hash__`` because Python automatically
+        truncates values in a system-dependent manner, which reduces
+        interoperability
         ( https://docs.python.org/3/reference/datamodel.html#object.__hash__ ).
 
         Returns:
             The hash.
         """
         if self.model_hash is None:
-            self.model_hash = hash_list(
-                [
-                    method(getattr(self, attribute))
-                    for attribute, method in Model.hash_attributes.items()
-                ]
-            )
+            self.model_hash = ModelHash.from_model(model=self)
         return self.model_hash
 
     def __hash__(self) -> None:
@@ -772,3 +753,244 @@ def models_to_yaml_list(
     model_dicts = None if not model_dicts else model_dicts
     with open(output_yaml, 'w') as f:
         yaml.dump(model_dicts, f)
+
+
+class ModelHash(str):
+    """A class to handle model hash functionality.
+
+    The model hash is designed to be: human-readable; able to be converted
+    back into the corresponding model; and unique up to the same PEtab
+    problem and parameters.
+
+    Consider two different models in different model subspaces, with
+    `ModelHash`s `model_hash0` and `model_hash1`, respectively. Assume that
+    these two models end up encoding the same PEtab problem (e.g. they set the
+    same parameters to be estimated).
+    The string representation will be different,
+    `str(model_hash0) != str(model_hash1)`, but their hashes will pass the
+    equality check: `model_hash0 == model_hash1` and
+    `hash(model_hash0) == hash(model_hash1)`.
+
+    This means that different models in different model subspaces that end up
+    being the same PEtab problem will have different human-readable hashes,
+    but if these models arise during model selection, then only one of them
+    will be calibrated.
+
+    Attributes:
+        model_subspace_id:
+            The ID of the model subspace of the model. Unique up to a single
+            PEtab Select problem model space.
+        model_subspace_indices_hash:
+            A hash of the location of the model in its model
+            subspace. Unique up to a single model subspace.
+        petab_hash:
+            A hash that is unique up to the same PEtab problem, which is
+            determined by: the PEtab problem YAML file location, nominal
+            parameter values, and parameters set to be estimated. This means
+            that different models may have the same `petab_hash`,
+            because they are the same estimation problem.
+    """
+
+    def __init__(
+        self,
+        model_subspace_id: str,
+        model_subspace_indices_hash: str,
+        petab_hash: str,
+    ):
+        self.model_subspace_id = model_subspace_id
+        self.model_subspace_indices_hash = model_subspace_indices_hash
+        self.petab_hash = petab_hash
+
+    def __new__(
+        cls,
+        model_subspace_id: str,
+        model_subspace_indices_hash: str,
+        petab_hash: str,
+    ):
+        hash_str = MODEL_HASH_DELIMITER.join(
+            [
+                model_subspace_id,
+                model_subspace_indices_hash,
+                petab_hash,
+            ]
+        )
+        instance = super().__new__(cls, hash_str)
+        return instance
+
+    def __copy__(self):
+        return ModelHash(
+            model_subspace_id=self.model_subspace_id,
+            model_subspace_indices_hash=self.model_subspace_indices_hash,
+            petab_hash=self.petab_hash,
+        )
+
+    def __deepcopy__(self, memo):
+        return self.__copy__()
+
+    @staticmethod
+    def get_petab_hash(model: Model) -> str:
+        """Get a hash that is unique up to the same estimation problem.
+
+        See :attr:`petab_hash` for more information.
+
+        Args:
+            model:
+                The model.
+
+        Returns:
+            The unique PEtab hash.
+        """
+        petab_yaml = str(model.petab_yaml.resolve())
+        model_parameter_df = model.to_petab(set_estimated_parameters=False)[
+            PETAB_PROBLEM
+        ].parameter_df
+        nominal_parameter_hash = hash_parameter_dict(
+            model_parameter_df[NOMINAL_VALUE].to_dict()
+        )
+        estimate_parameter_hash = hash_parameter_dict(
+            model_parameter_df[ESTIMATE].to_dict()
+        )
+        return hash_str(
+            petab_yaml + estimate_parameter_hash + nominal_parameter_hash
+        )[:8]
+
+    @staticmethod
+    def from_hash(
+        model_hash: Optional[Union[str, "ModelHash"]]
+    ) -> "ModelHash":
+        """Reconstruct a :class:`ModelHash` object from its string representation.
+
+        Args:
+            model_hash:
+                The model hash.
+
+        Returns:
+            The :class:`ModelHash` object.
+        """
+        if isinstance(model_hash, ModelHash):
+            return model_hash
+
+        if model_hash == VIRTUAL_INITIAL_MODEL:
+            return ModelHash(
+                model_subspace_id='',
+                model_subspace_indices_hash='',
+                petab_hash=VIRTUAL_INITIAL_MODEL,
+            )
+
+        (
+            model_subspace_id,
+            model_subspace_indices_hash,
+            petab_hash,
+        ) = model_hash.split(MODEL_HASH_DELIMITER)
+        return ModelHash(
+            model_subspace_id=model_subspace_id,
+            model_subspace_indices_hash=model_subspace_indices_hash,
+            petab_hash=petab_hash,
+        )
+
+    @staticmethod
+    def from_model(model: Model) -> "ModelHash":
+        """Create a hash for a model.
+
+        Args:
+            model:
+                The model.
+
+        Returns:
+            The model hash.
+        """
+        model_subspace_id = ''
+        model_subspace_indices_hash = ''
+        if model.model_subspace_id is not None:
+            model_subspace_id = model.model_subspace_id
+            model_subspace_indices_hash = (
+                ModelHash.hash_model_subspace_indices(
+                    model.model_subspace_indices
+                )
+            )
+
+        return ModelHash(
+            model_subspace_id=model_subspace_id,
+            model_subspace_indices_hash=model_subspace_indices_hash,
+            petab_hash=ModelHash.get_petab_hash(model=model),
+        )
+
+    @staticmethod
+    def hash_model_subspace_indices(model_subspace_indices: list[int]) -> str:
+        """Hash the location of a model in its subspace.
+
+        Args:
+            model_subspace_indices:
+                The location (indices) of the model in its subspace.
+
+        Returns:
+            The hash.
+ """ + try: + return ''.join( + MODEL_SUBSPACE_INDICES_HASH_MAP[index] + for index in model_subspace_indices + ) + except KeyError: + return MODEL_SUBSPACE_INDICES_HAS_HASH_DELIMITER.join( + str(i) for i in model_subspace_indices + ) + + def unhash_model_subspace_indices(self) -> list[int]: + """Get the location of a model in its subspace. + + Returns: + The location, as indices of the subspace. + """ + if ( + MODEL_SUBSPACE_INDICES_HASH_DELIMITER + in self.model_subspace_indices_hash + ): + return [ + int(s) + for s in self.model_subspace_indices_hash.split( + MODEL_SUBSPACE_INDICES_HASH_DELIMITER + ) + ] + else: + return [ + MODEL_SUBSPACE_INDICES_HASH_MAP.index(s) + for s in self.model_subspace_indices_hash + ] + + def get_model(self, petab_select_problem=None) -> Model: + """Get the model that a hash corresponds to. + + Args: + petab_select_problem: + The PEtab Select problem. The model will be found in its model + space. + + Returns: + The model. + """ + if self.petab_hash == VIRTUAL_INITIAL_MODEL: + return self.petab_hash + + return petab_select_problem.model_space.model_subspaces[ + self.model_subspace_id + ].indices_to_model( + self.unhash_model_subspace_indices( + self.model_subspace_indices_hash + ) + ) + + def __hash__(self) -> str: + """A string representation of the model hash.""" + return hash(self.petab_hash) + + def __eq__(self, other_hash: "ModelHash") -> bool: + """Check whether two model hashes are equivalent. + + This only checks for equivalence up to the same PEtab problem (see + :attr:`petab_hash`) + + Returns: + Whether the two hashes correspond to equivalent PEtab problems. + """ + return self.petab_hash == other_hash.petab_hash diff --git a/petab_select/problem.py b/petab_select/problem.py index 7b97fb19..6eee7822 100644 --- a/petab_select/problem.py +++ b/petab_select/problem.py @@ -17,7 +17,7 @@ Criterion, Method, ) -from .model import Model, default_compare +from .model import Model, ModelHash, default_compare from .model_space import ModelSpace __all__ = [ @@ -239,6 +239,20 @@ def get_best( ) return best_model + def model_hash_to_model(self, model_hash: Union[str, ModelHash]) -> Model: + """Get the model that matches a model hash. + + Args: + model_hash: + The model hash. + + Returns: + The model. + """ + return ModelHash.from_hash(model_hash).get_model( + petab_select_problem=self, + ) + def new_candidate_space( self, *args, diff --git a/test/model_space/test_model_space.py b/test/model_space/test_model_space.py index de690713..3f81b5aa 100644 --- a/test/model_space/test_model_space.py +++ b/test/model_space/test_model_space.py @@ -75,9 +75,9 @@ def test_model_space_backward_virtual(model_space): candidate_space = BackwardCandidateSpace() model_space.search(candidate_space) - # The forward candidate space is initialized without a model, so a virtual initial - # model is used. This means the expected models are the "smallest" models (as many - # fixed parameters as possible) in the model space. + # The backward candidate space is initialized without a model, so a virtual + # initial model is used. This means the expected models are the "smallest" + # models (as many fixed parameters as possible) in the model space. expected_models = [ ('model_subspace_1', {f'k{i}': ESTIMATE for i in range(1, 5)}), # This model is not included because it is exactly the same as the