From 6a9daeb943b0c5b0cd928af33c477cee1a0873d4 Mon Sep 17 00:00:00 2001 From: Miki Bonacci Date: Wed, 7 Feb 2024 12:34:35 +0100 Subject: [PATCH] Adding to_kinds and get_kinds functionalities. *Missing: - Give custom threshold and use kind_tags as defined in structure.properties.positions.kind_tags. Questions: answered, but worth to leave it here for now: What is then the value of each property associated to each kind? an average? maybe yes. This can be then generated in another function which takes the averages. Maybe in the property specific to_kind methods. Maybe indeed the average point which is obtained during the space_grid generation. --- aiida_atomistic/data/structure/5_feb.ipynb | 207 ++++++++++++++---- .../structure/properties/per_site/charge.py | 87 ++++++++ .../structure/properties/per_site/mass.py | 79 ++++++- .../structure/properties/per_site/position.py | 1 + .../structure/properties/per_site/symbols.py | 2 + .../properties/property_collector.py | 9 +- aiida_atomistic/data/structure/structure.py | 72 ++++++ 7 files changed, 404 insertions(+), 53 deletions(-) create mode 100644 aiida_atomistic/data/structure/properties/per_site/charge.py diff --git a/aiida_atomistic/data/structure/5_feb.ipynb b/aiida_atomistic/data/structure/5_feb.ipynb index cc76c08..eaf3403 100644 --- a/aiida_atomistic/data/structure/5_feb.ipynb +++ b/aiida_atomistic/data/structure/5_feb.ipynb @@ -43,21 +43,26 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "properties = {\n", " \"cell\":{\"value\":((1.0,0,0),(0,1,0),(0,0,1))},\n", " \"pbc\":{\"value\":[True,False,True]},\n", - " \"positions\":{\"value\":[[1,1,1],[2,3,3]]},\n", - " \"symbols\":{\"value\":[\"He\",\"Cu\"]},\n", + " \"positions\":{\n", + " \"value\":[[1,1,1],[2,3,3],[3,4,5],[1,2,3]],\n", + " \"kind_tags\":[\"He1\",\"\",\"\",\"He1\"]\n", + " },\n", + " \"symbols\":{\"value\":[\"He\",\"Cu\",\"He\",\"He\"]},\n", + " \"mass\":{\"value\":[1.551,9,1.5512,2],},\n", + " \"charge\":{\"value\":[1,0,1,0]}\n", " }" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -66,16 +71,16 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Cell(parent=, value=((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0)), domain='global')" + "Cell(parent=, value=((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0)), domain='global')" ] }, - "execution_count": 7, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -86,16 +91,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['pbc', 'cell', 'positions', 'symbols', 'custom']" + "['pbc', 'cell', 'positions', 'symbols', 'mass', 'charge', 'custom']" ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -106,22 +111,40 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "['charge', 'symbols', 'pbc', 'cell', 'positions', 'mass']" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "structure.store()" + "structure.properties.get_defined_properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#structure.store()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#s = orm.load_node(structure.pk)" ] }, { @@ -130,27 +153,16 @@ "metadata": {}, "outputs": [], "source": [ - "s = orm.load_node(structure.pk)" + "#s.properties" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "s.properties" + "#s.properties.positions" ] }, { @@ -161,7 +173,7 @@ { "data": { "text/plain": [ - "Positions(parent=, value=[(1.0, 1.0, 1.0), (2.0, 3.0, 3.0)], domain='per-site')" + "['pbc', 'cell', 'positions', 'symbols', 'mass', 'charge', 'custom']" ] }, "execution_count": 12, @@ -170,7 +182,7 @@ } ], "source": [ - "s.properties.positions" + "list(structure.properties.get_valid_properties())" ] }, { @@ -181,7 +193,7 @@ { "data": { "text/plain": [ - "['pbc', 'cell', 'positions', 'symbols', 'custom']" + "4" ] }, "execution_count": 13, @@ -190,7 +202,7 @@ } ], "source": [ - "list(s.properties.get_valid_properties())" + "len(structure.properties.positions.value)" ] }, { @@ -201,7 +213,7 @@ { "data": { "text/plain": [ - "2" + "Symbols(parent=, value=['He', 'Cu', 'He', 'He'], domain='per-site')" ] }, "execution_count": 14, @@ -210,7 +222,7 @@ } ], "source": [ - "len(s.properties.positions.value)" + "structure.properties.symbols " ] }, { @@ -221,7 +233,13 @@ { "data": { "text/plain": [ - "Symbols(parent=, value=['He', 'Cu'], domain='per-site')" + "{'cell': {'value': ((1.0, 0, 0), (0, 1, 0), (0, 0, 1))},\n", + " 'pbc': {'value': [True, False, True]},\n", + " 'positions': {'value': [[1, 1, 1], [2, 3, 3], [3, 4, 5], [1, 2, 3]],\n", + " 'kind_tags': ['He1', '', '', 'He1']},\n", + " 'symbols': {'value': ['He', 'Cu', 'He', 'He']},\n", + " 'mass': {'value': [1.551, 9, 1.5512, 2]},\n", + " 'charge': {'value': [1, 0, 1, 0]}}" ] }, "execution_count": 15, @@ -230,7 +248,7 @@ } ], "source": [ - "s.properties.symbols " + "structure.base.attributes.get('_property_attributes')" ] }, { @@ -241,10 +259,13 @@ { "data": { "text/plain": [ - "{'pbc': {'value': [True, False, True]},\n", - " 'cell': {'value': [[1.0, 0, 0], [0, 1, 0], [0, 0, 1]]},\n", - " 'symbols': {'value': ['He', 'Cu']},\n", - " 'positions': {'value': [[1, 1, 1], [2, 3, 3]]}}" + "{'cell': {'value': ((1.0, 0, 0), (0, 1, 0), (0, 0, 1))},\n", + " 'pbc': {'value': [True, False, True]},\n", + " 'positions': {'value': [[1, 1, 1], [2, 3, 3], [3, 4, 5], [1, 2, 3]],\n", + " 'kind_tags': ['He1', '', '', 'He1']},\n", + " 'symbols': {'value': ['He', 'Cu', 'He', 'He']},\n", + " 'mass': {'value': [1.551, 9, 1.5512, 2]},\n", + " 'charge': {'value': [1, 0, 1, 0]}}" ] }, "execution_count": 16, @@ -253,7 +274,7 @@ } ], "source": [ - "s.base.attributes.get('_property_attributes')" + "structure.to_dict()" ] }, { @@ -264,10 +285,7 @@ { "data": { "text/plain": [ - "{'pbc': {'value': [True, False, True]},\n", - " 'cell': {'value': [[1.0, 0, 0], [0, 1, 0], [0, 0, 1]]},\n", - " 'symbols': {'value': ['He', 'Cu']},\n", - " 'positions': {'value': [[1, 1, 1], [2, 3, 3]]}}" + "[1.551, 9.0, 1.5512, 2.0]" ] }, "execution_count": 17, @@ -276,7 +294,104 @@ } ], "source": [ - "s.to_dict()" + "structure.properties.mass.value" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.001" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "structure.properties.mass.default_kind_threshold" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 0, 74, 0, 4]),\n", + " [1.601, 9.001000000000007, 1.601, 2.0010000000000003])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "structure.properties.mass.to_kinds(thr = 0.1, kind_names=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['He', 'Cu', 'He', 'He']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "structure.properties.symbols.value" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "He\n", + "Cu\n", + "He\n", + "He\n" + ] + }, + { + "data": { + "text/plain": [ + "([2, 1, 2, 3],\n", + " ['He2', 'Cu1', 'He2', 'He3'],\n", + " {'charge': [1.1250000000000002, 0.225, 1.1250000000000002, 0.225],\n", + " 'mass': [1.5514999999999999,\n", + " 9.000499999999178,\n", + " 1.5514999999999999,\n", + " 2.0004999999999504]})" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_kinds(structure)" ] }, { diff --git a/aiida_atomistic/data/structure/properties/per_site/charge.py b/aiida_atomistic/data/structure/properties/per_site/charge.py new file mode 100644 index 0000000..98648c2 --- /dev/null +++ b/aiida_atomistic/data/structure/properties/per_site/charge.py @@ -0,0 +1,87 @@ +from typing import List, Literal +from pydantic import Field, validator, root_validator + +from aiida.common.constants import elements + +from aiida_atomistic.data.structure.properties.property_utils import BaseProperty + + +################################################## Start: Charge property: + +class Charge(BaseProperty): + """ + The charge property. + """ + domain = "per-site" + default_kind_threshold = 0.45 + # units... maybe specify in the docs. + value: List[float] = Field(default=None) + + # ToDo: + @validator("value", always=True) + def validate_charges(cls,value,values): + # I have to use the _property_attributes, as accessing directly parent.properties gives recursion error. + # Maybe it is possible to change how we get the properties? + properties = values["parent"].base.attributes.get("_property_attributes") + if not "positions" in properties: + # this also validated that we have symbols. + raise ValueError("If you define charges, you should define also the corresponding positions.") + elif not len(properties["charge"]["value"]) == len(properties["positions"]["value"]): + """if len(properties["charge"]["value"]) == 0: + # assign default values of the charges. + symbols = values["parent"].base.attributes.get("_property_attributes")["symbols"] + properties["charge"]["value"] = [_atomic_charges[symbol] for symbol in symbols] + else:""" + raise ValueError("The number of provided charges should either be zero or match the number of positions.") + return value + + + def to_kinds(self, thr: float = default_kind_threshold, kind_names=False): + import numpy as np + """Get the kinds for the charge property + + ### Search algorithm: + + Basically I generate the space grid, using min+thr/2, max+thr/2 and thr. + Then, for each point of the space (the max+thr/2 is excluded), + I select the sites which are at a distance lower or equal than the thr from the + points (which are the middle points of the space [min:max:thr]). + In this way I can assign the indexes for each region. + + Then I should provide a renaming of the kinds, + but this can be done at the end of the procedure, + which should be done for every property which requires a kind. + Then, I should also consider the tags. + + ## Missing: + - return the value fo the property, an average: the middle point of the space_grid, or better, + the space_grid[i] + + Args: + thr (float, optional): the threshold to consider two atoms of the same element to be the same kind. + Defaults to structure.properties.charge.default_kind_threshold. + kinds_names (bool, optional): if the kinds shoulb be printed also with their element name. When we defined the kinds + with respect all the properties of the structure, we just use numbers. + + Returns: + kinds: list of kinds associated to the charge property. + """ + symbols_array = np.array(self.parent.properties.symbols.value) + prop_array = np.array(self.value) + space_grid = np.arange(start= np.min(prop_array+thr/2), stop=np.max(prop_array+thr/2),step=thr) + + # list for the value of the property for each generated kind. + kinds_values = [0]*len(symbols_array) + + kinds = list(symbols_array) + for i in range(len(space_grid)): + # +thr/1e10 is needed because sometime the equal condition is not detected. + indexes = np.where(np.abs(space_grid[i]-prop_array)<=thr/2+thr/1e10)[0] + + if len(indexes) > 0: + for j in indexes: + kinds[j] = kinds[j]+f"{i}" if kind_names else i + kinds_values[j] = space_grid[i] + + + return np.array(kinds), kinds_values \ No newline at end of file diff --git a/aiida_atomistic/data/structure/properties/per_site/mass.py b/aiida_atomistic/data/structure/properties/per_site/mass.py index cdb418e..e42b332 100644 --- a/aiida_atomistic/data/structure/properties/per_site/mass.py +++ b/aiida_atomistic/data/structure/properties/per_site/mass.py @@ -1,19 +1,88 @@ -from typing import List -from pydantic import Field +from typing import List, Literal +from pydantic import Field, validator, root_validator + +from aiida.common.constants import elements from aiida_atomistic.data.structure.properties.property_utils import BaseProperty -################################################## Start: PBC property: +_atomic_masses = {el['symbol']: el['mass'] for el in elements.values()} + +################################################## Start: Mass property: class Mass(BaseProperty): """ The mass property. """ domain = "per-site" - default_kind_threshold: float = Field(default=1e-3) + default_kind_threshold = 1e-3 # units... maybe specify in the docs. value: List[float] = Field(default=None) + # ToDo: + @validator("value", always=True) + def validate_masses(cls,value,values): + # I have to use the _property_attributes, as accessing directly parent.properties gives recursion error. + # Maybe it is possible to change how we get the properties? + properties = values["parent"].base.attributes.get("_property_attributes") + if not "positions" in properties: + # this also validated that we have symbols. + raise ValueError("If you define masses, you should define also the corresponding positions.") + elif not len(properties["mass"]["value"]) == len(properties["positions"]["value"]): + """if len(properties["mass"]["value"]) == 0: + # assign default values of the masses. + symbols = values["parent"].base.attributes.get("_property_attributes")["symbols"] + properties["mass"]["value"] = [_atomic_masses[symbol] for symbol in symbols] + else:""" + raise ValueError("The number of provided masses should either be zero or match the number of positions.") + return value + + def to_kinds(self, thr: float = default_kind_threshold, kind_names=False): + import numpy as np + """Get the kinds for the charge property + + ### Search algorithm: + + Basically I generate the space grid, using min+thr/2, max+thr/2 and thr. + Then, for each point of the space (the max+thr/2 is excluded), + I select the sites which are at a distance lower or equal than the thr from the + points (which are the middle points of the space [min:max:thr]). + In this way I can assign the indexes for each region. + + Then I should provide a renaming of the kinds, + but this can be done at the end of the procedure, + which should be done for every property which requires a kind. + Then, I should also consider the tags. + + ## Missing: + - recognise kind_tags and do not write the same kind; However, the tags should be + assigned only in one place for the full structure. no matter the properties. e.g. in the positions. + + Args: + thr (float, optional): the threshold to consider two atoms of the same element to be the same kind. + Defaults to structure.properties.charge.default_kind_threshold. + kinds_names (bool, optional): if the kinds shoulb be printed also with their element name. When we defined the kinds + with respect all the properties of the structure, we just use numbers. + + Returns: + kinds: list of kinds associated to the charge property. + """ + symbols_array = np.array(self.parent.properties.symbols.value) + prop_array = np.array(self.value) + space_grid = np.arange(start= np.min(prop_array+thr/2), stop=np.max(prop_array+thr/2),step=thr) + + # list for the value of the property for each generated kind. + kinds_values = [0]*len(symbols_array) + + kinds = list(symbols_array) + for i in range(len(space_grid)): + # +thr/1e10 is needed because sometime the equal condition is not detected. + indexes = np.where(np.abs(space_grid[i]-prop_array)<=thr/2+thr/1e10)[0] -################################################## End: PBC property. \ No newline at end of file + if len(indexes) > 0: + for j in indexes: + kinds[j] = kinds[j]+f"{i}" if kind_names else i + kinds_values[j] = space_grid[i] + + + return np.array(kinds), kinds_values \ No newline at end of file diff --git a/aiida_atomistic/data/structure/properties/per_site/position.py b/aiida_atomistic/data/structure/properties/per_site/position.py index 1fb42bf..2e6de8f 100644 --- a/aiida_atomistic/data/structure/properties/per_site/position.py +++ b/aiida_atomistic/data/structure/properties/per_site/position.py @@ -12,6 +12,7 @@ class Positions(BaseProperty): domain = "per-site" #kind_threshold: float = Field(default=1e-3) value: List[Tuple[float,float,float]] = Field(default=None) + kind_tags: List[str] = Field(default=None) # thgis should be in position class? Unified, I think so. tipo alla fine del to kind fai un check e resetti. ################################################## End: PBC property. \ No newline at end of file diff --git a/aiida_atomistic/data/structure/properties/per_site/symbols.py b/aiida_atomistic/data/structure/properties/per_site/symbols.py index 26b70ce..8de4394 100644 --- a/aiida_atomistic/data/structure/properties/per_site/symbols.py +++ b/aiida_atomistic/data/structure/properties/per_site/symbols.py @@ -25,5 +25,7 @@ def validate_symbols(cls,value,values): raise ValueError("If you define symbols, you should define also the corresponding positions.") elif not len(value) == len(values["parent"].base.attributes.get("_property_attributes")["positions"]["value"]): raise ValueError("The number of provided symbols should match the number of positions.") + # what if we prefer to give a guess? like the following: + #return [value[0]]*len(values["parent"].base.attributes.get("_property_attributes")["positions"]["value"]) return value ################################################## End: PBC property. \ No newline at end of file diff --git a/aiida_atomistic/data/structure/properties/property_collector.py b/aiida_atomistic/data/structure/properties/property_collector.py index 3ab51ff..d2f3dfa 100644 --- a/aiida_atomistic/data/structure/properties/property_collector.py +++ b/aiida_atomistic/data/structure/properties/property_collector.py @@ -8,6 +8,8 @@ from aiida_atomistic.data.structure.properties.per_site.position import Positions from aiida_atomistic.data.structure.properties.per_site.symbols import Symbols +from aiida_atomistic.data.structure.properties.per_site.mass import Mass +from aiida_atomistic.data.structure.properties.per_site.charge import Charge from aiida_atomistic.data.structure.properties.custom import CustomProperty @@ -43,6 +45,8 @@ class PropertyCollector(HasPropertyMixin): cell: Cell = Property() positions: Positions = Property() symbols: Symbols = Property() + mass: Mass = Property() + charge: Charge = Property() custom: CustomProperty = Property() def __init__( @@ -95,8 +99,9 @@ def _inspect_properties(self,properties): raise ValueError(f"The '{pname}' value is not of the right type. Expected '{type(dict())}', received '{type(pvalue)}'.") else: """ - This call is done as we want to initialise the properties, in such a way to have `pydantic` validation. - this happens because the get method will invoke the `_template_property` method as defined in the + Using the pydantic validation: + the following `getattr` call is done as we want to initialise the properties, in such a way to have `pydantic` validation. + This is needed because the get method will invoke the `_template_property` method as defined in the `HasPropertyMixin` class. The fact is that the `PropertyMixinMetaclass` only define the fget and fset methods, without using them. """ diff --git a/aiida_atomistic/data/structure/structure.py b/aiida_atomistic/data/structure/structure.py index cbc5fbc..4173f3b 100644 --- a/aiida_atomistic/data/structure/structure.py +++ b/aiida_atomistic/data/structure/structure.py @@ -771,6 +771,8 @@ def __init__( ## RM pbc = [True, True, True] ## RM self.set_pbc(pbc) + #### START new methods + @property def properties(self): """ @@ -790,6 +792,76 @@ def to_dict(self): """ return self.base.attributes.get('_property_attributes') + + def get_kinds(self, use_kind_tag = False, use_kind_name = True): + """Get the list of kinds, taking into account all the properties. + + Algorithm: + it generated the kinds_list for each property separately in step 1, then + it creates the matrix k = k.T where the rows are the sites, the columns are the kind for a given property. + In step 2 it checks for the matrxi kwhich rows have the same numbers in the same order, i.e. recognize the different + kinds considering all the properties. + + Args: + structure (_type_): _description_ + use_kind_tag (bool, optional): If True, at the end of the kinds generation, it uses the + structure.positions.kind_tags list to override some or all of + them. Defaults to False. + use_kind_names (bool, optional): if False, it returns only numbers for each different kinds. if True, returns + the kinds ready to be used in the plugins. + + Returns: + kinds_list: the list of kinds to be used in a plugin which requires it. + + *Missing: + - Give custom threshold and use kind_tags as defined in + structure.properties.positions.kind_tags. + + Questions: + answered, but worth to leave it here for now: What is then the value of each property associated to each kind? an average? maybe yes. + This can be then generated in another function which takes the averages. Maybe in the property specific + to_kind methods. Maybe indeed the average point which is obtained during the space_grid generation. + """ + import numpy as np + import copy + + symbols = self.properties.symbols.value + + # Step 1: + kind_properties = [] + kinds_values = {} + for single_property in self.properties.get_defined_properties(): + prop = getattr(self.properties,single_property) + if prop.domain == "per-site" and not single_property in ["symbols","positions"]: + kinds_per_property = prop.to_kinds() + kind_properties.append(kinds_per_property[0]) + kinds_values[single_property] = kinds_per_property[1] + + k = np.array(kind_properties) + k = k.T + + # Step 2: + kinds = np.zeros(len(self.properties.positions.value),dtype=int) -1 + kinds_names = copy.deepcopy(symbols) + for i in range(len(k)): + element = symbols[i] + print(element) + diff = k-k[i] + #print(ab) + diff_sum = np.sum(np.abs(diff),axis=1) + #print(diff_sum) + #print(np.where(diff_sum==0)) + kinds[np.where(diff_sum == 0)[0]] = i + for where in np.where(diff_sum == 0)[0]: + kinds_names[where] = f"{element}{i}" + if len(np.where(kinds == -1)[0]) == 0 : + #print(f"search ended at iteration {i}") + break + + return kinds.tolist(),kinds_names,kinds_values + + #### END new methods + def get_dimensionality(self): """ Return the dimensionality of the structure and its length/surface/volume.