-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig_equivalence.py
189 lines (168 loc) · 7.83 KB
/
config_equivalence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# Methods for checking equivalence of configs
import symm_ops as so
import pandas as pd
import numpy as np
import ase
import ase.io
from ase import Atoms
### Fastest methods:
# Experimental function to speed up comparisons by merging array elements into a single string, store all in list
def atom_nums_with_coords_flat(ase_cell):
    """Encode every atom of a structure as a single string for fast comparisons.

    Each atom is represented by its atomic number followed by its three
    fractional (scaled) coordinates, rounded to 3 d.p. and concatenated
    without separators.

    Args:
        ase_cell (ase Atoms object): Input structure

    Returns:
        list: One str per atom (atomic number + x, y, z), in the atom order of the input structure
    """
    atomic_numbers = ase_cell.get_atomic_numbers()
    # Coordinates are rounded to 3 d.p. to avoid numerical error when comparing to transformed structures
    rounded = np.round(ase_cell.get_scaled_positions(), 3)
    # A coordinate just below 1 (e.g. 0.9996) rounds up to 1.0, which on a periodic axis
    # is the same site as 0.0; wrap those axes so both images produce the same string.
    periodic = np.asarray(ase_cell.get_pbc(), dtype=bool)
    # Adding 0.0 converts -0.0 to 0.0, removing the discrepancy between the two without
    # discarding the sign of genuinely negative coordinates on non-periodic axes.
    rounded = np.where(periodic, rounded % 1.0, rounded) + 0.0
    return [
        str(number) + str(x) + str(y) + str(z)
        for number, (x, y, z) in zip(atomic_numbers, rounded)
    ]
# Experimental function to increase execution speed (compare str instead of np array): speedup x10 of orig (~ 0.2s for 56 atom cfgs)
def compare_cfgs_str(ase_cell, symm_cell):
    """Compares str representation of atomic numbers and atomic coords for all atoms in two structures

    Both structures are converted with atom_nums_with_coords_flat and the
    resulting strings are compared irrespective of atom order.

    Args:
        ase_cell (ase Atoms object): Original structure
        symm_cell (ase Atoms object): Structure after performing symmetry operations to random TM substitutions in orig config

    Returns:
        bool: True if both structures contain exactly the same atom+coords strings
            (same atom count, same multiplicities), False otherwise
    """
    orig_cfg = atom_nums_with_coords_flat(ase_cell)
    symm_cfg = atom_nums_with_coords_flat(symm_cell)
    # Comparing the sorted lists is an order-independent multiset comparison. Unlike
    # counting how many original atoms appear somewhere in the transformed list, it
    # also rejects a transformed structure that has extra atoms or a different
    # multiplicity of some atom+coords string, and avoids a list scan per atom.
    return sorted(orig_cfg) == sorted(symm_cfg)
# Use function to get around issue with continuing out of two loops when applying symm ops to random structures
def check_for_equiv(symm_ops, symm_op_count, orig_cfg, rand_cfg):
    """Tests whether any symmetry operation maps a random substitution structure onto the original

    Operations are applied one at a time via so.all_operations, in index order
    0 .. symm_op_count - 1, and each transformed structure is compared with the
    original using compare_cfgs_str. Evaluation stops at the first match.

    Args:
        symm_ops (dictionary): Symmetry operations of the parent structure generated by spglib
        symm_op_count (int): Total number of symmetry operations contained in symm_ops dictionary
        orig_cfg (ase Atoms object): Original structure for comparing with new structures
        rand_cfg (ase Atoms object): Structure created by random substitutions of TM's in the original structure

    Returns:
        bool: True if at least one match is found after applying symmetry operations, False otherwise
    """
    # Lazy generator: a transformed structure is only built when any() asks for it,
    # so no further operations are applied once a match has been found.
    transformed_cfgs = (
        so.all_operations(rand_cfg, symm_ops, index)
        for index in range(symm_op_count)
    )
    return any(
        compare_cfgs_str(orig_cfg, candidate) for candidate in transformed_cfgs
    )
### Original and (slower) speed improvement test methods:
'''
# Join atom_num to np array for atom_coords
def atom_nums_with_coords(ase_cell):
atoms = ase_cell.get_atomic_numbers()
positions = ase_cell.get_positions()
list_of_arrays = []
for atom, pos in zip(atoms, positions):
atomAndCoords = np.zeros(4)
atomAndCoords[0] = atom
atomAndCoords[1] = pos[0]
atomAndCoords[2] = pos[1]
atomAndCoords[3] = pos[2]
list_of_arrays.append(atomAndCoords)
return list_of_arrays
# Experimental function to speed up comparisons by merging array elements into a single number, changing '.', 99999, '-' to 11111 and 'e' to 77777
def atom_nums_with_coords_int(ase_cell):
atoms = ase_cell.get_atomic_numbers()
positions = ase_cell.get_positions()
list_of_ints = []
for atom, pos in zip(atoms, positions):
atomAndCoords = str(atom)+str(pos[0])+str(pos[1])+str(pos[2])
atomAndCoords = atomAndCoords.replace('.','99999')
atomAndCoords = atomAndCoords.replace('-','11111')
atomAndCoords = atomAndCoords.replace('e','77777')
list_of_ints.append(int(atomAndCoords))
return list_of_ints
# Experimental function to use pandas to sort by coords in order x,y,z to speed up config comparison process
# Returns list of atomic number for coordinates in ascending order
def atom_nums_with_coords_pdSorted(ase_cell):
atoms = ase_cell.get_atomic_numbers()
positions = ase_cell.get_positions()
atom_coords = pd.DataFrame({'Atom_type': atoms[:], 'x': positions[:,0], 'y': positions[:,1], 'z': positions[:,2]})
sorted_atom_coords = atom_coords.sort_values(by=['x', 'y', 'z'])
atoms_sorted = sorted_atom_coords['Atom_type'].tolist()
return atoms_sorted
# Goes through orig cfg coords (line-by-line) and searches for match in candidate symm equiv cfg (line-by-line)
# May be too slow in later uses... we'll see! -- yes, ~2s for 56 atom cfgs!!
def compare_cfgs_orig(ase_cell, symm_cell):
orig_cfg = atom_nums_with_coords(ase_cell)
symm_cfg = atom_nums_with_coords(symm_cell)
same_atom_count = 0
# For each atom+coords in orig cfg
for atom in orig_cfg:
# Check if same atom at same coords is in symm cfg
for symm in symm_cfg:
if ( np.array_equal(atom, symm) ):
same_atom_count += 1
# Check that all same atoms were found
if (same_atom_count == len(orig_cfg)):
same_cfg = True
else:
same_cfg = False
return same_cfg
# Experimental function to increase execution speed (compare numbers instead of strings)
# Same time or a little slower than comparing strings (likely due to overhead in producing int initially)
def compare_cfgs_int(ase_cell, symm_cell):
orig_cfg = atom_nums_with_coords_int(ase_cell)
symm_cfg = atom_nums_with_coords_int(symm_cell)
same_atom_count = 0
# For each atom+coords in orig cfg
for atom in orig_cfg:
# Check if same atom at same coords is in symm cfg
for line in symm_cfg:
if (atom == line):
same_atom_count += 1
# Check that all same atoms were found
if (same_atom_count == len(orig_cfg)):
same_cfg = True
else:
same_cfg = False
return same_cfg
# Experimental function to use lists of atoms from sorted coords, 2x slower than comparing lists
def compare_cfgs_pdSorted(orig_atom_list, symm_cell):
same_atom_count = 0
symm_cell_atom_list = atom_nums_with_coords_pdSorted(symm_cell)
for orig_atom, symm_atom in zip (orig_atom_list, symm_cell_atom_list):
if (orig_atom != symm_atom):
same_cfg = False
return same_cfg
else:
same_atom_count += 1
if (same_atom_count == len(orig_atom_list)):
same_cfg = True
else:
same_cfg = False
return same_cfg
# Experimental function for compatibility with pandas sorted coords method
def check_for_equiv_pdSorted(symm_ops, symm_op_count, orig_atom_list, rand_cfg):
isEquiv = False
# Apply all symm ops of parent and check for equivalence to orig cfg
for op_num in range(symm_op_count):
transformRand_cfg = so.all_operations(rand_cfg, symm_ops, op_num)
is_same_as_orig = compare_cfgs_pdSorted(orig_atom_list, transformRand_cfg)
if is_same_as_orig:
isEquiv = True
return isEquiv
return isEquiv
'''