Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

atom selection/filtering backend refactor #291

Merged
merged 41 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
9d07f00
chemical entity now creates a rdkit molecule which shadows itself
ChiCheng45 Jan 25, 2024
ae23edb
fix for attempted self bonding
ChiCheng45 Jan 25, 2024
e97d3e3
implemented some selection function which use smarts
ChiCheng45 Jan 25, 2024
1934a90
reformatted code with black
ChiCheng45 Jan 25, 2024
68c1c6a
changed the default bond type to unspecified
ChiCheng45 Jan 26, 2024
9cb11f3
Merge branch 'protos' into chi/atom-selection
ChiCheng45 Jan 26, 2024
0c7d497
removed inchi method from mdanse chemical system and updated get_subs…
ChiCheng45 Jan 26, 2024
fb269e4
fixed the old atom selectors
ChiCheng45 Jan 26, 2024
83875a7
added more smarts selection types
ChiCheng45 Jan 26, 2024
2ffd6ee
added docstring to smart selector functions
ChiCheng45 Jan 26, 2024
edc05ec
changed the select_all method and refactored
ChiCheng45 Jan 26, 2024
bd9b771
added callable selectors
ChiCheng45 Jan 26, 2024
783e75c
updated smarts patterns
ChiCheng45 Jan 26, 2024
22c79c7
updated smart selectors
ChiCheng45 Jan 26, 2024
1a8bdcc
split smart selectors to atom and group
ChiCheng45 Jan 29, 2024
ef69d97
added molecule selectors
ChiCheng45 Jan 29, 2024
ebb0ba8
removed all selector code and added filterselection class and tests
ChiCheng45 Jan 29, 2024
0bb8653
test_connectivity fix
ChiCheng45 Jan 29, 2024
5055b05
changed filter to a selector
ChiCheng45 Jan 29, 2024
bbc79d3
added json dump and load and refactored tests
ChiCheng45 Jan 29, 2024
78c6f6e
added json dump and load and refactored tests. configurators to use n…
ChiCheng45 Jan 29, 2024
d26b840
reformatted with black
ChiCheng45 Jan 29, 2024
beeba91
fixes to the GUI and updated the default selector setting
ChiCheng45 Jan 29, 2024
844cc0e
changed selection configurator so that it only takes in one value rev…
ChiCheng45 Jan 29, 2024
9ef8dc5
fix when number of elements are changed on two calls to select elements
ChiCheng45 Jan 29, 2024
3dbc3db
updated json update so it returns a minimal string
ChiCheng45 Jan 29, 2024
9ee0dd7
updated thiol smarts string and refactoring
ChiCheng45 Jan 29, 2024
9412796
changed selector to filter so there is no issues with selection desel…
ChiCheng45 Jan 30, 2024
1e7caf9
refactored code and added docstrings
ChiCheng45 Jan 30, 2024
a49c68d
edit
ChiCheng45 Jan 30, 2024
c076937
applied black reformatting
ChiCheng45 Jan 30, 2024
11c2e59
refactoring
ChiCheng45 Jan 30, 2024
61c9829
added atom selection by index
ChiCheng45 Jan 30, 2024
353eb12
renamed method
ChiCheng45 Jan 30, 2024
6b42d6e
refactored and fixed some method call bugs
ChiCheng45 Jan 30, 2024
b02ba3e
reformatted with black
ChiCheng45 Jan 30, 2024
f22a75b
fix for python 3.9 and reformatted code with black
ChiCheng45 Jan 30, 2024
8b494bd
fix for python 3.9
ChiCheng45 Jan 30, 2024
bfddd1e
fix for elements with symbols with more than one character
ChiCheng45 Jan 30, 2024
d70e1f8
Merge branch 'protos' into chi/atom-parsing
MBartkowiakSTFC Jan 30, 2024
d3427c0
refactored tests
ChiCheng45 Jan 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 71 additions & 12 deletions MDANSE/Src/MDANSE/Chemistry/ChemicalEntity.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
from __future__ import annotations

import abc
from ast import literal_eval
import collections
import copy
from typing import Union, TYPE_CHECKING, List, Tuple

import h5py
import numpy as np
from rdkit import Chem
from numpy.typing import NDArray

from MDANSE.Chemistry import (
ATOMS_DATABASE,
MOLECULES_DATABASE,
Expand Down Expand Up @@ -2429,14 +2427,16 @@ def __init__(self, name: str = ""):

self._atoms = None

self.rdkit_mol = Chem.RWMol()

def __repr__(self):
    """Return a debug representation listing the instance attributes.

    Every entry of ``self.__dict__`` is rendered as ``name=repr(value)``,
    with a single leading underscore stripped from the attribute name.
    The ``rdkit_mol`` attribute is left out of the output.

    Returns
    -------
    str
        The attribute listing wrapped in the fully qualified class name.
    """
    # Build the listing in one pass; the rdkit molecule is filtered out
    # up front so its repr is never computed.
    contents = ", ".join(
        f'{key[1:] if key[0] == "_" else key}={repr(value)}'
        for key, value in self.__dict__.items()
        if key != "rdkit_mol"
    )
    return f"MDANSE.MolecularDynamics.ChemicalEntity.ChemicalSystem({contents})"

def __str__(self):
Expand All @@ -2458,6 +2458,17 @@ def add_chemical_entity(self, chemical_entity: _ChemicalEntity) -> None:
at.index = self._number_of_atoms
self._number_of_atoms += 1

# add the atoms to the rdkit molecule, ghost atoms are
# never added to the rdkit molecule object
atm_num = ATOMS_DATABASE[at.symbol]["atomic_number"]
rdkit_atm = Chem.Atom(atm_num)

# makes sure that rdkit doesn't add extra hydrogens
rdkit_atm.SetNumExplicitHs(0)
rdkit_atm.SetNoImplicit(True)

self.rdkit_mol.AddAtom(rdkit_atm)

self._total_number_of_atoms += chemical_entity.total_number_of_atoms

chemical_entity.parent = self
Expand All @@ -2467,13 +2478,61 @@ def add_chemical_entity(self, chemical_entity: _ChemicalEntity) -> None:
if hasattr(chemical_entity, "_bonds") and hasattr(chemical_entity, "index"):
for bond in chemical_entity._bonds:
number_bond = [chemical_entity.index, bond.index]
if not number_bond in self._bonds:
if number_bond not in self._bonds:
self._bonds.append(number_bond)

# add the bonds between the rdkit atoms, the atom index in
# this chemical system needs to be unique and fixed otherwise
# there could be issues
bonds_added = []
for at_i in chemical_entity.atom_list:
i = at_i.index
for at_j in at_i.bonds:
j = at_j.index
if i == j:
continue
bond_idxs = sorted([i, j])
if bond_idxs not in bonds_added:
# there is currently no bonding information in
# MDANSE, we will have to default to the UNSPECIFIED
# bond type.
single = Chem.rdchem.BondType.UNSPECIFIED
self.rdkit_mol.AddBond(i, j, single)
bonds_added.append(bond_idxs)

self._configuration = None

self._atoms = None

def get_substructure_matches(
    self, smarts: list[str], maxmatches: int = 1000000
) -> set[int]:
    """Collect the atom indices matched by any of the given SMARTS strings.

    Each SMARTS string is turned into an rdkit query molecule and matched
    against ``self.rdkit_mol``; the indices from all matches of all
    patterns are merged into one set. Note that bonds in the rdkit
    molecule of an MDANSE chemical system are created with
    Chem.rdchem.BondType.UNSPECIFIED.

    Parameters
    ----------
    smarts : list[str]
        List of SMARTS strings.
    maxmatches : int
        Passed as ``maxMatches`` to the rdkit GetSubstructMatches
        method.

    Returns
    -------
    set[int]
        A set of matched atom indices.
    """
    matched_indices = set()
    for pattern in smarts:
        query = Chem.MolFromSmarts(pattern)
        hits = self.rdkit_mol.GetSubstructMatches(query, maxMatches=maxmatches)
        # every hit is a tuple of atom indices; flatten them into the set
        matched_indices.update(idx for hit in hits for idx in hit)
    return matched_indices

@property
def atom_list(self) -> list[Atom]:
"""List of all non-ghost atoms in the ChemicalSystem."""
Expand Down Expand Up @@ -2547,12 +2606,12 @@ def copy(self) -> "ChemicalSystem":

return cs

def rebuild(self, cluster_list: List[Tuple(int)]):
def rebuild(self, cluster_list: List[Tuple[int]]):
"""
Copies the instance of ChemicalSystem into a new, identical instance.

:param cluster_list: list of tuples of atom indices, one per cluster
:type List[Tuple(int)]: each element is a tuple of atom indices (int)
:type List[Tuple[int]]: each element is a tuple of atom indices (int)
"""

atom_names_before = [atom.name for atom in self.atoms]
Expand Down
175 changes: 0 additions & 175 deletions MDANSE/Src/MDANSE/Framework/AtomSelectionParser.py

This file was deleted.

Empty file.
17 changes: 17 additions & 0 deletions MDANSE/Src/MDANSE/Framework/AtomSelector/all_selector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from MDANSE.Chemistry.ChemicalEntity import ChemicalSystem


def select_all(system: ChemicalSystem) -> set[int]:
    """Return the index of every atom in the chemical system's atom list.

    Parameters
    ----------
    system : ChemicalSystem
        The MDANSE chemical system.

    Returns
    -------
    set[int]
        The ``index`` of each atom in ``system.atom_list``.
    """
    return {atom.index for atom in system.atom_list}
Loading
Loading