Skip to content

Commit

Permalink
1st implementation of protein detection
Browse files Browse the repository at this point in the history
  • Loading branch information
SarahMuth committed Jan 31, 2025
1 parent f0ba5c0 commit 6ce0513
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 36 deletions.
2 changes: 2 additions & 0 deletions utils/gridsearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def gridsearch(data_path=None, model=None):
    """Placeholder for the detection-threshold grid search.

    The original definition had no body, which is a SyntaxError as soon as
    the module is imported. This stub keeps the module importable and fails
    loudly when called instead.

    Args:
        data_path: Optional; location of the data to search over. Accepted
            because protein_detection calls ``gridsearch(data_path, model)``.
        model: Optional; the model to evaluate. Accepted for the same reason.

    Raises:
        NotImplementedError: Always, until the search is implemented.
    """
    # TODO: implement the search and return the selected threshold
    # (protein_detection passes the result to blob_log as threshold_abs).
    raise NotImplementedError("gridsearch is not implemented yet")

50 changes: 14 additions & 36 deletions utils/protein_detection.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import numpy as np
from scipy.ndimage import gaussian_laplace, label, find_objects, center_of_mass
from scipy.ndimage.measurements import variance
from skimage.feature import blob_log


def protein_detection(heatmap): # TODO do this properly
def protein_detection(heatmap): #TODO do this properly
"""
Detects local maxima and estimates sizes of Gaussians in a 3D heatmap.
Expand All @@ -15,40 +13,20 @@ def protein_detection(heatmap): # TODO do this properly
- 'coordinates': Tuple of (z, y, x) for the local maxima
- 'size': Estimated size of the Gaussian (sigma equivalent)
"""
# Apply Laplacian of Gaussian (LoG) filter to enhance Gaussian-like structures
log_filtered = -gaussian_laplace(heatmap, sigma=1)

# Find local maxima
labeled, num_features = label(log_filtered > np.mean(log_filtered)) # Binary threshold

# Get bounding boxes and compute centers of mass
regions = find_objects(labeled)
detections = []

for i, region in enumerate(regions):
if region is None:
continue

# Extract subregion
subregion = labeled[region]
sub_heatmap = heatmap[region]

# Mask specific to the current label
mask = (subregion == (i + 1))

# Compute center of mass as the coordinates of the local maximum
com = center_of_mass(sub_heatmap, labels=mask, index=1)

# Compute size: estimate the variance of the Gaussian
size = np.sqrt(variance(sub_heatmap, labels=mask, index=1))
data_path=#TODO pass the val data paths
threshold = gridsearch(data_path, model)
#smallest protein structure: "beta-amylase": 33.27
#biggest protein structure: "ribosome": 109.02,
#0.3 is the factor to match the PDB size to the experimental data size
adj_factor=0.3 #TODO implement this as an argument, also when creating heatmap
pred_coords = blob_log(preds, min_sigma=33.27*adj_factor, max_sigma=109.02*adj_factor, threshold_abs=threshold)

# Adjust coordinates to global
starts = [r.start for r in region] # Dynamically handle dimensions
com_global = tuple(com[i] + starts[i] for i in range(len(starts)))

detections.append({
'coordinates': com_global,
'size': size
})
detections.append({
'coordinates': coordinates,
'size': size
})

return detections
return detections

0 comments on commit 6ce0513

Please sign in to comment.