Skip to content

Commit

Permalink
finish evaluation
Browse files Browse the repository at this point in the history
  • Loading branch information
SarahMuth committed Feb 18, 2025
1 parent 540a37b commit 951cdfe
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 12 deletions.
68 changes: 57 additions & 11 deletions evaluation/run_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,56 @@
import csv
import argparse
from tqdm import tqdm
import sys
sys.path.append("/user/muth9/u12095/czii-protein-challenge")

from utils.evaluation.evaluation_metrics import metric_coords
import numpy as np
import json

from utils import metric_coords
from data_processing.create_heatmap import parse_json_files

def evaluate(pred_coords, label_path, model_name, input_name):
def evaluate_per_protein_type(pred_coords, label_path, model_name, input_name):
    """Compute detection metrics separately for each protein type and append them to a CSV.

    The same predicted coordinates are scored against each protein type's
    ground-truth subset, producing one CSV row per type.

    Parameters
    ----------
    pred_coords : array-like
        Predicted detection coordinates; passed unchanged to ``metric_coords``.
    label_path : str
        Directory containing the ground-truth ``.json`` label files.
    model_name : str
        Used to name the output file ``results/evaluation_<model_name>.csv``.
    input_name : str
        Identifier of the evaluated input, written into every CSV row.
    """
    json_files = [os.path.join(label_path, f) for f in os.listdir(label_path) if f.endswith('.json')]
    label_coords, protein_types = parse_json_files(json_files)

    # Group label coordinates by their protein type.
    label_dict = {}
    for coord, p_type in zip(label_coords, protein_types):
        label_dict.setdefault(p_type, []).append(coord)

    results_folder = os.path.join(os.path.dirname(__file__), "results")
    os.makedirs(results_folder, exist_ok=True)
    csv_file = os.path.join(results_folder, f"evaluation_{model_name}.csv")

    # Write the header only when the file is first created; later runs append rows.
    write_header = not os.path.exists(csv_file)

    with open(csv_file, mode='a', newline='') as file:
        writer = csv.writer(file)
        if write_header:
            writer.writerow(["input_name", "protein_type", "precision", "recall", "f1", "dev_percentage", "sMAPE", "mae"])

        for protein_type, label_coords_subset in label_dict.items():
            # Score the full prediction set against this protein type's labels only.
            precision, recall, f1, dev_percentage, sMAPE, mae = metric_coords(label_coords_subset, pred_coords)
            writer.writerow([input_name, protein_type, precision, recall, f1, dev_percentage, sMAPE, mae])

    print(f"Per-protein metrics saved to {csv_file}")

def evaluate(pred_coords, label_path, model_name, input_name, evaluate_per_protein=True):
json_files = [os.path.join(label_path, f) for f in os.listdir(label_path) if f.endswith('.json')]
label_coords, protein_types = parse_json_files(json_files)
with open(pred_coords, "r") as f:
points = json.load(f)
# Convert to NumPy array
predictions = np.array(points)

precision, recall, f1, dev_percentage, sMAPE, mae = metric_coords(label_coords, pred_coords)
precision, recall, f1, dev_percentage, sMAPE, mae = metric_coords(label_coords, predictions)

results_folder = os.path.join(os.path.dirname(__file__), "results")
os.makedirs(results_folder, exist_ok=True)
Expand All @@ -23,15 +64,18 @@ def evaluate(pred_coords, label_path, model_name, input_name):
with open(csv_file, mode='a', newline='') as file:
writer = csv.writer(file)
if write_header:
writer.writerow(["input_name", "precision", "recall", "f1", "dev_percentage", "sMAPE", "mae"])
writer.writerow([input_name, precision, recall, f1, dev_percentage, sMAPE, mae])
writer.writerow(["input_name", "protein_type", "precision", "recall", "f1", "dev_percentage", "sMAPE", "mae"])
writer.writerow([input_name, "all", precision, recall, f1, dev_percentage, sMAPE, mae])

print(f"Metrics saved to {csv_file}")

def process_folder(args):
if evaluate_per_protein:
evaluate_per_protein_type(predictions, label_path, model_name, input_name)

def process_folder(args, file_ending):
# Get all prediction files
input_files = [os.path.join(args.pred_coords, name) for name in os.listdir(args.pred_coords)
if name.endswith("_protein_detections.json") and os.path.isfile(os.path.join(args.pred_coords, name))]
if name.endswith(f"{file_ending}.json") and os.path.isfile(os.path.join(args.pred_coords, name))]

# Get all label subfolders
label_subfolders = {name: os.path.join(args.label_path, name) for name in os.listdir(args.label_path)
Expand All @@ -41,7 +85,7 @@ def process_folder(args):

for pred_coords in pbar:
# Extract input_name from the filename
input_name = os.path.basename(pred_coords).replace("_protein_detections.json", "")
input_name = os.path.basename(pred_coords).replace(f"{file_ending}.json", "")

# Match with label subfolder
label_folder = label_subfolders.get(input_name)
Expand Down Expand Up @@ -69,16 +113,18 @@ def main():

args = parser.parse_args()

file_ending = "_protein_detections_peak_local_max"

if os.path.isfile(args.pred_coords) and args.pred_coords.endswith(".json"):
# Extract input_name from the filename
input_name = os.path.basename(args.pred_coords).replace("_protein_detections.json", "")
input_name = os.path.basename(args.pred_coords).replace(f"{file_ending}.json", "")
evaluate(args.pred_coords, args.label_path, args.model_name, input_name)
elif os.path.isdir(args.pred_coords) and any(f.endswith(".json") for f in os.listdir(args.pred_coords)):
process_folder(args)
process_folder(args, file_ending)
else:
print("Invalid input")

print("Finished segmenting!")
print("Finished evaluating!")

if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion utils/inference/gridsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_full_label_path(json_val_path, val_path):
def gridsearch(json_val_path, model_path):
print("starting grid search")

threshes = np.arange(1.0, 2.0, 0.1)
threshes = np.arange(1.0, 2.5, 0.1)
data = []

# Load JSON from the file
Expand Down

0 comments on commit 951cdfe

Please sign in to comment.