Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add quality control property for batch processing over genotypes #76

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
368 changes: 265 additions & 103 deletions notebooks/MultipleDicotPipeline.ipynb

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions sleap_roots/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
series_name: Name of the series derived from the HDF5 filename.
expected_count: Fetch the expected plant count for this series from the CSV.
group: Group name for the series from the CSV.
qc_fail: Flag to indicate if the series failed QC from the CSV.
"""

h5_path: Optional[str] = None
Expand Down Expand Up @@ -161,6 +162,20 @@
print(f"No group found for series {self.series_name} in CSV.")
return np.nan

@property
def qc_fail(self) -> Union[int, float]:
    """Flag to indicate if the series failed QC from the CSV.

    Reads the CSV at ``self.csv_path``, selects the rows whose
    ``plant_qr_code`` equals ``self.series_name`` and returns the
    ``qc_cylinder`` value of the first match.

    Returns:
        The ``qc_cylinder`` value of the first matching row, or ``np.nan``
        when the CSV path is unset, the file does not exist, a required
        column is absent, or no row matches this series.
    """
    if not self.csv_path or not Path(self.csv_path).exists():
        # NOTE(review): Codecov flagged this early-return path as untested.
        print("CSV path is not set or the file does not exist.")
        return np.nan

    df = pd.read_csv(self.csv_path)
    try:
        # Match the series_name (or plant_qr_code in the CSV) to fetch the QC flag
        return df[df["plant_qr_code"] == self.series_name]["qc_cylinder"].iloc[0]
    except KeyError as err:
        # A CSV that lacks the lookup or QC column should degrade to "unknown"
        # like the other fallbacks instead of raising out of a property access.
        print(f"Column {err} not found in CSV; cannot read QC flag.")
        return np.nan
    except IndexError:
        print(f"No QC flag found for series {self.series_name} in CSV.")
        return np.nan

Check warning on line 177 in sleap_roots/series.py

View check run for this annotation

Codecov / codecov/patch

sleap_roots/series.py#L175-L177

Added lines #L175 - L177 were not covered by tests
Comment on lines +168 to +177
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change the return value of qc_fail to False when the CSV path is not set or the QC flag is not found, to maintain consistency with the expected boolean return type.

-            return np.nan
+            return False
-            return np.nan
+            return False

Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
if not self.csv_path or not Path(self.csv_path).exists():
print("CSV path is not set or the file does not exist.")
return np.nan
df = pd.read_csv(self.csv_path)
try:
# Match the series_name (or plant_qr_code in the CSV) to fetch the QC flag
return df[df["plant_qr_code"] == self.series_name]["qc_cylinder"].iloc[0]
except IndexError:
print(f"No QC flag found for series {self.series_name} in CSV.")
return np.nan
if not self.csv_path or not Path(self.csv_path).exists():
print("CSV path is not set or the file does not exist.")
return False
df = pd.read_csv(self.csv_path)
try:
# Match the series_name (or plant_qr_code in the CSV) to fetch the QC flag
return df[df["plant_qr_code"] == self.series_name]["qc_cylinder"].iloc[0]
except IndexError:
print(f"No QC flag found for series {self.series_name} in CSV.")
return False


# Supports len(series) by delegating to the object stored in self.video.
def __len__(self) -> int:
"""Length of the series (number of images)."""
# NOTE(review): assumes len(self.video) counts one entry per image; confirm against the video type.
return len(self.video)
Expand Down
10 changes: 8 additions & 2 deletions sleap_roots/trait_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,12 +401,13 @@
csv_suffix: The suffix to append to the CSV file name. Default is ".all_frames_summary.csv".

Returns:
A dictionary containing the series name, group, aggregated traits, and summary statistics.
A dictionary containing the series name, group, qc_fail, aggregated traits, and summary statistics.
"""
# Initialize the return structure with the series name, group, and QC flag
result = {
"series": str(series.series_name),
"group": str(series.group),
"qc_fail": series.qc_fail,
"traits": {},
"summary_stats": {},
}
Expand Down Expand Up @@ -528,6 +529,11 @@
# Group series by their group property
series_groups = {}
for series in series_list:
# Exclude series with qc_fail flag set to 1
if int(series.qc_fail) == 1:
print(f"Skipping series '{series.series_name}' due to qc_fail flag.")
continue

Check warning on line 535 in sleap_roots/trait_pipelines.py

View check run for this annotation

Codecov / codecov/patch

sleap_roots/trait_pipelines.py#L533-L535

Added lines #L533 - L535 were not covered by tests
# Get the group name from the series object
group_name = str(series.group)
if group_name not in series_groups:
series_groups[group_name] = {"names": [], "series": []}
Expand Down Expand Up @@ -733,7 +739,7 @@

Returns:
A pandas DataFrame of computed traits summarized over all frames of each
series. The resulting dataframe will have a row for each series and a column
group. The resulting dataframe will have a row for each series and a column
for each series-level summarized trait.

Summarized traits are prefixed with the trait name and an underscore,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
6030_2,6030,2,h5s_predictions\ES\6030_2.h5,0,,3,,,,,,,
6035_1,6035,1,h5s_predictions\ES\6035_1.h5,0,,3,,,,,,,
6035_2,6035,2,h5s_predictions\ES\6035_2.h5,0,,3,,,,,,,
6039_1,6039,1,h5s_predictions\ES\6039_1.h5,0,,2,1,1,,,,,
6039_1,6039,1,h5s_predictions\ES\6039_1.h5,1,,2,1,1,,,,,
6039_2,6039,2,h5s_predictions\ES\6039_2.h5,0,,3,,,,,,,
6042_1,6042,1,h5s_predictions\ES\6042_1.h5,0,,3,,,,,,,
6042_2,6042,2,h5s_predictions\ES\6042_2.h5,0,,3,,,,,,,
Expand Down
7 changes: 6 additions & 1 deletion tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def csv_path(tmp_path):
# Create a dummy CSV file
csv_path = tmp_path / "dummy.csv"
csv_path.write_text(
"plant_qr_code,number_of_plants_cylinder,genotype\ndummy,10,1100\nseries2,15,Kitaake-X\n"
"plant_qr_code,number_of_plants_cylinder,genotype,qc_cylinder\ndummy,10,1100,0\nseries2,15,Kitaake-X,1\n"
)
return csv_path

Expand Down Expand Up @@ -85,6 +85,11 @@ def test_expected_count(series_instance, csv_path):
assert series_instance.expected_count == 10


def test_qc_cylinder(series_instance, csv_path):
    """Check that ``qc_fail`` equals 0 once the series points at the fixture CSV."""
    series_instance.csv_path = csv_path
    observed_flag = series_instance.qc_fail
    assert observed_flag == 0


def test_len():
    """Check that a series built from a two-item video has length 2."""
    frames = ["frame1", "frame2"]
    assert len(Series(video=frames)) == 2
Expand Down
Loading