Skip to content

Commit

Permalink
Fix subsetting range bug
Browse files Browse the repository at this point in the history
Fixes a RuntimeError when using the --subset option with
a CSV that has too few species. This error only occurred
when k was greater than the number of species. This change will
return fewer than k entries when there are too few species. This
matches the behavior of heapq.nlargest() used for non-species grouping.

Fixes #87
  • Loading branch information
johnbradley committed Feb 6, 2025
1 parent 306a0be commit 58faf9c
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/bioclip/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,8 @@ def apply_filter(self, keep_labels_ary: List[bool]):
self._subset_txt_names = names

def format_species_probs(self, image_key: str, probs: torch.Tensor, k: int = 5) -> List[dict[str, float]]:
# Prevent error when probs is smaller than k
k = min(k, probs.shape[0])
topk = probs.topk(k)
result = []
for i, prob in zip(topk.indices, topk.values):
Expand Down
13 changes: 13 additions & 0 deletions tests/test_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,19 @@ def test_create_taxa_filter_from_csv(self):
def test_get_rank_labels(self):
    # The rank labels, joined with commas, must read kingdom through species.
    joined_ranks = ','.join(get_rank_labels())
    self.assertEqual(joined_ranks, 'kingdom,phylum,class,order,family,genus,species')

def test_format_species_probs_too_few_species(self):
    # Regression test for the --subset failure: format_species_probs() must
    # return at most k entries and must not raise when fewer than k
    # probabilities are available.
    classifier = TreeOfLifeClassifier()

    # test when k < number of probabilities: exactly k entries come back
    probs = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7])
    top_probs = classifier.format_species_probs(EXAMPLE_CAT_IMAGE, probs, k=5)
    self.assertEqual(len(top_probs), 5)
    self.assertEqual(top_probs[0]['file_name'], EXAMPLE_CAT_IMAGE)

    # test when k > number of probabilities: the result is capped at the
    # number of available probabilities instead of raising a RuntimeError
    probs = torch.tensor([0.1, 0.2, 0.3, 0.4])
    top_probs = classifier.format_species_probs(EXAMPLE_CAT_IMAGE, probs, k=5)
    self.assertEqual(len(top_probs), 4)
    self.assertEqual(top_probs[0]['file_name'], EXAMPLE_CAT_IMAGE)


class TestEmbed(unittest.TestCase):
def test_get_image_features(self):
Expand Down

0 comments on commit 58faf9c

Please sign in to comment.