Split resolve_duplicates into 2: now there's select_nth_duplicate too
Also fix a bug when the same segid is passed multiple times in an
iterable by adding a `set()` conversion.
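
For context, a minimal sketch (not part of the commit; the segid value is made up) of the indexing problem that the `set()` conversion addresses:

    import pandas as pd

    segids = [648518346494405175, 648518346494405175]  # hypothetical segid passed twice

    # Without deduplication the Series index holds the segid twice, so every
    # later label-based lookup or assignment on that segid touches two rows.
    anchor_points = pd.Series(index=segids, dtype=object)
    print(len(anchor_points))  # 2

    # The fix: deduplicate the iterable before building the index.
    anchor_points = pd.Series(index=set(segids), dtype=object)
    print(len(anchor_points))  # 1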
jasper-tms committed May 18, 2023
1 parent 7d19a93 commit 368c819
Showing 1 changed file with 16 additions and 22 deletions.
38 changes: 16 additions & 22 deletions fanc/lookup.py
@@ -215,7 +215,7 @@ def segids_from_pts(pts,
 anchor_point_sources = ['somas_dec2022', 'peripheral_nerves', 'neck_connective']
 def anchor_point(segid, source_tables=anchor_point_sources,
                  timestamp='now', resolve_duplicates=False,
-                 slow_mode=False) -> np.ndarray:
+                 select_nth_duplicate: int = 0, slow_mode=False) -> np.ndarray:
     """
     Return a representative "anchor" point for each of the given
     segment ID(s).
@@ -240,14 +240,16 @@ def anchor_point(segid, source_tables=anchor_point_sources,
         If 'now', use the current time.
         If datetime, use that time.
-    resolve_duplicates: bool or int (default False)
-        This argument specifies what to do if multiple anchor points are found
-        within a single table for a single segment ID. Possible values and behaviors:
-        - False: an exception will be raised instead of trying to resolve anything.
-        - True: the point with the smallest x coordinate will be used.
-        - An integer from 1 to the number of points found: The Nth point (1-indexed!!)
-          will be used, when the points are sorted by x coordinate. Setting
-          `resolve_duplicates=1` gives the same behavior as `resolve_duplicates=True`.
+    resolve_duplicates: bool (default False)
+    select_nth_duplicate: int (default 0)
+        If multiple anchor points are found within a single table for a
+        single segment ID, raise an error (if resolve_duplicates==False) or
+        select one of the points (if resolve_duplicates==True) by sorting the
+        points by x coordinate and then picking the Nth one, with N given by
+        select_nth_duplicate. (N is zero-indexed, so the default value of
+        select_nth_duplicate=0 results in selection of the point with the
+        smallest x coordinate). select_nth_duplicate is ignored if
+        resolve_duplicates==False.

     Returns
     -------
@@ -274,7 +276,7 @@ def anchor_point(segid, source_tables=anchor_point_sources,
                          ' Use updated IDs or provide the timestamp where'
                          ' the ID(s) is valid.')
-    anchor_points = pd.Series(index=segid, dtype=object)
+    anchor_points = pd.Series(index=set(segid), dtype=object)

     for table in source_tables:
         unanchored_ids = anchor_points[anchor_points.isna()].index.values
@@ -300,19 +302,11 @@ def anchor_point(segid, source_tables=anchor_point_sources,
                                      f' {seg} in table "{table}":\n'
                                      f'{np.vstack(point.pt_position)}.\nSet'
                                      ' resolve_duplicates to choose one.')
-                if resolve_duplicates is True:
-                    # default behavior is to resolve duplicates by taking the
-                    # first point (the one with the smallest x coordinate)
-                    resolve_duplicates = 1
-                if isinstance(resolve_duplicates, int):
-                    # change from 1-indexing (which the user can specify)
-                    # to 0-indexing (which we must use with iloc)
-                    resolve_duplicates -= 1
-                if resolve_duplicates >= len(point):
-                    raise ValueError('resolve_duplicates is greater than'
-                                     f' {len(point)}, the number of points found'
+                if select_nth_duplicate >= len(point):
+                    raise ValueError('select_nth_duplicate is too large given'
+                                     f' that {len(point)} points were found'
                                      f' for segid {seg} in table "{table}".')
-                anchor_points.loc[seg] = point['pt_position'].iloc[resolve_duplicates]
+                anchor_points.loc[seg] = point['pt_position'].iloc[select_nth_duplicate]
             else:
                 anchor_points.loc[seg] = point['pt_position'].iloc[0]
         if not any(anchor_points.isna()):
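
For orientation, a minimal usage sketch of the updated signature (the segment ID is hypothetical, and it assumes `anchor_point` is importable as `fanc.lookup.anchor_point`):

    from fanc import lookup

    segids = [648518346481082458]  # hypothetical segment ID

    # Default: raise if any source table holds more than one point for a segid.
    pt = lookup.anchor_point(segids)

    # Resolve duplicates by sorting candidate points by x coordinate and taking
    # the first one (select_nth_duplicate is zero-indexed, default 0).
    pt = lookup.anchor_point(segids, resolve_duplicates=True)

    # Or take the point with the second-smallest x coordinate instead.
    pt = lookup.anchor_point(segids, resolve_duplicates=True, select_nth_duplicate=1)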