diff --git a/notebooks/joe/checkPointsVsSeg.ipynb b/notebooks/joe/checkPointsVsSeg.ipynb new file mode 100644 index 000000000..a2fc7073f --- /dev/null +++ b/notebooks/joe/checkPointsVsSeg.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e787cadc-98c4-4ad4-9cb1-2f4e4064eba5", + "metadata": {}, + "source": [ + "## Configuration\n", + "\n", + "Enter below the Neuroglancer state ID, from the end of a link such as:\n", + "https://spelunker.cave-explorer.org/#!middleauth+https://global.daf-apis.com/nglstate/api/v1/6407833740378112\n", + "\n", + "Also enter the name of the annotation layers with merge errors and split errors, indicated as line annotations; and the name of the segmentation layer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87a07aa6-a870-4c9b-961d-fe063df66427", + "metadata": {}, + "outputs": [], + "source": [ + "# https://spelunker.cave-explorer.org/#!middleauth+https://global.daf-apis.com/nglstate/api/v1/5984476767191040\n", + "state_id = 5984476767191040\n", + "merge_err_layer_name = \"merge\"\n", + "split_err_layer_name = \"split\"\n", + "\n", + "# sometimes we want segmentations from the same NG state, but sometimes\n", + "# we want them from a different state. So:\n", + "# https://spelunker.cave-explorer.org/#!middleauth+https://global.daf-apis.com/nglstate/api/v1/6064644210819072\n", + "seg_state_id = 6064644210819072\n", + "seg_layer_name = \"seg\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3de34e3f-ac1f-48d4-a154-cded2420a283", + "metadata": {}, + "outputs": [], + "source": [ + "from caveclient import CAVEclient\n", + "import nglui\n", + "from nglui.statebuilder import *\n", + "import pandas as pd\n", + "import numpy as np\n", + "from zetta_utils.layer.volumetric.cloudvol import build_cv_layer;\n", + "from zetta_utils.geometry import Vec3D;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "347fbfd8-a9c7-4f2f-bbde-749c59d889a8", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the annotation data\n", + "client = CAVEclient()\n", + "state = client.state.get_state_json(state_id)\n", + "\n", + "# I'm not sure what linked_segmentations does. But in the current data I'm working with, it \n", + "# just returns an empty list. Maybe in some other data it does something useful.\n", + "ptA, ptB, segs = nglui.parser.line_annotations(state, merge_err_layer_name, linked_segmentations=True)\n", + "for i in range(0, len(segs)):\n", + " if segs[i]: segs[i] = segs[i][0]\n", + "df = pd.DataFrame({\"ptA\": ptA, \"ptB\": ptB})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "542835a3-6f75-40bb-a3dc-79f73ea89a33", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the segmentation data from CloudVolume.\n", + "seg_state = client.state.get_state_json(seg_state_id)\n", + "seg_path = nglui.parser.get_layer(seg_state, seg_layer_name)['source']\n", + "if seg_path.startswith('precomputed://'): seg_path = seg_path[14:]\n", + "print(f'Loading segmentation data from {seg_path}')\n", + "index_resolution = Vec3D(24, 24, 45)\n", + "data_resolution = Vec3D(96, 96, 45)\n", + "cvl = build_cv_layer(path=seg_path,\n", + " allow_slice_rounding=True,\n", + " index_resolution=index_resolution,\n", + " data_resolution=data_resolution,\n", + " interpolation_mode='nearest',\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e5bfad4-2afa-4b7b-8171-36de29a29975", + "metadata": {}, + "outputs": [], + "source": [ + "# Define a function to look up the segment ID at a given point.\n", + "def seg_at_point(pos, cutout, cutout_base):\n", + " i = np.floor(pos - cutout_base).astype(int)\n", + " return cutout[i[0], i[1], i[2]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7718c962-d92d-46d6-b0bf-ab5dc11649cb", + "metadata": {}, + "outputs": [], + "source": [ + "def find_segments(df, label='Dataset'):\n", + " df['segA'] = None\n", + " df['segB'] = None\n", + "\n", + " # Determine the range of X, Y, and Z.\n", + " min_x = min(df['ptA'].apply(lambda p: p[0]).min(), df['ptB'].apply(lambda p: p[0]).min())\n", + " max_x = max(df['ptA'].apply(lambda p: p[0]).max(), df['ptB'].apply(lambda p: p[0]).max())\n", + " \n", + " min_y = min(df['ptA'].apply(lambda p: p[1]).min(), df['ptB'].apply(lambda p: p[1]).min())\n", + " max_y = max(df['ptA'].apply(lambda p: p[1]).max(), df['ptB'].apply(lambda p: p[1]).max())\n", + " \n", + " min_z = min(df['ptA'].apply(lambda p: p[2]).min(), df['ptB'].apply(lambda p: p[2]).min())\n", + " max_z = max(df['ptA'].apply(lambda p: p[2]).max(), df['ptB'].apply(lambda p: p[2]).max())\n", + " print(f'{label} X ranges from {min_x} to {max_x}')\n", + " print(f'{label} Y ranges from {min_y} to {max_y}')\n", + " print(f'{label} Z ranges from {min_z} to {max_z}')\n", + "\n", + " # Iterate over that volume in blocks small enough to download, finding the segments\n", + " # associated with any points in that block.\n", + " x_stride = 512\n", + " y_stride = 512\n", + " z_stride = 128\n", + "\n", + " print('Finding segments...')\n", + " for x in np.arange(min_x, max_x + 1, x_stride):\n", + " print(f'x={x} ({100*(x-min_x)/(max_x-min_x):.0f}%)')\n", + " for y in np.arange(min_y, max_y + 1, y_stride):\n", + " for z in np.arange(min_z, max_z + 1, z_stride):\n", + " ptA_in_range = df['ptA'].apply(lambda p: x <= p[0] < x + x_stride and y <= p[1] < y + y_stride and z <= p[2] < z + z_stride)\n", + " ptB_in_range = df['ptB'].apply(lambda p: x <= p[0] < x + x_stride and y <= p[1] < y + y_stride and z <= p[2] < z + z_stride)\n", + " \n", + " # Get indexes where ptA or ptB are in range; if none, skip to next\n", + " indexes_in_range = df[ptA_in_range | ptB_in_range].index\n", + " if len(indexes_in_range) == 0: continue\n", + " \n", + " # Load a block (cutout) of segmentation data\n", + " cutout = cvl[index_resolution, x:x+x_stride, y:y+y_stride, z:z+z_stride]\n", + " cutout = cutout[0] # (use only channel 0)\n", + " cutout_base = Vec3D(x, y, z)\n", + " for index in indexes_in_range: \n", + " pt = df.loc[index, 'ptA']\n", + " if x <= pt[0] < x + x_stride and y < pt[1] < y + y_stride and z < pt[2] < z + z_stride:\n", + " df.at[index, 'segA'] = seg_at_point(Vec3D(*pt), cutout, cutout_base)\n", + " pt = df.loc[index, 'ptB']\n", + " if x <= pt[0] < x + x_stride and y < pt[1] < y + y_stride and z < pt[2] < z + z_stride:\n", + " df.at[index, 'segB'] = seg_at_point(Vec3D(*pt), cutout, cutout_base)\n", + " same_seg = (df['segA'] == df['segB']).sum()\n", + " diff_seg = (df['segA'] != df['segB']).sum()\n", + " print(f'{label} has the same segment on {same_seg} rows, and different segments on {diff_seg} rows.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "beb3e7f5-8885-4c7a-bd8b-610c7624bda9", + "metadata": {}, + "outputs": [], + "source": [ + "find_segments(df, merge_err_layer_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b53685b-7928-401a-9772-ed644a1059c9", + "metadata": {}, + "outputs": [], + "source": [ + "same_seg = (df['segA'] == df['segB']).sum()\n", + "diff_seg = (df['segA'] != df['segB']).sum()\n", + "print(f'This dataset has the same segment on {same_seg} rows, and different segments on {diff_seg} rows.')\n", + "print(f'So, {diff_seg/len(df)} ={diff_seg}/{len(df)} merge errors fixed.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d71af9bb-79bd-4976-aea7-af68c02cc637", + "metadata": {}, + "outputs": [], + "source": [ + "# Now let's do the same work for split errors.\n", + "ptA, ptB, segs = nglui.parser.line_annotations(state, split_err_layer_name, linked_segmentations=True)\n", + "for i in range(0, len(segs)):\n", + " if segs[i]: segs[i] = segs[i][0]\n", + "split_df = pd.DataFrame({\"ptA\": ptA, \"ptB\": ptB})\n", + "find_segments(split_df, split_err_layer_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82ef7091-b27e-4105-8d0d-8b5a9df25b79", + "metadata": {}, + "outputs": [], + "source": [ + "same_seg = (split_df['segA'] == split_df['segB']).sum()\n", + "diff_seg = (split_df['segA'] != split_df['segB']).sum()\n", + "print(f'This dataset has the same segment on {same_seg} rows, and different segments on {diff_seg} rows.')\n", + "print(f'So, {same_seg/len(split_df)} ={same_seg}/{len(split_df)} split errors fixed.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9d24fb2-6449-4cb9-a4db-130cf56eb128", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}