From 00784737f13ab5acc6042df0f81db4669132c1ea Mon Sep 17 00:00:00 2001 From: JoeStrout Date: Mon, 19 Aug 2024 21:02:45 +0000 Subject: [PATCH] Add three new notebook files. --- notebooks/joe/ContactPathAnalysis.ipynb | 379 ++++++++++++++++++++++ notebooks/joe/PoleOfInaccessibility.ipynb | 299 +++++++++++++++++ notebooks/joe/ProspectiveSynapses.ipynb | 308 ++++++++++++++++++ 3 files changed, 986 insertions(+) create mode 100644 notebooks/joe/ContactPathAnalysis.ipynb create mode 100644 notebooks/joe/PoleOfInaccessibility.ipynb create mode 100644 notebooks/joe/ProspectiveSynapses.ipynb diff --git a/notebooks/joe/ContactPathAnalysis.ipynb b/notebooks/joe/ContactPathAnalysis.ipynb new file mode 100644 index 000000000..6b949b871 --- /dev/null +++ b/notebooks/joe/ContactPathAnalysis.ipynb @@ -0,0 +1,379 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "778c11e4-9531-436c-9ff6-96eaa2d6cb00", + "metadata": {}, + "source": [ + "# Contact Path Analysis\n", + "\n", + "This notebook analyzes a contact between two cells, found by dilating them both by a few pixels and then identifying the region of overlap. It's a 2D process since in the datasets I've been working with, the Z resolution is so poor that humans routinely work in a 2D view, and the point of all this is to identify putative synapses for human review.\n", + "\n", + "Goals of the current analysis:\n", + "1. Find a good \"midpoint\" for a contact region (which always looks like a thick line).\n", + "2. Create a perpendicular line at that point.\n", + "\n", + "When we draw this perpendicular, it should appear to bisect the line into two approximately equal halves. (If the line is a loop, then it will cut at some arbitrary position on the loop.) For our synapse application, this line will represent a putative synapse, connecting the presynaptic and postsynaptic cells at the center of the contact." 
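+    ,"\n",
+    "\n",
+    "For reference, the contact-finding step mentioned above (dilating both cells by a few pixels and intersecting them) boils down to something like the sketch below in 2D. This is a minimal illustration, with `mask_a`, `mask_b`, and the dilation radius as stand-ins rather than the actual upstream script:\n",
+    "\n",
+    "```python\n",
+    "import numpy as np\n",
+    "from skimage.morphology import binary_dilation, disk\n",
+    "\n",
+    "def contact_region(mask_a: np.ndarray, mask_b: np.ndarray, radius: int = 3) -> np.ndarray:\n",
+    "    # Dilate each boolean cell mask by a few pixels, then keep the overlap.\n",
+    "    footprint = disk(radius)\n",
+    "    return binary_dilation(mask_a, footprint) & binary_dilation(mask_b, footprint)\n",
+    "```"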
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9c239c9-8ffa-44f2-bb9f-d9466380b537", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "from zetta_utils.layer.volumetric.cloudvol import build_cv_layer\n", + "from zetta_utils.geometry import Vec3D\n", + "import cc3d\n", + "from collections import deque\n", + "import numpy as np\n", + "import zetta_utils.tensor_ops.convert as convert\n", + "from PIL import Image\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.colors as mcolors\n", + "from skimage.morphology import skeletonize\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd9f6c5f-3dc5-47d4-a910-964818888e11", + "metadata": {}, + "outputs": [], + "source": [ + "# Constants\n", + "CONTACT_SEG_PATH = \"gs://tmp_2w/joe/concact-20240801\"\n", + "RESOLUTION = (8, 8, 42) # (nm)\n", + "BOUNDS_START = (27991, 21266, 3063) # (voxels)\n", + "BOUNDS_END = (28247, 21522, 3103)\n", + "Z = 3081 # particular Z level we want to work with" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3e5071b-ef2f-44f8-a000-c7efb13da7f5", + "metadata": {}, + "outputs": [], + "source": [ + "# Load and extract the 2D image at the given Z\n", + "cvl = build_cv_layer(path = CONTACT_SEG_PATH)\n", + "data = cvl[RESOLUTION, BOUNDS_START[0]:BOUNDS_END[0], BOUNDS_START[1]:BOUNDS_END[1], Z:Z+1]\n", + "data = data[0, :, :, 0]\n", + "print(f'Loaded {data.shape} image of type {data.dtype}, with {len(np.unique(data))} unique values')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b13c0756-6ed0-4db6-818a-df55c8418284", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's plot the data so we can see what it looks like\n", + "def plot(data):\n", + " unique_values = np.unique(data)\n", + " qty_values = len(unique_values)\n", + " \n", + " # create appropriately sized color map\n", + " if qty_values > 2:\n", + " colors = plt.colormaps['tab20'](np.linspace(0, 1, qty_values - 1))\n", + " color_list = [(0,0,0)] + [colors[i] for i in range(qty_values-1)]\n", + " cmap = mcolors.ListedColormap(color_list)\n", + " else:\n", + " cmap = 'gray'\n", + " \n", + " # remap unique values to sequential values from 0 - qty_values-1\n", + " value_to_index = {v: i for i, v in enumerate(unique_values)}\n", + " indexed_data = np.vectorize(value_to_index.get)(data)\n", + "\n", + " # transpose to match the orientation seen in NG\n", + " indexed_data = np.transpose(indexed_data)\n", + "\n", + " # plot the data using the indexed colors\n", + " plt.figure(figsize=(4,4))\n", + " plt.imshow(indexed_data, cmap=cmap, interpolation='nearest')\n", + " plt.show()\n", + "plot(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cf57633-42f0-4620-aa55-4ccb8de2002d", + "metadata": {}, + "outputs": [], + "source": [ + "# Now let's focus on a single value.\n", + "CONTACT_ID = 70\n", + "contact_data = (data == CONTACT_ID).astype(bool)\n", + "plot(contact_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4be8f206-f9a5-4c23-9e47-98dc7be9e0cb", + "metadata": {}, + "outputs": [], + "source": [ + "# Skeletonize this contact region.\n", + "skeleton = skeletonize(contact_data)\n", + "plot(skeleton)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8632d8e0-aea2-41aa-86a6-bc52700ed1e3", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's find the endpoints of the skeleton.\n", + "# (For now, assume that there are exactly two such; ToDo: deal 
with gaps (more than 2 endpoints) and loops (0 endpoints).\n", + "def find_endpoints(skeleton):\n", + " endpoints = []\n", + " # Define the 8-connectivity structure\n", + " struct = np.array([[1,1,1],\n", + " [1,0,1],\n", + " [1,1,1]])\n", + " \n", + " for i in range(1, skeleton.shape[0] - 1):\n", + " for j in range(1, skeleton.shape[1] - 1):\n", + " if skeleton[i, j] == 1:\n", + " # Count neighbors\n", + " neighbors = np.sum(skeleton[i-1:i+2, j-1:j+2] * struct)\n", + " if neighbors == 1:\n", + " endpoints.append((i, j))\n", + " return endpoints\n", + "endpoints = find_endpoints(skeleton)\n", + "print(f'Skeleton endpoints: {endpoints}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29393a09-1ce0-41cb-ace2-77bb15f01b7a", + "metadata": {}, + "outputs": [], + "source": [ + "# Now trace the path, again using 8-neighbor connectivity.\n", + "def trace_path(skeleton, start):\n", + " path = []\n", + " queue = deque([start])\n", + " visited = set()\n", + " \n", + " while queue:\n", + " current = queue.popleft()\n", + " if current in visited:\n", + " continue\n", + " visited.add(current)\n", + " path.append(current)\n", + " \n", + " i, j = current\n", + " # Look at all 8 neighbors\n", + " for ni in range(i-1, i+2):\n", + " for nj in range(j-1, j+2):\n", + " if (ni, nj) != (i, j) and skeleton[ni, nj] == 1 and (ni, nj) not in visited:\n", + " queue.append((ni, nj))\n", + " break # Found the next step in the path\n", + " return path\n", + "path = trace_path(skeleton, endpoints[0])\n", + "midpoint = path[len(path) // 2]\n", + "print(f'Traced path of length {len(path)} from {path[0]} to {path[-1]}, with midpoint at {midpoint}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cbe52b0-ead8-4975-b880-6b616f8c43a3", + "metadata": {}, + "outputs": [], + "source": [ + "# Estimate the slope at the midpoint, so we can find a perpendicular." 
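+    ,"\n",
+    "# (For a unit tangent (dx, dy), a perpendicular direction is (-dy, dx), since\n",
+    "# dx*(-dy) + dy*dx == 0; the bisecting-line cell below offsets the midpoint by\n",
+    "# +/- hl * (-dy, dx) to get the two ends of the perpendicular.)"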
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d7c6801-2a3e-4bbc-b71d-603113202964", + "metadata": {}, + "outputs": [], + "source": [ + "def estimate_slope(path, midpoint, window=3):\n", + " mid_index = path.index(midpoint)\n", + " \n", + " # Define indices for a small segment around the midpoint\n", + " start_index = max(0, mid_index - window)\n", + " end_index = min(len(path) - 1, mid_index + window)\n", + " \n", + " # Coordinates of the start and end points of the segment\n", + " start_point = path[start_index]\n", + " end_point = path[end_index]\n", + " \n", + " # Calculate the vector (dx, dy)\n", + " dx = end_point[0] - start_point[0]\n", + " dy = end_point[1] - start_point[1]\n", + " \n", + " # Normalize the vector to avoid scaling issues\n", + " length = np.hypot(dx, dy)\n", + " if length != 0:\n", + " dx /= length\n", + " dy /= length\n", + " \n", + " return dx, dy\n", + "dx, dy = estimate_slope(path, midpoint)\n", + "print(f'Slope at midpoint: {(dx, dy)}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a723f889-50aa-44ea-8ab0-f325eea230d3", + "metadata": {}, + "outputs": [], + "source": [ + "# define a bisecting line (which could also be a putative synapse!)\n", + "hl = 10 # line half-length\n", + "line = ((midpoint[0]-dy*hl, midpoint[1]+dx*hl), (midpoint[0]+dy*hl, midpoint[1]-dx*hl), 'red')\n", + "print(f'Bisecting line: {line}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4ae6c0e-b9e7-4c9c-9ecc-2e074b229ab8", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's expand our plot function so it can include a set of lines.\n", + "# Each line defined as (xy0, xy1, color).\n", + "def plot(data, lines=[]):\n", + " unique_values = np.unique(data)\n", + " qty_values = len(unique_values)\n", + " \n", + " # create appropriately sized color map\n", + " if qty_values > 2:\n", + " colors = plt.colormaps['tab20'](np.linspace(0, 1, qty_values - 1))\n", + " color_list = [(0,0,0)] + [colors[i] for i in range(qty_values-1)]\n", + " cmap = mcolors.ListedColormap(color_list)\n", + " else:\n", + " cmap = 'gray'\n", + " \n", + " # remap unique values to sequential values from 0 - qty_values-1\n", + " value_to_index = {v: i for i, v in enumerate(unique_values)}\n", + " indexed_data = np.vectorize(value_to_index.get)(data)\n", + "\n", + " # transpose to match the orientation seen in NG\n", + " indexed_data = np.transpose(indexed_data)\n", + "\n", + " # plot the data using the indexed colors\n", + " plt.figure(figsize=(4,4))\n", + " plt.imshow(indexed_data, cmap=cmap, interpolation='nearest')\n", + "\n", + " # additional lines: plot 'em if you got 'em\n", + " for line in lines:\n", + " (x0, y0), (x1, y1), color = line\n", + " plt.plot([x0, x1], [y0, y1], color=color, linewidth=2)\n", + " plt.show()\n", + "plot(contact_data, [line])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80c0fa3f-1481-40dc-a322-42cce93f9de7", + "metadata": {}, + "outputs": [], + "source": [ + "# Let's wrap all that up into a single method, and try it on a couple other contacts.\n", + "def plot_bisection(data, contact_id):\n", + " contact_data = (data == contact_id).astype(bool)\n", + " skeleton = skeletonize(contact_data)\n", + " endpoints = find_endpoints(skeleton)\n", + " path = trace_path(skeleton, endpoints[0])\n", + " midpoint = path[len(path) // 2]\n", + " print(f'Traced path of length {len(path)} from {path[0]} to {path[-1]}, with midpoint at {midpoint}')\n", + " dx, dy = estimate_slope(path, midpoint)\n", + " 
hl = 10 # line half-length\n", + " line = ((midpoint[0]-dy*hl, midpoint[1]+dx*hl), (midpoint[0]+dy*hl, midpoint[1]-dx*hl), 'red')\n", + " plot(contact_data, [line])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03691dd6-7f64-4728-8b57-16f4c64964e4", + "metadata": {}, + "outputs": [], + "source": [ + "plot_bisection(data, 71)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3c9e497-22fe-41f7-acd3-1aca867bec50", + "metadata": {}, + "outputs": [], + "source": [ + "plot_bisection(data, 108)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f71073d-7eed-41f7-8e23-3ee7d9ec27f3", + "metadata": {}, + "outputs": [], + "source": [ + "plot_bisection(data, 67)" + ] + }, + { + "cell_type": "markdown", + "id": "ec89c2aa-d384-420b-8fa2-fe1e4c751a7e", + "metadata": {}, + "source": [ + "## Future Work\n", + "\n", + "- The path extraction probably does not deal well with gaps. When we have more than two endpoints, we might need to try tracing the path from each of them, and pick the longest path for bisection.\n", + "- The last test above does seem to handle a loop acceptably, but it's not clear why the skeleton has any endpoints at all in this case. It's worth digging into that more to ensure it's robust.\n", + "- The perpendicular uses an arbitrary window to compute the local slope. We could probably do better by averaging over several different window sizes.\n", + "- All this still needs to be wrapped up as part of a larger analysis loop, which finds perpendiculars for every contact and ensures that the endpoints are in the right cell (using the cell segmentation layer), and outputs those in NG or Precomputed format." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86fbfc3c-f96e-4291-9c62-8ce0f56191a4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/joe/PoleOfInaccessibility.ipynb b/notebooks/joe/PoleOfInaccessibility.ipynb new file mode 100644 index 000000000..15889e278 --- /dev/null +++ b/notebooks/joe/PoleOfInaccessibility.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "62cfd58c-7857-427d-931b-6dd83d3ef3e2", + "metadata": {}, + "source": [ + "# A Better Center\n", + "\n", + "This notebook demonstrates how to calculate the **\"Pole of Inaccessibility\"**, which may be a better notion of \"center\" for odd (especially concave) shapes.\n", + "\n", + "Reference: https://en.wikipedia.org/wiki/Pole_of_inaccessibility" + ] + }, + { + "cell_type": "markdown", + "id": "c039b5a5-10b8-4913-8617-67deb9003e0b", + "metadata": {}, + "source": [ + "Let's start by loading an image from a GS bucket.\n", + "This will represent a supervoxel, for example." 
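+    ,"\n",
+    "\n",
+    "(If the `joe_exp` bucket isn't handy, any concave binary mask works as a stand-in for `binary_image_array`; the snippet below builds a purely illustrative synthetic one.)\n",
+    "\n",
+    "```python\n",
+    "import numpy as np\n",
+    "\n",
+    "# Crescent-like test shape: a large disk with a smaller, offset disk removed.\n",
+    "yy, xx = np.mgrid[0:200, 0:200]\n",
+    "big_disk = (yy - 100) ** 2 + (xx - 100) ** 2 < 80 ** 2\n",
+    "bite = (yy - 100) ** 2 + (xx - 140) ** 2 < 60 ** 2\n",
+    "binary_image_array = (big_disk & ~bite).astype(np.uint8)\n",
+    "```"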
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "589851f1-75e3-4fd9-94d6-90d4bed3e691", + "metadata": {}, + "outputs": [], + "source": [ + "from google.cloud import storage\n", + "from PIL import Image\n", + "import numpy as np\n", + "import io\n", + "import matplotlib.pyplot as plt\n", + "from scipy.spatial import distance\n", + "from skimage.morphology import binary_dilation\n", + "from skimage.measure import label\n", + "\n", + "def download_image_from_gcs(bucket_name, source_blob_name):\n", + " \"\"\"Downloads an image file from Google Cloud Storage and returns it as a PIL Image.\"\"\"\n", + " storage_client = storage.Client()\n", + " bucket = storage_client.bucket(bucket_name)\n", + " blob = bucket.blob(source_blob_name)\n", + " image_data = blob.download_as_bytes()\n", + " return Image.open(io.BytesIO(image_data))\n", + "\n", + "# Specify the bucket and image path\n", + "bucket_name = \"joe_exp\"\n", + "source_blob_name = \"shapes/shape_0.png\"\n", + "\n", + "# Download the image\n", + "image = download_image_from_gcs(bucket_name, source_blob_name)\n", + "\n", + "# Convert the image to a NumPy array\n", + "image_array = np.array(image)\n", + "\n", + "# Threshold the image at a value of 128, making a binary image (1 for the interior, 0 for the exterior)\n", + "threshold_value = 128\n", + "binary_image_array = (image_array < threshold_value).astype(np.uint8)" + ] + }, + { + "cell_type": "markdown", + "id": "b016e720-4186-49c3-b3ce-852c72505c55", + "metadata": {}, + "source": [ + "Now let's get a list of all the interior coordinates, and the perimeter coordinates." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13252a9f-b2ea-4809-b392-f26d36820bd9", + "metadata": {}, + "outputs": [], + "source": [ + "# Find the interior pixels (all pixels with a value of 1)\n", + "interior_coords = np.argwhere(binary_image_array == 1)\n", + "\n", + "# Find the perimeter pixels\n", + "dilated_image = binary_dilation(binary_image_array)\n", + "perimeter_image = dilated_image ^ binary_image_array # XOR to find perimeter pixels\n", + "perimeter_coords = np.argwhere(perimeter_image == 1)\n", + "\n", + "# Convert the coordinates to lists of tuples\n", + "interior_coords = [tuple(coord) for coord in interior_coords]\n", + "perimeter_coords = [tuple(coord) for coord in perimeter_coords]\n", + "\n", + "print(\"Number of interior pixels:\", len(interior_coords))\n", + "print(\"Number of perimeter pixels:\", len(perimeter_coords))" + ] + }, + { + "cell_type": "markdown", + "id": "92dd473e-273a-41ab-b286-8642dc5ea64f", + "metadata": {}, + "source": [ + "With the list of interior coordinates, calculating the centroid is trivial.\n", + "(It's just the average coordinate.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91fc7ac2-583a-42e1-b9a8-c14c17b94523", + "metadata": {}, + "outputs": [], + "source": [ + "centroid = np.mean(interior_coords, axis=0)\n", + "print(\"Centroid:\", centroid)" + ] + }, + { + "cell_type": "markdown", + "id": "a92e127b-86cb-48a6-b5a5-8f59d3c64147", + "metadata": {}, + "source": [ + "But now let's calculate the _Pole of Inaccessibility_ -- that is, the interior point\n", + "which is farthest away from the nearest perimeter point (so, most inaccessible)." 
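+    ,"\n",
+    "\n",
+    "In symbols, with $I$ the set of interior pixels and $P$ the set of perimeter pixels:\n",
+    "\n",
+    "$$\\mathrm{pole} = \\arg\\max_{p \\in I} \\; \\min_{q \\in P} \\lVert p - q \\rVert_2$$"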
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0bf9812-27eb-4ee8-920b-3421104aba94", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate the distance between each interior pixel and all perimeter pixels\n", + "distances = distance.cdist(interior_coords, perimeter_coords, metric='euclidean')\n", + "\n", + "# Find the minimum distance to a perimeter pixel for each interior pixel\n", + "min_distances = distances.min(axis=1)\n", + "\n", + "# Identify the interior pixel with the maximum of these minimum distances\n", + "max_min_distance_index = np.argmax(min_distances)\n", + "pole_of_inaccessibility = interior_coords[max_min_distance_index]\n", + "\n", + "print(\"Pole of Inaccessibility:\", pole_of_inaccessibility)" + ] + }, + { + "cell_type": "markdown", + "id": "901f6ca1-dd1e-45a1-9b45-1460ce45b08a", + "metadata": {}, + "source": [ + "And now plot both types of center, so we can see." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8070d42-fedf-4f9f-8214-0db06d51fd07", + "metadata": {}, + "outputs": [], + "source": [ + "def show_result(binary_image_array, centroid, pole, legend_pos=(1, 0.5)):\n", + " # Plot the binary image\n", + " plt.imshow(binary_image_array, cmap='gray')\n", + " #plt.title(\"Binary Image with Centroid and Pole of Inaccessibility\")\n", + " plt.axis('off') # Hide the axis\n", + " \n", + " # Plot the centroid\n", + " plt.scatter(*centroid[::-1], color='red', label='Centroid', marker='x', s=100)\n", + " \n", + " # Plot the pole of inaccessibility\n", + " plt.scatter(*pole[::-1], color='blue', label='Pole of Inaccessibility', marker='o', s=100)\n", + " \n", + " if legend_pos is not None:\n", + " plt.legend(loc='center left', bbox_to_anchor=legend_pos)\n", + "\n", + " # Adjust the layout to make space for the legend, and show it\n", + " plt.tight_layout()\n", + " plt.show()\n", + "show_result(binary_image_array, centroid, pole_of_inaccessibility)" + ] + }, + { + "cell_type": "markdown", + "id": "42dbd039-17c7-429a-b19f-e075b021fecc", + "metadata": {}, + "source": [ + "## Let's Encapsulate\n", + "\n", + "The above developed & demonstrated the algorithm step by step. But now let's make a nice neat function to return the pole for any binary image (ndarray). 
\"Pole of inaccessibility\" is an overly dramatic and wordy name, so we'll just call it the \"center pole.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e70e9e6-bea3-4a2f-ac3c-6dab17643aa3", + "metadata": {}, + "outputs": [], + "source": [ + "def center_pole(binary_image_array):\n", + " # Find the interior pixels (all pixels with a value of 1), and the surrounding perimiter pixels\n", + " interior_coords = np.argwhere(binary_image_array == 1)\n", + "\n", + " # Find the perimeter pixels\n", + " dilated_image = binary_dilation(binary_image_array)\n", + " perimeter_image = dilated_image ^ binary_image_array # XOR to find perimeter pixels\n", + " perimeter_coords = np.argwhere(perimeter_image == 1)\n", + "\n", + " # Convert the coordinates to lists of tuples (needed by cdist)\n", + " interior_coords = [tuple(coord) for coord in interior_coords]\n", + " perimeter_coords = [tuple(coord) for coord in perimeter_coords]\n", + "\n", + " # Calculate the distance between each interior pixel and all perimeter pixels\n", + " distances = distance.cdist(interior_coords, perimeter_coords, metric='euclidean')\n", + " \n", + " # Find the minimum distance to a perimeter pixel for each interior pixel\n", + " min_distances = distances.min(axis=1)\n", + " \n", + " # Identify the interior pixel with the maximum of these minimum distances\n", + " max_min_distance_index = np.argmax(min_distances)\n", + " pole_of_inaccessibility = interior_coords[max_min_distance_index]\n", + " return pole_of_inaccessibility" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfb0e2c0-95d0-40a2-9ffa-7e1efba6eb64", + "metadata": {}, + "outputs": [], + "source": [ + "def centroid(binary_image_array):\n", + " # Find the interior pixels (all pixels with a value of 1),\n", + " # and average to find the centroid\n", + " interior_coords = np.argwhere(binary_image_array == 1)\n", + " centroid = np.mean(interior_coords, axis=0)\n", + " return centroid" + ] + }, + { + "cell_type": "markdown", + "id": "a61c283f-9632-4f87-a2d9-0c35904cde48", + "metadata": {}, + "source": [ + "And let's demonstrate it on a few more examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58c54291-7ba7-4c48-8ffa-62ba8c65402c", + "metadata": {}, + "outputs": [], + "source": [ + "def load_binary(source_blob_name, threshold_value=128):\n", + " image = download_image_from_gcs(bucket_name, source_blob_name)\n", + " image_array = np.array(image)\n", + " return (image_array < threshold_value).astype(np.uint8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b39fe340-7d57-4ae4-b697-d5404164373f", + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(1, 5):\n", + " img = load_binary(f'shapes/shape_{i}.png')\n", + " show_result(img, centroid(img), center_pole(img))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9303d1f3-b7ac-4291-90a1-9688683dc99f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/joe/ProspectiveSynapses.ipynb b/notebooks/joe/ProspectiveSynapses.ipynb new file mode 100644 index 000000000..78273f94f --- /dev/null +++ b/notebooks/joe/ProspectiveSynapses.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "10e54712-6b4f-4f5d-b1c7-6b93b91c5d27", + "metadata": {}, + "source": [ + "# Prospective Synapses\n", + "\n", + "This notebook analyzes all the contact points (already found by a script such as find-contacts.py)\n", + "within a small volume, and for each one, constructs a line segment representing a possible synapse.\n", + "These are written in NG state format, so that in Neuroglancer, a domain expert can review each one\n", + "and mark it as Synapse, Not Synapse, or Unclear (using the Description field)." + ] + }, + { + "cell_type": "markdown", + "id": "7a041664-70f8-405f-88ff-4a3c40e3b1ff", + "metadata": {}, + "source": [ + "See the ContactPathAnalysis notebook for a more detailed breakdown of the steps in constructing a single line bisecting a given contact." 
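+    ,"\n",
+    "\n",
+    "For reference, each entry emitted at the end of this notebook is a Neuroglancer line annotation roughly like the one below (coordinates made up for illustration); the reviewer then fills in the description field with Synapse, Not Synapse, or Unclear:\n",
+    "\n",
+    "```json\n",
+    "{\n",
+    "    \"pointA\": [14000, 10700, 3081.5],\n",
+    "    \"pointB\": [14006, 10700, 3081.5],\n",
+    "    \"type\": \"line\",\n",
+    "    \"id\": \"70\"\n",
+    "}\n",
+    "```"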
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2078f36-a4bb-49b8-a468-86021da4629a", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "from zetta_utils.layer.volumetric.cloudvol import build_cv_layer\n", + "from zetta_utils.geometry import Vec3D\n", + "import cc3d\n", + "from collections import deque\n", + "import numpy as np\n", + "import zetta_utils.tensor_ops.convert as convert\n", + "from PIL import Image\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.colors as mcolors\n", + "from skimage.morphology import skeletonize\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e130596e-e5d8-41f2-b936-0be3a47126d4", + "metadata": {}, + "outputs": [], + "source": [ + "# Constants\n", + "CONTACT_SEG_PATH = \"gs://tmp_2w/joe/concact-20240816\"\n", + "RESOLUTION = (16, 16, 42) # (nm)\n", + "BOUNDS_START = (13995, 10633, 3063) # (voxels)\n", + "BOUNDS_END = (14123, 10761, 3103)\n", + "CELL_SEG_PATH = \"gs://dkronauer-ant-001-kisuk/test/240507-finetune-z1400-3400/seg\"\n", + "PRESYN_CELL_ID = 74170031222807106" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6aee9f3-d2db-46dc-8a09-250963d7b303", + "metadata": {}, + "outputs": [], + "source": [ + "# Load and extract the 3D volume of the contact segmentation layer\n", + "cvl = build_cv_layer(path = CONTACT_SEG_PATH)\n", + "data = cvl[RESOLUTION, BOUNDS_START[0]:BOUNDS_END[0], BOUNDS_START[1]:BOUNDS_END[1], BOUNDS_START[2]:BOUNDS_END[2]]\n", + "data = data[0] # (ignore dimension 0, channel)\n", + "print(f'Contacts: Loaded {data.shape} image of type {data.dtype}, with {len(np.unique(data))} unique values')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb1cbdf1-52ba-44b2-a2b6-02ab84ad6d72", + "metadata": {}, + "outputs": [], + "source": [ + "# And load the cell segmentation layer.\n", + "cell_vol = build_cv_layer(path = CELL_SEG_PATH)\n", + "cell_data = cell_vol[RESOLUTION, BOUNDS_START[0]:BOUNDS_END[0], BOUNDS_START[1]:BOUNDS_END[1], BOUNDS_START[2]:BOUNDS_END[2]]\n", + "cell_data = cell_data[0] # (ignore dimension 0, channel)\n", + "print(f'Cells: Loaded {cell_data.shape} image of type {data.dtype}, with {len(np.unique(cell_data))} unique values')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52ba5b0e-4f2f-4ad9-851b-efe0fa59a82c", + "metadata": {}, + "outputs": [], + "source": [ + "# Define a function to find the Z value that contains the most of a given contact ID.\n", + "def z_of_max_count(array: np.ndarray, value: int) -> int:\n", + " counts = np.sum(array == value, axis=(0, 1))\n", + " return np.argmax(counts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccd09518-3009-4b30-b7d8-5ae2fcdd0160", + "metadata": {}, + "outputs": [], + "source": [ + "def find_endpoints(skeleton):\n", + " endpoints = []\n", + " # Define the 8-connectivity structure\n", + " struct = np.array([[1,1,1],\n", + " [1,0,1],\n", + " [1,1,1]])\n", + " \n", + " for i in range(1, skeleton.shape[0] - 1):\n", + " for j in range(1, skeleton.shape[1] - 1):\n", + " if skeleton[i, j] == 1:\n", + " # Count neighbors\n", + " neighbors = np.sum(skeleton[i-1:i+2, j-1:j+2] * struct)\n", + " if neighbors == 1:\n", + " endpoints.append((i, j))\n", + " if not endpoints:\n", + " # No endpoints? Must be a loop! 
Return an arbitrary point\n", + " skel_points = np.argwhere(skeleton)\n", + " if len(skel_points) < 3:\n", + " return None\n", + " i = len(skel_points) // 2\n", + " endpoints = [tuple(skel_points[i]), tuple(skel_points[i+1])]\n", + " return endpoints\n", + "\n", + "def trace_path(skeleton, start):\n", + " path = []\n", + " queue = deque([start])\n", + " visited = set()\n", + " \n", + " while queue:\n", + " current = queue.popleft()\n", + " if current in visited:\n", + " continue\n", + " visited.add(current)\n", + " path.append(current)\n", + " \n", + " i, j = current\n", + " # Look at all 8 neighbors\n", + " for ni in range(i-1, i+2):\n", + " for nj in range(j-1, j+2):\n", + " if (ni, nj) != (i, j) and skeleton[ni, nj] == 1 and (ni, nj) not in visited:\n", + " queue.append((ni, nj))\n", + " break # Found the next step in the path\n", + " return path\n", + "\n", + "def estimate_slope(path, midpoint, window=3):\n", + " mid_index = path.index(midpoint)\n", + " \n", + " # Define indices for a small segment around the midpoint\n", + " start_index = max(0, mid_index - window)\n", + " end_index = min(len(path) - 1, mid_index + window)\n", + " \n", + " # Coordinates of the start and end points of the segment\n", + " start_point = path[start_index]\n", + " end_point = path[end_index]\n", + " \n", + " # Calculate the vector (dx, dy)\n", + " dx = end_point[0] - start_point[0]\n", + " dy = end_point[1] - start_point[1]\n", + " \n", + " # Normalize the vector to avoid scaling issues\n", + " length = np.hypot(dx, dy)\n", + " if length != 0:\n", + " dx /= length\n", + " dy /= length\n", + " \n", + " return dx, dy\n", + "\n", + "def estimate_avg_slope(path, midpoint):\n", + " dx_sum, dy_sum = 0, 0\n", + " count = 0\n", + " for window in range(1,5):\n", + " dx, dy = estimate_slope(path, midpoint, window)\n", + " dx_sum += dx\n", + " dy_sum += dy\n", + " count += 1\n", + " return dx_sum / count, dy_sum / count\n", + "\n", + "def calc_bisection(data2D, contact_id):\n", + " contact_data = (data2D == contact_id).astype(bool)\n", + " skeleton = skeletonize(contact_data)\n", + " endpoints = find_endpoints(skeleton)\n", + " if not endpoints:\n", + " return None\n", + " path = trace_path(skeleton, endpoints[0])\n", + " midpoint = path[len(path) // 2]\n", + " dx, dy = estimate_avg_slope(path, midpoint)\n", + " hl = 3 # line half-length\n", + " line = ((round(midpoint[0]-dy*hl), round(midpoint[1]+dx*hl)), \n", + " (round(midpoint[0]+dy*hl), round(midpoint[1]-dx*hl)), '')\n", + " return line" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2338bf0e-bbcd-4889-b045-cdbb5fc335e7", + "metadata": {}, + "outputs": [], + "source": [ + "# Define a function to return the 3D line for a given contact ID.\n", + "def compute_line(data3D: np.ndarray, cell_data: np.ndarray, contact_id: int):\n", + " \"\"\"\n", + " Examine the 2D image at the given z (relative to our bounds), find the segment in the given\n", + " array identified by contact_id, and calculate a short line segment that bisects that segment.\n", + " Return this in the form ((x0,y0), (x1,y1), description).\n", + " \"\"\"\n", + " best_z = z_of_max_count(data3D, contact_id)\n", + " image = data3D[:, :, best_z]\n", + " line2D = calc_bisection(image, contact_id)\n", + " if not line2D:\n", + " return None\n", + " # Look up the cell ID on either end of the line as well, then reorder the points\n", + " # so that the first point is the postsynaptic cell, and the second point is presynaptic.\n", + " cell_img = cell_data[:, :, best_z]\n", + " cell_A = 
cell_img[line2D[0]]\n", + " cell_B = cell_img[line2D[1]]\n", + " #print(f'Found putative synapse between {cell_A} and {cell_B}')\n", + " if cell_A == PRESYN_CELL_ID:\n", + " line2D = (line2D[1], line2D[0], line2D[2])\n", + " line3D = (line2D[0] + (best_z,), line2D[1] + (best_z,), line2D[2])\n", + " return line3D" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70f409d8-708e-40d9-9538-620cb7365cff", + "metadata": {}, + "outputs": [], + "source": [ + "# Now let's find the 3D lines for all our IDs. Store them in a list,\n", + "# where each element is (id, start, end, description)\n", + "lines = []\n", + "for id in np.unique(data):\n", + " if id == 0: continue\n", + " line = compute_line(data, cell_data, id)\n", + " if line:\n", + " lines.append ((id,) + line)\n", + " else:\n", + " print(f\"Couldn't find valid line for contact {id}\")\n", + "print(f'Found {len(lines)} putative synapses, such as:')\n", + "print(lines[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "891fda93-304e-4537-9b02-6205c766aa9c", + "metadata": {}, + "outputs": [], + "source": [ + "# Print them in JSON format, suitable for pasting into Neuroglancer.\n", + "z_offset = 0.5 # (needed because NG actually displays 0.5 units off of where it claims)\n", + "print('\"annotations\": [')\n", + "for i, line in enumerate(lines):\n", + " id, pos_A, pos_B, desc = line\n", + " pos_A = [pos_A[0] + BOUNDS_START[0], pos_A[1] + BOUNDS_START[1], pos_A[2] + BOUNDS_START[2] + z_offset]\n", + " pos_B = [pos_B[0] + BOUNDS_START[0], pos_B[1] + BOUNDS_START[1], pos_B[2] + BOUNDS_START[2] + z_offset]\n", + " print('{')\n", + " print(f'\"pointA\": {pos_A},')\n", + " print(f'\"pointB\": {pos_B},')\n", + " print('\"type\": \"line\",')\n", + " if desc:\n", + " print(f'\"description\": \"{desc}\",')\n", + " print(f'\"id\": \"{id}\"')\n", + " print('},' if i < len(lines)-1 else '}')\n", + "print(\"],\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7139023-6a9b-4366-9169-c19d54e5232a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}