Add three new notebook files.

ZettaAI · Aug 19, 2024 · 0078473 · 0078473
1 parent 3051644
commit 0078473
Show file tree

Hide file tree

Showing 3 changed files with 986 additions and 0 deletions.
diff --git a/notebooks/joe/ContactPathAnalysis.ipynb b/notebooks/joe/ContactPathAnalysis.ipynb
@@ -0,0 +1,379 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "778c11e4-9531-436c-9ff6-96eaa2d6cb00",
+   "metadata": {},
+   "source": [
+    "# Contact Path Analysis\n",
+    "\n",
+    "This notebook analyzes a contact between two cells, found by dilating them both by a few pixels and then identifying the region of overlap.  It's a 2D process since in the datasets I've been working with, the Z resolution is so poor that humans routinely work in a 2D view, and the point of all this is to identify putative synapses for human review.\n",
+    "\n",
+    "Goals of the current analysis:\n",
+    "1. Find a good \"midpoint\" for a contact region (which always looks like a thick line).\n",
+    "2. Create a perpendicular line at that point.\n",
+    "\n",
+    "When we draw this perpendicular, it should appear to bisect the line into two approximately equal halves.  (If the line is a loop, then it will cut at some arbitrary position on the loop.)  For our synapse application, this line will represent a putative synapse, connecting the presynaptic and postsynaptic cells at the center of the contact."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f9c239c9-8ffa-44f2-bb9f-d9466380b537",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Imports\n",
+    "from zetta_utils.layer.volumetric.cloudvol import build_cv_layer\n",
+    "from zetta_utils.geometry import Vec3D\n",
+    "import cc3d\n",
+    "from collections import deque\n",
+    "import numpy as np\n",
+    "import zetta_utils.tensor_ops.convert as convert\n",
+    "from PIL import Image\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.colors as mcolors\n",
+    "from skimage.morphology import skeletonize\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dd9f6c5f-3dc5-47d4-a910-964818888e11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Constants\n",
+    "CONTACT_SEG_PATH = \"gs://tmp_2w/joe/concact-20240801\"\n",
+    "RESOLUTION = (8, 8, 42)  # (nm)\n",
+    "BOUNDS_START = (27991, 21266, 3063)  # (voxels)\n",
+    "BOUNDS_END = (28247, 21522, 3103)\n",
+    "Z = 3081  # particular Z level we want to work with"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e3e5071b-ef2f-44f8-a000-c7efb13da7f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load and extract the 2D image at the given Z\n",
+    "cvl = build_cv_layer(path = CONTACT_SEG_PATH)\n",
+    "data = cvl[RESOLUTION, BOUNDS_START[0]:BOUNDS_END[0], BOUNDS_START[1]:BOUNDS_END[1], Z:Z+1]\n",
+    "data = data[0, :, :, 0]\n",
+    "print(f'Loaded {data.shape} image of type {data.dtype}, with {len(np.unique(data))} unique values')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b13c0756-6ed0-4db6-818a-df55c8418284",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's plot the data so we can see what it looks like\n",
+    "def plot(data):\n",
+    "    unique_values = np.unique(data)\n",
+    "    qty_values = len(unique_values)\n",
+    "    \n",
+    "    # create appropriately sized color map\n",
+    "    if qty_values > 2:\n",
+    "        colors = plt.colormaps['tab20'](np.linspace(0, 1, qty_values - 1))\n",
+    "        color_list = [(0,0,0)] + [colors[i] for i in range(qty_values-1)]\n",
+    "        cmap = mcolors.ListedColormap(color_list)\n",
+    "    else:\n",
+    "        cmap = 'gray'\n",
+    "    \n",
+    "    # remap unique values to sequential values from 0 - qty_values-1\n",
+    "    value_to_index = {v: i for i, v in enumerate(unique_values)}\n",
+    "    indexed_data = np.vectorize(value_to_index.get)(data)\n",
+    "\n",
+    "    # transpose to match the orientation seen in NG\n",
+    "    indexed_data = np.transpose(indexed_data)\n",
+    "\n",
+    "    # plot the data using the indexed colors\n",
+    "    plt.figure(figsize=(4,4))\n",
+    "    plt.imshow(indexed_data, cmap=cmap, interpolation='nearest')\n",
+    "    plt.show()\n",
+    "plot(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3cf57633-42f0-4620-aa55-4ccb8de2002d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now let's focus on a single value.\n",
+    "CONTACT_ID = 70\n",
+    "contact_data = (data == CONTACT_ID).astype(bool)\n",
+    "plot(contact_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4be8f206-f9a5-4c23-9e47-98dc7be9e0cb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Skeletonize this contact region.\n",
+    "skeleton = skeletonize(contact_data)\n",
+    "plot(skeleton)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8632d8e0-aea2-41aa-86a6-bc52700ed1e3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's find the endpoints of the skeleton.\n",
+    "# (For now, assume that there are exactly two such; ToDo: deal with gaps (more than 2 endpoints) and loops (0 endpoints).\n",
+    "def find_endpoints(skeleton):\n",
+    "    endpoints = []\n",
+    "    # Define the 8-connectivity structure\n",
+    "    struct = np.array([[1,1,1],\n",
+    "                       [1,0,1],\n",
+    "                       [1,1,1]])\n",
+    "    \n",
+    "    for i in range(1, skeleton.shape[0] - 1):\n",
+    "        for j in range(1, skeleton.shape[1] - 1):\n",
+    "            if skeleton[i, j] == 1:\n",
+    "                # Count neighbors\n",
+    "                neighbors = np.sum(skeleton[i-1:i+2, j-1:j+2] * struct)\n",
+    "                if neighbors == 1:\n",
+    "                    endpoints.append((i, j))\n",
+    "    return endpoints\n",
+    "endpoints = find_endpoints(skeleton)\n",
+    "print(f'Skeleton endpoints: {endpoints}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "29393a09-1ce0-41cb-ace2-77bb15f01b7a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now trace the path, again using 8-neighbor connectivity.\n",
+    "def trace_path(skeleton, start):\n",
+    "    path = []\n",
+    "    queue = deque([start])\n",
+    "    visited = set()\n",
+    "    \n",
+    "    while queue:\n",
+    "        current = queue.popleft()\n",
+    "        if current in visited:\n",
+    "            continue\n",
+    "        visited.add(current)\n",
+    "        path.append(current)\n",
+    "        \n",
+    "        i, j = current\n",
+    "        # Look at all 8 neighbors\n",
+    "        for ni in range(i-1, i+2):\n",
+    "            for nj in range(j-1, j+2):\n",
+    "                if (ni, nj) != (i, j) and skeleton[ni, nj] == 1 and (ni, nj) not in visited:\n",
+    "                    queue.append((ni, nj))\n",
+    "                    break # Found the next step in the path\n",
+    "    return path\n",
+    "path = trace_path(skeleton, endpoints[0])\n",
+    "midpoint = path[len(path) // 2]\n",
+    "print(f'Traced path of length {len(path)} from {path[0]} to {path[-1]}, with midpoint at {midpoint}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1cbe52b0-ead8-4975-b880-6b616f8c43a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Estimate the slope at the midpoint, so we can find a perpendicular."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6d7c6801-2a3e-4bbc-b71d-603113202964",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def estimate_slope(path, midpoint, window=3):\n",
+    "    mid_index = path.index(midpoint)\n",
+    "    \n",
+    "    # Define indices for a small segment around the midpoint\n",
+    "    start_index = max(0, mid_index - window)\n",
+    "    end_index = min(len(path) - 1, mid_index + window)\n",
+    "    \n",
+    "    # Coordinates of the start and end points of the segment\n",
+    "    start_point = path[start_index]\n",
+    "    end_point = path[end_index]\n",
+    "    \n",
+    "    # Calculate the vector (dx, dy)\n",
+    "    dx = end_point[0] - start_point[0]\n",
+    "    dy = end_point[1] - start_point[1]\n",
+    "    \n",
+    "    # Normalize the vector to avoid scaling issues\n",
+    "    length = np.hypot(dx, dy)\n",
+    "    if length != 0:\n",
+    "        dx /= length\n",
+    "        dy /= length\n",
+    "    \n",
+    "    return dx, dy\n",
+    "dx, dy = estimate_slope(path, midpoint)\n",
+    "print(f'Slope at midpoint: {(dx, dy)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a723f889-50aa-44ea-8ab0-f325eea230d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define a bisecting line (which could also be a putative synapse!)\n",
+    "hl = 10  # line half-length\n",
+    "line = ((midpoint[0]-dy*hl, midpoint[1]+dx*hl), (midpoint[0]+dy*hl, midpoint[1]-dx*hl), 'red')\n",
+    "print(f'Bisecting line: {line}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b4ae6c0e-b9e7-4c9c-9ecc-2e074b229ab8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's expand our plot function so it can include a set of lines.\n",
+    "# Each line defined as (xy0, xy1, color).\n",
+    "def plot(data, lines=[]):\n",
+    "    unique_values = np.unique(data)\n",
+    "    qty_values = len(unique_values)\n",
+    "    \n",
+    "    # create appropriately sized color map\n",
+    "    if qty_values > 2:\n",
+    "        colors = plt.colormaps['tab20'](np.linspace(0, 1, qty_values - 1))\n",
+    "        color_list = [(0,0,0)] + [colors[i] for i in range(qty_values-1)]\n",
+    "        cmap = mcolors.ListedColormap(color_list)\n",
+    "    else:\n",
+    "        cmap = 'gray'\n",
+    "    \n",
+    "    # remap unique values to sequential values from 0 - qty_values-1\n",
+    "    value_to_index = {v: i for i, v in enumerate(unique_values)}\n",
+    "    indexed_data = np.vectorize(value_to_index.get)(data)\n",
+    "\n",
+    "    # transpose to match the orientation seen in NG\n",
+    "    indexed_data = np.transpose(indexed_data)\n",
+    "\n",
+    "    # plot the data using the indexed colors\n",
+    "    plt.figure(figsize=(4,4))\n",
+    "    plt.imshow(indexed_data, cmap=cmap, interpolation='nearest')\n",
+    "\n",
+    "    # additional lines: plot 'em if you got 'em\n",
+    "    for line in lines:\n",
+    "        (x0, y0), (x1, y1), color = line\n",
+    "        plt.plot([x0, x1], [y0, y1], color=color, linewidth=2)\n",
+    "    plt.show()\n",
+    "plot(contact_data, [line])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "80c0fa3f-1481-40dc-a322-42cce93f9de7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's wrap all that up into a single method, and try it on a couple other contacts.\n",
+    "def plot_bisection(data, contact_id):\n",
+    "    contact_data = (data == contact_id).astype(bool)\n",
+    "    skeleton = skeletonize(contact_data)\n",
+    "    endpoints = find_endpoints(skeleton)\n",
+    "    path = trace_path(skeleton, endpoints[0])\n",
+    "    midpoint = path[len(path) // 2]\n",
+    "    print(f'Traced path of length {len(path)} from {path[0]} to {path[-1]}, with midpoint at {midpoint}')\n",
+    "    dx, dy = estimate_slope(path, midpoint)\n",
+    "    hl = 10  # line half-length\n",
+    "    line = ((midpoint[0]-dy*hl, midpoint[1]+dx*hl), (midpoint[0]+dy*hl, midpoint[1]-dx*hl), 'red')\n",
+    "    plot(contact_data, [line])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "03691dd6-7f64-4728-8b57-16f4c64964e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_bisection(data, 71)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3c9e497-22fe-41f7-acd3-1aca867bec50",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_bisection(data, 108)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8f71073d-7eed-41f7-8e23-3ee7d9ec27f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_bisection(data, 67)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ec89c2aa-d384-420b-8fa2-fe1e4c751a7e",
+   "metadata": {},
+   "source": [
+    "## Future Work\n",
+    "\n",
+    "- The path extraction probably does not deal well with gaps.  When we have more than two endpoints, we might need to try tracing the path from each of them, and pick the longest path for bisection.\n",
+    "- The last test above does seem to handle a loop acceptably, but it's not clear why the skeleton has any endpoints at all in this case.  It's worth digging into that more to ensure it's robust.\n",
+    "- The perpendicular uses an arbitrary window to compute the local slope.  We could probably do better by averaging over several different window sizes.\n",
+    "- All this still needs to be wrapped up as part of a larger analysis loop, which finds perpendiculars for every contact and ensures that the endpoints are in the right cell (using the cell segmentation layer), and outputs those in NG or Precomputed format."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86fbfc3c-f96e-4291-9c62-8ce0f56191a4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}