diff --git a/.bumpversion.cfg b/.bumpversion.cfg index e16214c8..06170adf 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.11.0 +current_version = 5.14.0 commit = True tag = True diff --git a/.github/workflows/annotationframeworkclient.yml b/.github/workflows/annotationframeworkclient.yml deleted file mode 100644 index aab164ff..00000000 --- a/.github/workflows/annotationframeworkclient.yml +++ /dev/null @@ -1,49 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a single version of Python -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: CAVE Client - -on: - push: - branches: - - master - paths-ignore: - - "README.rst" - pull_request: - branches: master - -jobs: - test: - name: Test against different Python versions - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/test_requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest - pip install -r requirements.txt - if [ -f test_requirements.txt ]; then pip install -r test_requirements.txt; fi - - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - - name: Test with pytest - run: | - pytest diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml new file mode 100644 index 00000000..477775e7 --- /dev/null +++ b/.github/workflows/daily.yml @@ -0,0 +1,8 @@ +name: build status +on: + schedule: + - cron: "8 15 * * *" # 7:08am PST + workflow_dispatch: +jobs: + build: + uses: ./.github/workflows/dev.yml diff --git a/.github/annotationframeworkclient.yml b/.github/workflows/dev.yml similarity index 60% rename from .github/annotationframeworkclient.yml rename to .github/workflows/dev.yml index aab164ff..667fa271 100644 --- a/.github/annotationframeworkclient.yml +++ b/.github/workflows/dev.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: CAVE Client +name: Development tests on: push: @@ -11,31 +11,41 @@ on: - "README.rst" pull_request: branches: master + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + # Allows other workflows to trigger this workflow + workflow_call: jobs: test: name: Test against different Python versions - runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7, 3.8, 3.9, 3.11] + os: [ubuntu-latest, windows-latest, macos-latest] + + runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 with: - python-version: 3.7 + python-version: ${{ matrix.python-version }} - - uses: actions/cache@v2 + - uses: actions/cache@v4 with: path: ~/.cache/pip - key: 
${{ runner.os }}-pip-${{ hashFiles('**/test_requirements.txt') }} + key: ${{ runner.os }}-${{ matrix.python-version }}-pip-${{ hashFiles('**/test_requirements.txt') }} restore-keys: | - ${{ runner.os }}-pip- + ${{ runner.os }}-${{ matrix.python-version }}-pip- - name: Install dependencies run: | python -m pip install --upgrade pip pip install flake8 pytest pip install -r requirements.txt - if [ -f test_requirements.txt ]; then pip install -r test_requirements.txt; fi + pip install -r test_requirements.txt - name: Lint with flake8 run: | diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..4eae4dce --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,60 @@ +name: dev docs + +on: + # run this workflow when dev checks pass + push: + branches: + - master + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + # This workflow contains a single job called "docs" + docs: + name: Build and deploy (dev) docs + + strategy: + matrix: + os: [ubuntu-latest] + python-version: ["3.11"] + + runs-on: ${{ matrix.os }} + + steps: + - name: Get repo + uses: actions/checkout@v4 + + - name: Get gh-pages branch + run: git fetch origin gh-pages --depth=1 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Update pip + run: python -m pip install --upgrade pip + + - name: Cache for virtual environment + uses: actions/cache@v3 + with: + path: ./.venv + key: venv-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('doc_requirements.txt') }} + + - name: Install dependencies + run: pip install -r doc_requirements.txt + + - name: Configure git for github-actions[bot] + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + + # - name: Build and push versioned docs with mike + # run: | + # poetry run mike deploy --push --update-aliases dev + # poetry run mike set-default stable --push + + - name: Build and push docs + run: | + mkdocs gh-deploy --force --remote-branch gh-pages --remote-name origin diff --git a/.readthedocs.yml b/.readthedocs.yml deleted file mode 100644 index 83426bb2..00000000 --- a/.readthedocs.yml +++ /dev/null @@ -1,22 +0,0 @@ -# .readthedocs.yaml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Set the version of Python and other tools you might need -build: - os: ubuntu-22.04 - tools: - python: "3.11" - -# Build documentation in the docs/ directory with Sphinx -sphinx: - configuration: docs/conf.py - -# We recommend specifying your dependencies to enable reproducible builds: -# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -python: - install: - - requirements: docs/requirements.txt diff --git a/CAVEclientExamples.ipynb b/CAVEclientExamples.ipynb deleted file mode 100644 index 3eb7e240..00000000 --- a/CAVEclientExamples.ipynb +++ /dev/null @@ -1,541 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "### 1. One client to rule them all\n", - "\n", - "The Connectome Annotation Versioning Engine framework consists of a number of different services, each with a specific set of tasks that it can perform through REST endpoints. This module is designed to ease programmatic interaction with all of the various endpoints. 
Going forward, we also will be increasingly using authentication tokens for programmatic access to most if not all of the services. In order to collect a given server, datastack name, and user token together into a coherent package that can be used on multiple endpoints, we will use a CAVEclient that can build appropriately configured clients for each of the specific services.\n", - "\n", - "The following examples cover each of the client subtypes that are associated with a single service. The ImageryClient, which is a more complex collection of tools, will be covered elsewhere." - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Setting up your credentials\n", - "\n", - "Most services require you to authenticate yourself. In order to use authenticated services in the CAVEclient, you need to get a token for yourself and save it to your hard drive. This token should be treated as a personal secret, like a password.\n", - "\n", - "The easiest way to get your token is to use the client in its so-called \"global\" mode, without reference to any particular dataset. Once your client is initialized, you can get instructions for how to get a new token by running `client.auth.get_new_token()`. If your server is not for the MICrONs project, you may need to set a server address when initializing the CAVEclient.\n", - "\n", - "Note: If you have already set up Graphene authetication in CloudVolume, the same token applies. The CAVEclient will read from and write to the same default file as CloudVolume, so you probably need to do nothing to use it. If you need multiple tokens for different projects, please read the documentation.\n" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "from caveclient import CAVEclient\n", - "\n", - "client = CAVEclient()\n", - "client.auth.get_new_token()" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "Follow the instructions that are printed out. If you cannot log in in step 2, contact the project administrators. Copy the token (only the long alphanumeric part!) and save it to your drive by running the command specified in step 3a after replacing the PASTE_YOUR_TOKEN_HERE bit with the value you got after login. Once this is done, you shouldn't need to do it again for a long while. Note that every time you do the refresh_token step, you invalidate any previous token. That means that if you want to use the CAVEclient on multiple computers, you need to copy the same token to each device instead of going through these steps multiple times." - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Initializing a CAVEclient\n", - "\n", - "Most services require the use of a specific datastack. Once you have set up credentials on your computer. we can specify a datastack." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "from caveclient import CAVEclient\n", - "\n", - "datastack_name = 'minnie65_phase3_v0'\n", - "client = CAVEclient(datastack_name)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "Just to confirm that this works, let's see if we can get the EM image source from the InfoService. If you get a reasonable looking path, everything is okay." 
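As a recap of the credential setup above, the whole flow fits in a few lines. This is only a sketch: the token string is a placeholder, and `save_token` is the same helper discussed in the Authentication Service section below.

```python
from caveclient import CAVEclient

# A "global" client (no datastack) is enough for authentication setup
client = CAVEclient()
client.auth.get_new_token()  # prints a login URL and step-by-step instructions

# After logging in, copy only the long alphanumeric token and store it locally.
# "PASTE_YOUR_TOKEN_HERE" is a placeholder, not a real credential.
client.auth.save_token(token="PASTE_YOUR_TOKEN_HERE")
```

With the token saved, the confirmation cell below should print a reasonable-looking image path.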
- ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "print(f\"The image source is: {client.info.image_source()}\")" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "If you don't know what datastacks exist, you can start a client with or without a datastack name (like we did when we set the auth token) and run `client.info.get_datastacks()`" - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### 2. Authentication Service\n", - "\n", - "The AuthClient handles storing and loading your token or tokens and inserting it into requests in other clients. We can access the auth client from `client.auth`. Once you have saved a token, you probably won't interact with this client very often, however it makes it convenient for saving new tokens the first time and has some other handy features. For example, to check what your token is (for example to set up a second computer)." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "auth = client.auth\n", - "print(f\"My current token is: {auth.token}\")" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Loading saved tokens\n", - "The credentials save by default in `~/.cloudvolume/secrets/cave-secret.json`. You can try opening that file to see what we just created.\n", - "\n", - "If we had wanted to use a different file or a different json key, we could have specified that in `auth.save_token`.\n", - "\n", - "Because we used the default values, this token is used automatically when we intialize a new CAVEclient. If we wanted to use a different token file, token key, or even directly specify a token we could also do so here. Of course, a bad token will cause an unauthorized error." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "client = CAVEclient(datastack_name)\n", - "print(f\"Now my basic token is: {client.auth.token}\")\n", - "\n", - "client_direct = CAVEclient(datastack_name, auth_token='another_fake_token_678')\n", - "print(f\"A directly specified token is: {client_direct.auth.token}\")" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "If you use a CAVEclient, the AuthClient and its token will be automatically applied to any other services without further use. " - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### 3. Info Service\n", - "A datastack has a number of complex paths to various data sources that together comprise a datastack. Rather than hardcode these paths, the InfoService allows one to query the location of each data source. This is also convenient in case data sources change.\n", - "\n", - "An InfoClient is accessed at `client.info`." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "client.info" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "client = CAVEclient(datastack_name)\n", - "print(f\"This is an info client for {client.info.datastack_name} on {client.info.server_address}\")" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Accessing datastack information\n", - "All of the information accessible for the datastack can be seen as a dict using `get_datastack_info()`." 
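As mentioned above, if you are unsure which datastack to use, a client started without a datastack can list the ones visible to you. A minimal sketch (the output depends on your permissions):

```python
from caveclient import CAVEclient

# A client with no datastack set can still query the global info service
global_client = CAVEclient()
for datastack in global_client.info.get_datastacks():
    print(datastack)
```

The next cell shows the full information dict for the datastack chosen here.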
- ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "client.info.get_datastack_info()" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "Individual entries can be found as well. Use tab autocomplete to see the various possibilities." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "client.info.local_server()" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### 4. JSON Service\n", - "\n", - "We store the JSON description of a Neuroglancer state in a simple database at the JSON Service. This is a convenient way to build states to distribute to people, or pull states to parse work by individuals. The JSON Client is at `client.state`. Note that the state service will work with or without a datastack set." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "client.state" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Retrieving a state\n", - "\n", - "JSON states are found simply by their ID, which you get when uploading a state. You can download a state with `get_state_json`." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "example_id = 5762925562167296\n", - "example_state = client.state.get_state_json(example_id)\n", - "example_state['layers'][0]" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Uploading a state\n", - "You can also upload states with `upload_state_json`. If you do this, the state id is returned by the function. Note that there is no easy way to query what you uploaded later, so be VERY CAREFUL with this state id if you wish to see it again.\n", - "\n", - "*Note: If you are working with a Neuroglancer Viewer object or similar, in order to upload, use viewer.state.to_json() to generate this representation.*" - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "example_state['layers'][0]['name'] = 'example_name'\n", - "new_id = client.state.upload_state_json(example_state)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "test_state = client.state.get_state_json(new_id)\n", - "test_state['layers'][0]['name']" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Generating Neuroglancer URLs\n", - "\n", - "Neuroglancer can automatically look up a JSON state based on its ID if you pass the URL to it correctly. The function `build_neuroglancer_url` helps format these correctly. Note that you need to specify the base URL for the Neuroglancer deployment you wish to use." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "url = client.state.build_neuroglancer_url(state_id=new_id, ngl_url='https://neuromancer-seung-import.appspot.com')\n", - "print(url)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### 5. ChunkedGraph\n", - "\n", - "The ChunkedGraph client allows one to interact with the ChunkedGraph, which stores and updates the supervoxel agglomeration graph. This is most often useful for looking up an object root id of a supervoxel or looking up supervoxels belonging to a root id. The ChunkedGraph client is at `client.chunkedgraph`." 
- ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Look up a supervoxel\n", - "Usually in Neuroglancer, one never notices supervoxel ids, but they are important for programmatic work. In order to look up the root id for a location in space, one needs to use the supervoxel segmentation to get the associated supervoxel id. The ChunkedGraph client makes this easy using the `get_root_ids` method." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "from caveclient import CAVEclient\n", - "\n", - "datastack_name = 'minnie65_phase3_v0'\n", - "client = CAVEclient(datastack_name)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "sv_id = 109362238070465629\n", - "client.chunkedgraph.get_root_id(supervoxel_id=sv_id)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "However, as proofreading occurs, the root id that a supervoxel belongs to can change. By default, this function returns the current state, however one can also provide a UTC timestamp to get the root id at a particular moment in history. This can be useful for reproducible analysis." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "import datetime\n", - "\n", - "date_3_days_ago = datetime.datetime.now() - datetime.timedelta(days=3)\n", - "\n", - "# I looked up the UTC POSIX timestamp from a day in early 2019. \n", - "#timestamp = datetime.datetime.utcfromtimestamp(1546595253)\n", - "\n", - "client.chunkedgraph.get_root_id(supervoxel_id=sv_id, timestamp=date_3_days_ago)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Getting supervoxels for a root id\n", - "\n", - "A root id is associated with a particular agglomeration of supervoxels, which can be found with the `get_leaves` method. A new root id is generated for every new change in the chunkedgraph, so time stamps do not apply." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "root_id = 864691134988869442\n", - "client.chunkedgraph.get_leaves(root_id)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### 7. AnnotationEngine\n", - "\n", - "The AnnotationClient is used to interact with the AnnotationEngine service to create tables from existing schema, upload new data, and download existing annotations. Note that annotations in the AnnotationEngine are not linked to any particular segmentation, and thus do not include any root ids. An annotation client is accessed with `client.annotation`." - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Get existing tables\n", - "\n", - "A list of the existing tables for the datastack can be found at with `get_tables`." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "all_tables = client.annotation.get_tables()\n", - "all_tables" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "Each table has three main properties that can be useful to know:\n", - "* `table_name` : The table name, used to refer to it when uploading or downloading annotations. 
This is also passed through to the table in the Materialized database.\n", - "* `schema_name` : The name of the table's schema from EMAnnotationSchemas (see below).\n", - "* `max_annotation_id` : An upper limit on the number of annotations already contained in the table." - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Downloading annotations\n", - "\n", - "You can download the JSON representation of a data point through the `get_annotation` method. This can be useful if you need to look up information on unmaterialized data, or to see what a properly templated annotation looks like." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "table_name = all_tables[0]\n", - "annotation_id = 1\n", - "client.annotation.get_annotation(annotation_ids=1, table_name=table_name)" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Create a new table\n", - "\n", - "One can create a new table with a specified schema with the `create_table` method:\n", - "\n", - "```\n", - "client.annotation.create_table(table_name='test_table',\n", - " schema_name='microns_func_coreg')\n", - "```\n", - "\n", - "Now, new data can be generated as a dict or list of dicts following the schema and uploaded with `post_annotation`.\n", - "For example, a `microns_func_coreg` point needs to have:\n", - " * `type` set to `microns_func_coreg`\n", - " * `pt` set to a dict with `position` as a key and the xyz location as a value.\n", - " * `func_id` set to an integer.\n", - " \n", - "The following will create a new annotation and upload it to the service: \n", - "```\n", - "new_data = {'type': 'microns_func_coreg',\n", - " 'pt': {'position': [1,2,3]},\n", - " 'func_id': 0}\n", - " \n", - "client.annotation.post_annotation(table_name='test_table', data=[new_data])\n", - "```" - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "### 7. EMAnnotationSchemas\n", - "\n", - "The EMAnnotationSchemas client lets one look up the available schemas and how they are defined. This is mostly used for programmatic interactions between services, but can be useful when looking up schema definitions for new tables." - ], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### Get the list of schema \n", - "One can get the list of all available schema with the `schema` method. Currently, new schema have to be generated on the server side, although we aim to have a generic set available to use." - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "client.schema.schema()" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "#### View a specific schema\n", - "\n", - "The details of each schema can be viewed with the `schema_definition` method, formatted as per JSONSchema. " - ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "example_schema = client.schema.schema_definition('microns_func_coreg')\n", - "example_schema" - ], - "outputs": [], - "metadata": {} - }, - { - "cell_type": "markdown", - "source": [ - "This is mostly useful for programmatic interaction between services at the moment, but can also be used to inspect the expected form of an annotation by digging into the format." 
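For example, because the returned definition follows JSON Schema conventions, the required fields and the full property list can be read straight out of the dict. This is a sketch that assumes the usual `definitions`/`properties`/`required` layout:

```python
schema = client.schema.schema_definition('microns_func_coreg')
definition = schema['definitions']['FunctionalCoregistration']

print(definition.get('required', []))  # fields an annotation must provide
print(list(definition['properties']))  # all fields defined by the schema
```

The next cell prints the same definition in full.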
- ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "example_schema['definitions']['FunctionalCoregistration']" - ], - "outputs": [], - "metadata": {} - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/FlyWireAnnotationTutorial.ipynb b/FlyWireAnnotationTutorial.ipynb deleted file mode 100644 index 1f18659b..00000000 --- a/FlyWireAnnotationTutorial.ipynb +++ /dev/null @@ -1,323 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The CAVEclient\n", - "\n", - "The CAVEclient is a client side library to allow easy interaction with the services within CAVE (connectome annotation versioning engine, also known as Dynamic Annotation Framework), eg. the annotations, stateserver. The github repository is public:\n", - "https://github.com/seung-lab/CAVEclient\n", - "\n", - "The library can be installed directly from the github repository or from the prebuilt versions using pip:\n", - "```\n", - "pip install caveclient\n", - "```\n", - "\n", - "\n", - "## Tutorials\n", - "\n", - "This tutorial mainly covers the interactions with the materialized annotation tables. More information and better explanations of the other functionalities of the client can be found in the following tutorial. Please be advised that depending on your permission level you may not be able to execute all queries in this tutorial with the preset parameters as it was written with defaults for iarpa's microns project:\n", - "https://github.com/seung-lab/CAVEclient/blob/master/CAVEclientExamples.ipynb\n", - "\n", - "\n", - "## Authentication & Authorization\n", - "\n", - "If this is your first time to interact with any part of CAVE, chances are you need to setup your local credentials for your FlyWire account first. Please follow the section \"Setting up your credentials\" at the beginning of the tutorial above to do so.\n", - "\n", - "You will need to have access to the FlyWire's production dataset to retrieve annotations. Otherwise you will see\n", - "\n", - "```HTTPError: 403 Client Error: FORBIDDEN for url```\n", - "\n", - "errors upon querying the materialization server." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Initializing the CAVEclient\n", - "\n", - "The FrameworkClient is instantiated with a datastack name. A datastack is a set of segmentation, and annotation tables and lives within an aligned volume (the coordinate space). FlyWire's main datastack is `flywire_fafb_production`, the aligned volume is `fafb_seung_alignment_v0` (v14.1). For convenience, there are other defaults set on the datastack level." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import datetime\n", - "import pandas as pd\n", - "from caveclient import CAVEclient" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "datastack_name = \"flywire_fafb_production\"\n", - "client = CAVEclient(datastack_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Annotation tables\n", - "\n", - "Annotations are represented by points in space and parameters (such as size, type). At specific timepoints, annotations are combined with the (proofread) segmentation to create a materialized version of the annotation table. The AnnotationEngine (`client.annotation`) owns the raw annotations and the Materialization Service (`client.materialize`) owns the materialized versions of these tables. \n", - "\n", - "To check what annotation tables are visible to you run" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://prod.flywire-daf.com/annotation/api/v2/aligned_volume/fafb_seung_alignment_v0/table\n" - ] - }, - { - "data": { - "text/plain": [ - "['synapses_nt_v1', 'nuclei_v1']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client.annotation.get_tables()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating a table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "All users with permissions to proofread in FlyWire can create annotation tables and upload annotations. Currently, only the user who created a table can upload annotations to it; all other users have read access to the table.\\\n", - "\n", - "Creating a table requires a unique `table_name`, a `schema`, a `description` and a `voxel_resolution`. The `voxel_resolution` defines the resolution with which annotations are uploaded. For instance, `[1, 1, 1]` would mean that annotations in this table will be uploaded in nanometer space. \n", - "\n", - "Schemas are managed in a separate [repository](https://github.com/seung-lab/EMAnnotationSchemas). Schemas can freely be chosen from [there](https://globalv1.daf-apis.com/schema/views/). If no applicable schema is available, we encourage users to create a new schema and submit a pull-request. \n", - "\n", - "In the example below, we are going to use the [`bound_tag`](https://globalv1.daf-apis.com/schema/views/type/bound_tag/view) schema which is an annotation with one coordinate and a text field. All annotations in a table follow the same schema." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To avoid the creation of a large list of test tables, the following code is embedded in markdown. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```\n", - "\n", - "table_name = \"my_table\"\n", - "\n", - "description = \"\"\"\n", - "This is a test table to demonstrate table creation and annotation upload.\n", - "The data in this table is random and should not be used for analysis.\n", - "\n", - "This table was create by ...\"\"\"\n", - "\n", - "client.annotation.create_table(table_name=random_table_name,\n", - " schema_name=\"bound_tag\",\n", - " description=description,\n", - " voxel_resolution=[1, 1, 1])\n", - " \n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "All tables are listed here: https://prod.flywire-daf.com/annotation/views/aligned_volume/fafb_seung_alignment_v0\n", - "\n", - "A specific table can be viewed here: https://prod.flywire-daf.com/annotation/views/aligned_volume/fafb_seung_alignment_v0/table/table_name (with table_name replaced)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Uploading and updating annotations" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we generate some annotations to be uploaded to the new table. Tables can be uploaded from pandas DataFrames with columns according to the schema. One can include an `id` column to specify specific annotation, otherwise `id`s are assigned by the backend. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```\n", - "random_locations_nm = np.random.randint([448000, 185000, 87000], [588000, 292000, 90000], size=[100, 3], dtype=int)\n", - "random_tags = [f\"tag {i}\" for i in range(100)]\n", - "\n", - "random_annotation_data = pd.DataFrame.from_dict({\"pt_position\": list(random_locations_nm), \n", - " \"tag\": random_tags,\n", - " \"id\": np.arange(100, 200)})\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Annotations can be uploaded using `client.annotation.post_annotation` or `client.annotation.post_annotation_df`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```\n", - "client.annotation.post_annotation_df(table_name=random_table_name,\n", - " df=random_annotation_data,\n", - " position_columns=[\"pt_position\"])\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Updating annotations works similarly to uploading them in the first place. Updating requires `id`s to be defined." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```\n", - "random_locations_nm = np.random.randint([448000, 185000, 87000], [588000, 292000, 90000], size=[10, 3], dtype=int)\n", - "random_tags = [f\"tag {i}\" for i in range(10)]\n", - "\n", - "random_annotation_data = pd.DataFrame.from_dict({\"pt_position\": list(random_locations_nm), \n", - " \"tag\": random_tags,\n", - " \"id\": np.arange(100, 110)})\n", - " \n", - "client.annotation.update_annotation_df(table_name=random_table_name,\n", - " df=random_annotation_data,\n", - " position_columns=[\"pt_position\"])\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Reading annotations" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Annotations can be read directly from the annotation service. Annotations can be read by ID. Here, we use the nucleus table (`nuclei_v1`, see it online [here](https://prod.flywire-daf.com/annotation/views/aligned_volume/fafb_seung_alignment_v0/table/nuclei_v1)) as example." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'volume': 6.65092096,\n", - " 'bb_start_position': [718496, 261792, 114640],\n", - " 'valid': True,\n", - " 'bb_end_position': [722016, 265120, 116200],\n", - " 'pt_position': [720000, 263360, 115520],\n", - " 'superceded_id': None,\n", - " 'created': '2021-06-23 19:55:35.166396',\n", - " 'deleted': 'None',\n", - " 'id': 7415718},\n", - " {'volume': 11.52339968,\n", - " 'bb_start_position': [708800, 262112, 128200],\n", - " 'valid': True,\n", - " 'bb_end_position': [712064, 264832, 131080],\n", - " 'pt_position': [710592, 263392, 129800],\n", - " 'superceded_id': None,\n", - " 'created': '2021-06-23 19:55:35.160196',\n", - " 'deleted': 'None',\n", - " 'id': 7416439}]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client.annotation.get_annotation(table_name=\"nuclei_v1\",\n", - " annotation_ids=[7416439, 7415718])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "No segment IDs will be assigned to the annotations through this interface. To access annotations with segment IDs assigned, one must use the [materialization interface](https://github.com/seung-lab/CAVEclient/blob/master/FlyWireSynapseTutorial.ipynb).\n", - "\n", - "For this to work, at least one materialization run has to complete after the upload. Currently, materialization happen every to every second date." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/FlyWireSynapseTutorial.ipynb b/FlyWireSynapseTutorial.ipynb deleted file mode 100644 index a1dd977f..00000000 --- a/FlyWireSynapseTutorial.ipynb +++ /dev/null @@ -1,1715 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The CAVEclient\n", - "\n", - "The CAVEclient is a client side library to allow easy interaction with the services within CAVE (connectome annotation versioning engine, also known as Dynamic Annotation Framework), eg. the annotations, stateserver. The github repository is public:\n", - "https://github.com/seung-lab/CAVEclient\n", - "\n", - "The library can be installed directly from the github repository or from the prebuilt versions using pip:\n", - "```\n", - "pip install caveclient\n", - "```\n", - "\n", - "\n", - "## Tutorials\n", - "\n", - "This tutorial mainly covers the interactions with the materialized annotation tables. More information and better explanations of the other functionalities of the client can be found in the following tutorial. 
Please be advised that depending on your permission level you may not be able to execute all queries in this tutorial with the preset parameters as it was written with defaults for iarpa's microns project:\n", - "https://github.com/seung-lab/CAVEclient/blob/master/CAVEclientExamples.ipynb\n", - "\n", - "\n", - "## Authentication & Authorization\n", - "\n", - "If this is your first time to interact with any part of CAVE, chances are you need to setup your local credentials for your FlyWire account first. Please follow the section \"Setting up your credentials\" at the beginning of the tutorial above to do so.\n", - "\n", - "You will need to have access to the FlyWire's production dataset to retrieve annotations. Otherwise you will see\n", - "\n", - "```HTTPError: 403 Client Error: FORBIDDEN for url```\n", - "\n", - "errors upon querying the materialization server." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Initialize CAVEclient\n", - "\n", - "The CAVEclient is instantiated with a datastack name. A datastack is a set of segmentation, and annotation tables and lives within an aligned volume (the coordinate space). FlyWire's main datastack is `flywire_fafb_production`, the aligned volume is `fafb_seung_alignment_v0` (v14.1). For convenience, there are other defaults set on the datastack level." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import datetime\n", - "from caveclient import CAVEclient" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "datastack_name = \"flywire_fafb_production\"\n", - "client = CAVEclient(datastack_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Annotation tables\n", - "\n", - "Annotations are represented by points in space and parameters (such as size, type). At specific timepoints, annotations are combined with the (proofread) segmentation to create a materialized version of the annotation table. The AnnotationEngine (`client.annotation`) owns the raw annotations and the Materialization Service (`client.materialize`) owns the materialized versions of these tables. \n", - "\n", - "To check what annotation tables are visible to you run" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://prod.flywire-daf.com/annotation/api/v2/aligned_volume/fafb_seung_alignment_v0/table\n" - ] - }, - { - "data": { - "text/plain": [ - "['synapses_nt_v1']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client.annotation.get_tables()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Every table has metadata associated with it which includes information about the owner/creator, a description and a schema that annotations in this table follow. Please review the metadata of any table you might use in the future before using it as it might contain instructions and restrictions for its usage and how to credit it's creators. 
For instance, the (v1) synapse table (`synapses_nt_v1`) includes an extensive description on all its columns, credits people that created it, contains instructions for citing this resource among others:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FlyWire synapse description\r\n", - "Synapse version: 20191211\r\n", - "NT version: 20201223\r\n", - "\r\n", - "Synapses in this table consist of a pre- and a postsynaptic point (in nm), confidence scores, and neurotransmitter information. \r\n", - "The synapses were predicted by Buhmann et al [1] for the v14 alignment of the FAFB dataset. The FlyWire team remapped these synapses into the v14.1 space used by FlyWire with an accuracy of <64nm (therefore, this is a potential source of error). This version of the Buhmann et al. synapses was trained on the initial training set from the calyx and performance varies across brain areas accordingly.\r\n", - "Buhmann et al. assigned two scores to their synapses representing different measurements of confidence. The “connection_score” column contains the scores assigned by them during prediction (higher is more confident) and “cleft_score” contains the scores acquired by Buhmann et al. by using the synapse segmentation from Heinrich et al. [2] (higher is more confident). For the latter, Buhmann et al. found the highest value in Heinrich et al.’s segmentation along the line between the pre- and postsynaptic point for each synapse. This represents an individual assessment of the validity of each synapse because Buhmann et al. found synapses without using the segmentation by Heinrich et al.. Buhmann et al. suggested using the product of both scores, others found that the “cleft_score” alone with a threshold of 50-100 to be a good filter. We suggest validating analyses with various thresholds.\r\n", - "Some synapses were (falsely) annotated multiple times. We found that most of these can be fixed by merging synapses between the same partners that are <150nm apart on the presynaptic side. The threshold might vary for different cell types and again we suggest validating analyses with various thresholds.\r\n", - "Eckstein et al. [3] developed a classifier to predict probabilities for each of six neurotransmitters for each synapse (sum to 1, Dopamine (DA), Acetylcholine (ACh), Glutamate (Glut), Octopamine (OCT), Serotonin (SER), Gabaergic (GABA)). Davi Bock, Gregory Jefferis and Eric Perlman contributed infrastructure to run their classifier on all synapses from Buhmann et al. . For some synapses (<20k) no neurotransmitter information could be acquired. Probabilities for these have been set to all 0. Additionally, the “valid_nt” column can be used to filter these out (False=no neurotransmitter information available). Improving this classifier through the acquisition of additional ground truth is actively worked on. The classifications of ACh, GABA and Glu appear to be more robust than DA, OCT and SER in this version. Additionally, there was little training data from the optic lobe and results are more robust in the central brain. \r\n", - "\r\n", - "For any use of these synapses in a study, please cite Buhmann et al. [1]. For using the “cleft_score” please also cite Heinrich et al. [2]. When including the neurotransmitter information please also cite Eckstein et al. 
[3].\r\n", - "\r\n", - "The prediction and dissemination of the neurotransmitter information was supported by NIH BRAIN Initiative (grant 1RF1MH120679-01); additional work including assembling ground truth data was also supported by Wellcome trust (203261/Z/16/Z). \r\n", - "\r\n", - "[1] Julia Buhmann, Arlo Sheridan, Stephan Gerhard, Renate Krause, Tri Nguyen, Larissa Heinrich, Philipp Schlegel, Wei-Chung Allen Lee, Rachel Wilson, Stephan Saalfeld, Gregory Jefferis, Davi Bock, Srinivas Turaga, Matthew Cook, Jan Funke. 2019. “Automatic Detection of Synaptic Partners in a Whole-Brain Drosophila EM Dataset”. bioRxiv\r\n", - "[2] Heinrich L., Funke J., Pape C., Nunez-Iglesias J., Saalfeld S. 2018. “Synaptic Cleft Segmentation in Non-isotropic Volume Electron Microscopy of the Complete Drosophila Brain”. MICCAI 2018. Lecture Notes in Computer Science, vol 11071. Springer\r\n", - "[3] Nils Eckstein, Alexander S. Bates, Michelle Du, Volker Hartenstein, Gregory S.X.E. Jefferis, Jan Funke. 2020. “Neurotransmitter Classification from Electron Microscopy Images at Synaptic Sites in Drosophila”. bioRxiv\r\n", - "\n" - ] - } - ], - "source": [ - "meta_data = client.annotation.get_table_metadata(\"synapses_nt_v1\")\n", - "print(meta_data[\"description\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The meta data contains information about the schema which ultimately determines how annotations in a table are structured. All annotations in a table follow the same schema. The synapse table follows the `fly_nt_synapse` schema:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'fly_nt_synapse'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "meta_data[\"schema_type\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Materialized annotation tables & Queries" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```\n", - "materialization = annotation + segmentation snapshot\n", - "```\n", - "\n", - "As the segmentation and annotations change over time, we need to create snapshots of a combined view of them (materialized versions). Materialized versions of the annotation tables are (automatically) generated at a certain frequency. In addition to that, we are planning to include an option to retrieve any timestamp since the latest materialization (\"live\") but that is not available at the moment. \n", - "\n", - "There are usually a number of materialized versions available at the same time:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[56, 64, 66, 67, 68, 70]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client.materialize.get_versions()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Each version comes with meta data about the time when it was created and when it will be deleted (expired). Different tables have different lifetimes and some may be LTS versions. The exact frequency and lifetime of tables will depend on how the community is using these tables. 
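The next cell prints the metadata for the latest version. One practical use of that metadata, sketched below, is to take a version's `time_stamp` and pass it to the ChunkedGraph client so that root id lookups refer to the same segmentation snapshot as the materialized tables. The supervoxel id here is just a placeholder.

```python
# Pin a chunkedgraph lookup to the snapshot time of a materialization version
latest_version = max(client.materialize.get_versions())
version_timestamp = client.materialize.get_version_metadata(latest_version)["time_stamp"]

sv_id = 77832229442872849  # placeholder supervoxel id of interest
root_at_version = client.chunkedgraph.get_root_id(supervoxel_id=sv_id,
                                                  timestamp=version_timestamp)
print(root_at_version)
```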
" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'valid': True,\n", - " 'time_stamp': datetime.datetime(2021, 6, 10, 8, 10, 0, 286750, tzinfo=datetime.timezone.utc),\n", - " 'id': 59,\n", - " 'version': 70,\n", - " 'datastack': 'flywire_fafb_production',\n", - " 'expires_on': datetime.datetime(2021, 6, 12, 8, 10, 0, 286750, tzinfo=datetime.timezone.utc)}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "latest_version = max(client.materialize.get_versions())\n", - "client.materialize.get_version_metadata(latest_version)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Generally, specifying versions for the materialize service is optional. The latest version is used if no version is defined. \n", - "\n", - "Each materialization version contains a set of annotation tables. At the moment all tables are included in a materialization but in the future we might not include all tables in every materialization:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['synapses_nt_v1']" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client.materialize.get_tables()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Queries" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we demonstrate some queries with the synapses from Buhmann et al.. For some essential annotation types, default tables are define in the centralized info service. This way, one automatically uses the latest synapse table after an update. " - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "synapses_nt_v1\n" - ] - } - ], - "source": [ - "synapse_table = client.info.get_datastack_info()['synapse_table']\n", - "print(synapse_table)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Each table in this list is stored as a SQL table on the backend. The client allows users to query these tables through the frontend of the Materialization Service conventiently without the need for SQL specific language. The client will format the results as pandas dataframes. Queries are restricted to a size of 200k rows to not overwhelm the server. Should a query result in a larger list of rows, only the first 200k are returned. For bulk downloads (eg. for data preservation before a publication) please contact us.\n", - "\n", - "To demonstrate this this query would pull the entire table but will only gather 200k rows (should take <2min). A warning will be raised if the query is cut short." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:201 - \"Limited query to 200000 rows\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 764 ms, sys: 187 ms, total: 951 ms\n", - "Wall time: 6.76 s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "syn_df = client.materialize.query_table(synapse_table)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200000\n" - ] - } - ], - "source": [ - "print(len(syn_df))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we did not set a materialization version explicitly; if the materialization version is not specified, the query defaults to the most recent version.\n", - "\n", - "Let's take a brief look at the columns to illustrate how the materialization extends an annotation table:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idvalidpre_pt_supervoxel_idpre_pt_root_idpost_pt_supervoxel_idpost_pt_root_idconnection_scorecleft_scoregabaachglutoctserdavalid_ntpre_pt_positionpost_pt_position
0102406485t8155923039789001972057594063390834381559230397890088720575940572044374217.0209201220.0014830.9882521.837718e-050.0000660.0003370.009844t[636900, 135040, 152840][636832, 135124, 152880]
1101781363t811396292729070637205759406309690518113962927294824872057594059961431422.19149600.1059250.3526613.122027e-010.0001070.1760820.053023t[613864, 292888, 152600][613868, 292984, 152640]
2102336123t8325199726828565372057594062757659483181628524108478720575940590270455115.574806570.0059710.9275597.128069e-040.0631330.0000200.002605t[732880, 370040, 151320][732752, 370080, 151320]
3101781421t7346950487700621472057594058937223973469504876999566720575940587527270141.4603881440.0004680.5530681.522532e-080.0000130.0000030.446448t[166996, 294144, 151280][166920, 294024, 151280]
4101781599t809989605040569927205759406137323648099896050404395172057594061373236438.557209370.6742730.0120402.504642e-010.0010700.0129260.049227t[604412, 294744, 151520][604364, 294860, 151520]
\n", - "
" - ], - "text/plain": [ - " id valid pre_pt_supervoxel_id pre_pt_root_id \\\n", - "0 102406485 t 81559230397890019 720575940633908343 \n", - "1 101781363 t 81139629272907063 720575940630969051 \n", - "2 102336123 t 83251997268285653 720575940627576594 \n", - "3 101781421 t 73469504877006214 720575940589372239 \n", - "4 101781599 t 80998960504056992 720575940613732364 \n", - "\n", - " post_pt_supervoxel_id post_pt_root_id connection_score cleft_score \\\n", - "0 81559230397890088 720575940572044374 217.020920 122 \n", - "1 81139629272948248 720575940599614314 22.191496 0 \n", - "2 83181628524108478 720575940590270455 115.574806 57 \n", - "3 73469504876999566 720575940587527270 141.460388 144 \n", - "4 80998960504043951 720575940613732364 38.557209 37 \n", - "\n", - " gaba ach glut oct ser da valid_nt \\\n", - "0 0.001483 0.988252 1.837718e-05 0.000066 0.000337 0.009844 t \n", - "1 0.105925 0.352661 3.122027e-01 0.000107 0.176082 0.053023 t \n", - "2 0.005971 0.927559 7.128069e-04 0.063133 0.000020 0.002605 t \n", - "3 0.000468 0.553068 1.522532e-08 0.000013 0.000003 0.446448 t \n", - "4 0.674273 0.012040 2.504642e-01 0.001070 0.012926 0.049227 t \n", - "\n", - " pre_pt_position post_pt_position \n", - "0 [636900, 135040, 152840] [636832, 135124, 152880] \n", - "1 [613864, 292888, 152600] [613868, 292984, 152640] \n", - "2 [732880, 370040, 151320] [732752, 370080, 151320] \n", - "3 [166996, 294144, 151280] [166920, 294024, 151280] \n", - "4 [604412, 294744, 151520] [604364, 294860, 151520] " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "syn_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Annotations consist of parameters and spatial points. Some or all of these spatial points are what we call \"BoundSpatialPoints\". These are linked to the segmentation during materialization. The synapse tables have two such points (`pre_pt`, `post_pt`). Per point there are three columns: `*_position`, `*_supervoxel_id`, `*_root_id`. Supervoxels are the small atomic segments, and root ids describe large components (neurons) consisting of many supervoxels. A root id always refers to the same version of a neuron and represents a snapshot in time in its own right. For a given annotation id (`id`), all but the `*_root_id` columns stay constant between materializations. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`query_table` has three parameters to define filters: filter_in_dict, filter_out_dict, filter_equal_dict. More options will be added. This can be used to query synapses between any lists of neurons. For instance, to query the outgoing synapses of an AMMC-B1 neuron we included in the FlyWire paper:\n", - "(see the next section for how to come up with a specific root id)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 34 ms, sys: 209 µs, total: 34.2 ms\n", - "Wall time: 413 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "syn_df = client.materialize.query_table(synapse_table, \n", - " filter_in_dict={\"pre_pt_root_id\": [720575940627197566]})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As described in the metadata above, we suggest filtering the synapse table using the `cleft_score` and `connection_score`. Tuning these will help to reduce the number of false positive synapses in the list. 
The best threshold(s) will depend on the specific neurons included in the analysis. Here we will just remove all synapses with a `cleft_score < 50`." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "syn_df = syn_df[syn_df[\"cleft_score\"] >= 50]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Some postsynaptic partners have a 0 id. Many of these are due to the synapse prediction covering a bigger space than the segmentation. Here, we remove these along with synapses onto itself as we are confident that this cell does not make autapses." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "syn_df = syn_df[syn_df[\"pre_pt_root_id\"] != syn_df[\"post_pt_root_id\"]]\n", - "syn_df = syn_df[syn_df[\"post_pt_root_id\"] != 0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This synapse table comes with neurotransmitter prediction from the work of Eckstein et al.. Please review the description in the metadata to understand the caveats of this data with regards your analysis. Here, we just look at the mean of the probablities of all outgoing synapses which shows that this neuron's neurotransmitter is very likely acetylcholine." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "gaba 0.032069\n", - "ach 0.835793\n", - "glut 0.041500\n", - "oct 0.005178\n", - "ser 0.021450\n", - "da 0.064010\n", - "dtype: float64" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.mean(syn_df[[\"gaba\", \"ach\", \"glut\", \"oct\", \"ser\", \"da\"]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here we take a brief look at the postsynaptic partners and sorting them by number of synapses; displaying the top 10:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(720575940612001489, 96),\n", - " (720575940639811469, 92),\n", - " (720575940606297353, 88),\n", - " (720575940615361748, 86),\n", - " (720575940621893127, 84),\n", - " (720575940621301738, 80),\n", - " (720575940626312778, 73),\n", - " (720575940625492753, 71),\n", - " (720575940623903434, 65),\n", - " (720575940618489734, 64)]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "u_post_root_ids, c_post_root_ids = np.unique(syn_df[\"post_pt_root_id\"], return_counts=True)\n", - "\n", - "sorting = np.argsort(c_post_root_ids)[::-1][:10]\n", - "list(zip(u_post_root_ids[sorting], c_post_root_ids[sorting]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The main target is an AMMC-A1 (720575940613535430) which is a connection we described in Figure 6 in the FlyWire paper.\n", - "\n", - "We can further restrict the query by filtering the postsynaptic targets. For instance this query will only return the synapses between the these two root ids." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idvalidpre_pt_supervoxel_idpre_pt_root_idpost_pt_supervoxel_idpost_pt_root_idconnection_scorecleft_scoregabaachglutoctserdavalid_ntpre_pt_positionpost_pt_position
0211365569t778322294428728497205759406271975667783222944288322972057594061200148952.1613541000.0507980.7061450.1039630.0208560.0070260.111212t[418808, 286736, 110840][418760, 286616, 110880]
1217890780t7769142323542439572057594062719756677691423235425756720575940612001489102.6923981450.0043960.9700690.0008830.0008400.0001480.023665t[412236, 283056, 121640][412208, 283148, 121680]
26180240t7776117363789426272057594062719756677761173637888123720575940612001489227.3899081580.0118520.9664360.0054910.0021030.0005800.013538t[414292, 245008, 144520][414160, 245016, 144520]
351374154t7769128572911625872057594062719756677691285729100925720575940612001489111.8180471320.0083700.7050350.0092020.0000050.2721420.005246t[409856, 274840, 93040][409984, 274868, 93040]
4237434942t7783202328476592872057594062719756677832023284771485720575940612001489161.7754671410.0237190.8907570.0137780.0022280.0195190.049999t[417900, 272952, 119880][417844, 273044, 119880]
......................................................
121236131087t777615858211215627205759406271975667776158582112034972057594061200148988.2543261160.0071500.9587110.0013550.0161790.0004080.016197t[414620, 270192, 119680][414520, 270216, 119680]
122217890770t776914232354058597205759406271975667769142323542575672057594061200148965.5305021450.0034930.9760930.0001200.0034060.0000330.016854t[412216, 283052, 121600][412128, 283092, 121640]
123147115465t778320232847301757205759406271975667783202328471615072057594061200148913.4544041420.0140810.7485650.0015520.0001990.0034850.232117t[419512, 274800, 118480][419480, 274656, 118480]
124221946020t77832160723343355720575940627197566778321607233463467205759406120014891038.6652831430.0483050.8954620.0239940.0069700.0012130.024056t[417700, 282552, 108800][417580, 282568, 108800]
125237435004t778320232847785367205759406271975667783202328479201672057594061200148991.1949011140.0561940.5601030.0660580.0143660.0592270.244052t[419064, 274020, 120360][419164, 273968, 120400]
\n", - "

96 rows × 17 columns

\n", - "
" - ], - "text/plain": [ - " id valid pre_pt_supervoxel_id pre_pt_root_id \\\n", - "0 211365569 t 77832229442872849 720575940627197566 \n", - "1 217890780 t 77691423235424395 720575940627197566 \n", - "2 6180240 t 77761173637894262 720575940627197566 \n", - "3 51374154 t 77691285729116258 720575940627197566 \n", - "4 237434942 t 77832023284765928 720575940627197566 \n", - ".. ... ... ... ... \n", - "121 236131087 t 77761585821121562 720575940627197566 \n", - "122 217890770 t 77691423235405859 720575940627197566 \n", - "123 147115465 t 77832023284730175 720575940627197566 \n", - "124 221946020 t 77832160723343355 720575940627197566 \n", - "125 237435004 t 77832023284778536 720575940627197566 \n", - "\n", - " post_pt_supervoxel_id post_pt_root_id connection_score cleft_score \\\n", - "0 77832229442883229 720575940612001489 52.161354 100 \n", - "1 77691423235425756 720575940612001489 102.692398 145 \n", - "2 77761173637888123 720575940612001489 227.389908 158 \n", - "3 77691285729100925 720575940612001489 111.818047 132 \n", - "4 77832023284771485 720575940612001489 161.775467 141 \n", - ".. ... ... ... ... \n", - "121 77761585821120349 720575940612001489 88.254326 116 \n", - "122 77691423235425756 720575940612001489 65.530502 145 \n", - "123 77832023284716150 720575940612001489 13.454404 142 \n", - "124 77832160723346346 720575940612001489 1038.665283 143 \n", - "125 77832023284792016 720575940612001489 91.194901 114 \n", - "\n", - " gaba ach glut oct ser da valid_nt \\\n", - "0 0.050798 0.706145 0.103963 0.020856 0.007026 0.111212 t \n", - "1 0.004396 0.970069 0.000883 0.000840 0.000148 0.023665 t \n", - "2 0.011852 0.966436 0.005491 0.002103 0.000580 0.013538 t \n", - "3 0.008370 0.705035 0.009202 0.000005 0.272142 0.005246 t \n", - "4 0.023719 0.890757 0.013778 0.002228 0.019519 0.049999 t \n", - ".. ... ... ... ... ... ... ... \n", - "121 0.007150 0.958711 0.001355 0.016179 0.000408 0.016197 t \n", - "122 0.003493 0.976093 0.000120 0.003406 0.000033 0.016854 t \n", - "123 0.014081 0.748565 0.001552 0.000199 0.003485 0.232117 t \n", - "124 0.048305 0.895462 0.023994 0.006970 0.001213 0.024056 t \n", - "125 0.056194 0.560103 0.066058 0.014366 0.059227 0.244052 t \n", - "\n", - " pre_pt_position post_pt_position \n", - "0 [418808, 286736, 110840] [418760, 286616, 110880] \n", - "1 [412236, 283056, 121640] [412208, 283148, 121680] \n", - "2 [414292, 245008, 144520] [414160, 245016, 144520] \n", - "3 [409856, 274840, 93040] [409984, 274868, 93040] \n", - "4 [417900, 272952, 119880] [417844, 273044, 119880] \n", - ".. ... ... 
\n", - "121 [414620, 270192, 119680] [414520, 270216, 119680] \n", - "122 [412216, 283052, 121600] [412128, 283092, 121640] \n", - "123 [419512, 274800, 118480] [419480, 274656, 118480] \n", - "124 [417700, 282552, 108800] [417580, 282568, 108800] \n", - "125 [419064, 274020, 120360] [419164, 273968, 120400] \n", - "\n", - "[96 rows x 17 columns]" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "syn_df = client.materialize.query_table(synapse_table, \n", - " filter_in_dict={\"pre_pt_root_id\": [720575940627197566],\n", - " \"post_pt_root_id\": [720575940612001489]})\n", - "syn_df = syn_df[syn_df[\"cleft_score\"] >= 50]\n", - "\n", - "syn_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## \"Live\" Materialization Queries" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Before using live materializations, please make sure that your installation of the caveclient is `>= 3.1.0`. You can upgrade your installed version with \n", - "\n", - "\n", - "```\n", - "pip install caveclient --upgrade\n", - "```\n", - "\n", - "To make sure the latest version of the library is used in this notebook after an upgrade it is best to reload the notebook kernel. Your current version is:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'3.1.0'" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import caveclient\n", - "caveclient.__version__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Live\" materializations allow one to run queries without adhering to versions. This is useful when recent proofreading edits should be reflected in the analysis. Live materializations require a timestamp for which the query should be executed. " - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "timestamp_now = datetime.datetime.utcnow()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 70.7 ms, sys: 1.2 ms, total: 71.9 ms\n", - "Wall time: 1.97 s\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idvalidpre_pt_supervoxel_idpre_pt_root_idpost_pt_supervoxel_idpost_pt_root_idconnection_scorecleft_scoregabaachglutoctserdavalid_ntpre_pt_positionpost_pt_position
043917406t847277480312717757205759406371861268472774803126458472057594062644510063.0223961240.0103900.9502720.0021320.0083743.866737e-040.028445t[820188, 250388, 153800][820212, 250440, 153840]
161506994t833203045623195727205759406371861268332030456232864972057594061908231754.0873871390.0003890.9971380.0000020.0017079.331072e-070.000762t[740408, 246192, 190520][740380, 246032, 190560]
2232489725t8409442940095157172057594063718612684094429400956510720575940590374582270.4783941370.0640270.3563400.5156140.0041066.949618e-030.052963t[782552, 248476, 180040][782508, 248392, 180080]
3232945196t8409442940097174572057594063718612684024060656788915720575940583849998100.0803381440.0218850.8132480.0286370.0064347.459946e-030.122336t[782036, 250656, 180240][781968, 250780, 180200]
4239559768t840944294008285047205759406371861268409442940082728072057594062643153231.9911501460.0875190.1234520.7446830.0029572.260582e-020.018783t[783352, 250516, 176320][783288, 250484, 176280]
......................................................
1807232945305t840944294009626177205759406371861268409442940099021972057594063343220651.2444991110.0065960.9465920.0037570.0341908.990807e-050.008774t[782376, 251864, 180800][782468, 251780, 180840]
1808232948742t840240606568032757205759406371861268402406065679947872057594062776509911.450621670.0329240.8986810.0321870.0015791.981975e-030.032648t[780720, 250520, 180640][780840, 250488, 180640]
1810239559737t8409442940083211772057594063718612684094429400820474720575940620162332275.8130801420.0108460.9396840.0262590.0201277.136991e-050.003012t[784380, 249888, 176080][784280, 249884, 176040]
1811239559595t8409442940080089772057594063718612684094429400805747720575940627076396241.3193051420.0020150.9880750.0004350.0076642.856799e-060.001808t[784652, 251304, 175600][784708, 251216, 175600]
1812242611511t845870105430158117205759406371861268458701054301708472057594049862041016.303911580.1907400.4917270.1504360.0096352.533201e-020.132131t[811568, 250932, 156880][811472, 250896, 156880]
\n", - "

1422 rows × 17 columns

\n", - "
" - ], - "text/plain": [ - " id valid pre_pt_supervoxel_id pre_pt_root_id \\\n", - "0 43917406 t 84727748031271775 720575940637186126 \n", - "1 61506994 t 83320304562319572 720575940637186126 \n", - "2 232489725 t 84094429400951571 720575940637186126 \n", - "3 232945196 t 84094429400971745 720575940637186126 \n", - "4 239559768 t 84094429400828504 720575940637186126 \n", - "... ... ... ... ... \n", - "1807 232945305 t 84094429400962617 720575940637186126 \n", - "1808 232948742 t 84024060656803275 720575940637186126 \n", - "1810 239559737 t 84094429400832117 720575940637186126 \n", - "1811 239559595 t 84094429400800897 720575940637186126 \n", - "1812 242611511 t 84587010543015811 720575940637186126 \n", - "\n", - " post_pt_supervoxel_id post_pt_root_id connection_score \\\n", - "0 84727748031264584 720575940626445100 63.022396 \n", - "1 83320304562328649 720575940619082317 54.087387 \n", - "2 84094429400956510 720575940590374582 270.478394 \n", - "3 84024060656788915 720575940583849998 100.080338 \n", - "4 84094429400827280 720575940626431532 31.991150 \n", - "... ... ... ... \n", - "1807 84094429400990219 720575940633432206 51.244499 \n", - "1808 84024060656799478 720575940627765099 11.450621 \n", - "1810 84094429400820474 720575940620162332 275.813080 \n", - "1811 84094429400805747 720575940627076396 241.319305 \n", - "1812 84587010543017084 720575940498620410 16.303911 \n", - "\n", - " cleft_score gaba ach glut oct ser \\\n", - "0 124 0.010390 0.950272 0.002132 0.008374 3.866737e-04 \n", - "1 139 0.000389 0.997138 0.000002 0.001707 9.331072e-07 \n", - "2 137 0.064027 0.356340 0.515614 0.004106 6.949618e-03 \n", - "3 144 0.021885 0.813248 0.028637 0.006434 7.459946e-03 \n", - "4 146 0.087519 0.123452 0.744683 0.002957 2.260582e-02 \n", - "... ... ... ... ... ... ... \n", - "1807 111 0.006596 0.946592 0.003757 0.034190 8.990807e-05 \n", - "1808 67 0.032924 0.898681 0.032187 0.001579 1.981975e-03 \n", - "1810 142 0.010846 0.939684 0.026259 0.020127 7.136991e-05 \n", - "1811 142 0.002015 0.988075 0.000435 0.007664 2.856799e-06 \n", - "1812 58 0.190740 0.491727 0.150436 0.009635 2.533201e-02 \n", - "\n", - " da valid_nt pre_pt_position post_pt_position \n", - "0 0.028445 t [820188, 250388, 153800] [820212, 250440, 153840] \n", - "1 0.000762 t [740408, 246192, 190520] [740380, 246032, 190560] \n", - "2 0.052963 t [782552, 248476, 180040] [782508, 248392, 180080] \n", - "3 0.122336 t [782036, 250656, 180240] [781968, 250780, 180200] \n", - "4 0.018783 t [783352, 250516, 176320] [783288, 250484, 176280] \n", - "... ... ... ... ... \n", - "1807 0.008774 t [782376, 251864, 180800] [782468, 251780, 180840] \n", - "1808 0.032648 t [780720, 250520, 180640] [780840, 250488, 180640] \n", - "1810 0.003012 t [784380, 249888, 176080] [784280, 249884, 176040] \n", - "1811 0.001808 t [784652, 251304, 175600] [784708, 251216, 175600] \n", - "1812 0.132131 t [811568, 250932, 156880] [811472, 250896, 156880] \n", - "\n", - "[1422 rows x 17 columns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%time \n", - "\n", - "# Code to retrieve a root id that will work with this query. 
See the next section for more details\n", - "latest_roots = client.chunkedgraph.get_latest_roots(720575940627185911, timestamp_future=timestamp_now)\n", - "latest_roots\n", - "\n", - "syn_df = client.materialize.live_query(synapse_table, \n", - " filter_in_dict={\"pre_pt_root_id\": [latest_roots[0]]},\n", - " timestamp=timestamp_now)\n", - "\n", - "syn_df = syn_df[syn_df[\"cleft_score\"] >= 50]\n", - "\n", - "syn_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If the root id is incompatible with the timestamp, an error is raised:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "Timestamp incompatible with IDs: [720575940627185911] are expired, use chunkedgraph client to find valid ID(s)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n", - "\u001b[0;32m~/CAVEclient/caveclient/materializationengine.py\u001b[0m in \u001b[0;36mlive_query\u001b[0;34m(self, table, timestamp, filter_in_dict, filter_out_dict, filter_equal_dict, filter_spatial, join_args, select_columns, offset, limit, datastack_name, split_positions, post_filter)\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0;31m# most recent materialization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mTimeIt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"map_filters\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 869\u001b[0;31m past_filters, future_map = self.map_filters(\n\u001b[0m\u001b[1;32m 870\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mfilter_in_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilter_out_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilter_equal_dict\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0mtimestamp\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/CAVEclient/caveclient/materializationengine.py\u001b[0m in \u001b[0;36mmap_filters\u001b[0;34m(self, filters, timestamp, timestamp_past)\u001b[0m\n\u001b[1;32m 685\u001b[0m \u001b[0mtoo_recent_str\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 686\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 687\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 688\u001b[0m \u001b[0;34mf\"Timestamp incompatible with IDs: {too_old_str}{too_recent_str}use chunkedgraph client to find valid ID(s)\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 689\u001b[0m )\n", - "\u001b[0;31mValueError\u001b[0m: Timestamp incompatible with IDs: [720575940627185911] are expired, use chunkedgraph client to find valid ID(s)" - ] - } - ], - "source": [ - "%%time \n", - "\n", - "syn_df = client.materialize.live_query(synapse_table, \n", - " filter_in_dict={\"pre_pt_root_id\": [720575940627185911]},\n", - " timestamp=timestamp_now)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Retrieving matching root ids\n", - "\n", - "Neuroglancer shows the most recent version of the segmentation by default. 
Neurons that have been updated since a materialized version are not included in a table of that version. To reconcile this, users need to look up root ids for their data with a timestamp. \n", - "\n", - "We generally recommend storing annotations as points in space as these can be mapped to root ids easily (that's basically what materialization is). Soon, users will be able to create their own annotation tables and CAVE will provide fitting root ids automatically. Still, use cases will arrive that require a manual materialization by the user." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Programmatically/Manually - Root id history\n", - "\n", - "The client interface can be used to query the \"lineage\" of a root id. This contains all ancestors and successors in time and can be restricted with timestamps in the past and future. The lineage can be retrieved as networkx graph:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client.chunkedgraph.get_lineage_graph(720575940627185911, as_nx_graph=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Based on the lineage graph the latest root ids can be retrieved:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([720575940627197566])" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "latest_roots = client.chunkedgraph.get_latest_roots(720575940627197566, timestamp_future=timestamp_now)\n", - "latest_roots" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As there can be multiple successors for a given ID (because of splits) the user will have to determine which of these matches the neuron of interest.\n", - "\n", - "The client also enables the retrieval of the original root ids that contributed to a given neuron:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([720575940611736976, 720575940619683443, 720575940502382601,\n", - " 720575940502373129, 720575940502373897, 720575940502377225,\n", - " 720575940519142108, 720575940519144924, 720575940519141596,\n", - " 720575940519144156, 720575940502370057, 720575940519126492,\n", - " 720575940519126748, 720575940519130332, 720575940519131356,\n", - " 720575940519134684, 720575940618400560, 720575940615477396,\n", - " 720575940618047811, 720575940601614877, 720575940630717493])" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "original_roots = client.chunkedgraph.get_original_roots(720575940627197566)\n", - "original_roots" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Programmatically - Spatial lookup\n", - "\n", - "The client interface allows users to query a root id for a given supervoxel id (see Section 5 in [the related tutorial](https://github.com/seung-lab/CAVEclient/blob/master/CAVEclientExamples.ipynb). Supervoxel ids can be retrieved from the segmentation using [cloudvolume](https://github.com/seung-lab/cloud-volume/)." 
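As a minimal sketch of that lookup (assuming the `client` object created earlier in this notebook, and using two `pre_pt_supervoxel_id` values from the synapse table above as stand-in inputs), mapping supervoxels to root ids at a fixed materialization timestamp might look like this:

```python
# Sketch: map supervoxel ids (e.g. looked up from the segmentation with cloudvolume)
# to root ids that are consistent with materialization version 15.
supervoxel_ids = [81559230397890019, 81139629272907063]  # example values from the table above

# Timestamp of the materialization version, so the returned root ids match that snapshot
version_ts = client.materialize.get_version_metadata(15)["time_stamp"]
root_ids = client.chunkedgraph.get_roots(supervoxel_ids, timestamp=version_ts)
root_ids
```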
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Neuroglancer\n",
- "\n",
- "The segmentation layer has an option under the tab \"graph\" to lock a layer to a specific timestamp. Then, root ids are looked up with this specific timestamp (proofreading is not possible in this mode). Be aware that this mode does not prevent the pasting of root ids from different timestamps into the layer as that circumvents the lookup to the server."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Timestamps\n",
- "\n",
- "Timestamps are _always_ UTC. \n",
- "\n",
- "Please be aware that the package or browser you are using might format timestamps in your local timezone. The timestamp for all annotation tables within a materialization is the same:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'valid': None,\n",
- " 'time_stamp': datetime.datetime(2021, 3, 10, 19, 7, 56, 375440, tzinfo=datetime.timezone.utc),\n",
- " 'id': 3,\n",
- " 'version': 15,\n",
- " 'datastack': 'flywire_fafb_production',\n",
- " 'expires_on': datetime.datetime(2021, 4, 9, 19, 7, 56, 375440, tzinfo=datetime.timezone.utc)}"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "client.materialize.get_version_metadata(15)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Creating neuroglancer links programmatically\n",
- "\n",
- "We are building infrastructure into neuroglancer to display this information there while browsing neurons. Until this is ready, the most convenient way to visualize this information in neuroglancer is to programmatically create neuroglancer states and upload them to the state server. The links can then be distributed. \n",
- "\n",
- "[NeuroglancerAnnotationUI (nglui)](https://github.com/seung-lab/NeuroglancerAnnotationUI) makes programmatic creation of neuroglancer states convenient. The [statebuilder examples](https://github.com/seung-lab/NeuroglancerAnnotationUI/blob/master/examples/statebuilder_examples.ipynb) show how one can go directly from dataframes like the one above to neuroglancer states. The [related tutorial on this client](https://github.com/seung-lab/CAVEclient/blob/master/CAVEclientExamples.ipynb) shows under \"4. JSON Service\" how this client can be used to upload states to the server and to create neuroglancer links.\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Further references\n",
- "\n",
- "\n",
- "More examples for the usage of CAVE can be found in a related project:\n",
- "\n",
- "https://github.com/AllenInstitute/MicronsBinder\n",
- "\n",
- "A rough overview of the structure of our backend services can be found here:\n",
- "\n",
- "https://github.com/seung-lab/AnnotationPipelineOverview"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Credit\n",
- "\n",
- "CAVE is developed at Princeton University and the Allen Institute for Brain Science within the IARPA MICrONS project and the FlyWire project. Main contributors to the design and backend development \n",
- "are Derrick Brittain, Forrest Collman, Sven Dorkenwald, Chris Jordan, and Casey Schneider-Mizell.\n",
- "\n",
- "A citable publication is in the works. Please contact us if you are interested in using CAVE on another dataset. 
" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..496b7492 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023, Ben Pedigo + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..b674b5b3 --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +# CAVEclient + +This repository supplies client side code to interact with microservices +in the Connectome Annotation Versioning Engine (CAVE). + +## Installation + +Can be installed from pypi + +`pip install caveclient` + +## Documentation + +You can find full documentation on readthedocs (https://caveclient.readthedocs.io). + +## Usage examples + +Tutorial notebook for accessing the FlyWire Connectome dataset: https://github.com/seung-lab/FlyConnectome/blob/main/CAVE%20tutorial.ipynb diff --git a/README.rst b/README.rst index 7f20499a..4397320d 100644 --- a/README.rst +++ b/README.rst @@ -9,3 +9,18 @@ CAVEclient ########################### This repository supplies client side code to interact with microservices in the Connectome Annotation Versioning Engine (CAVE). + +Installation +########################### +Can be installed from pypi +:: + + pip install caveclient + +Documentation +############# +You can find full documentation on readthedocs (https://caveclient.readthedocs.io). 
+ +Usage examples +############## +- Tutorial notebook for accessing the FlyWire Connectome dataset: https://github.com/seung-lab/FlyConnectome/blob/main/CAVE%20tutorial.ipynb diff --git a/caveclient/__init__.py b/caveclient/__init__.py index d986b75b..b6e4d7fd 100644 --- a/caveclient/__init__.py +++ b/caveclient/__init__.py @@ -1,3 +1,3 @@ -__version__ = "5.11.0" +__version__ = "5.14.0" from .frameworkclient import CAVEclient diff --git a/caveclient/auth.py b/caveclient/auth.py index 6d0704db..a3d0bfa0 100644 --- a/caveclient/auth.py +++ b/caveclient/auth.py @@ -1,14 +1,15 @@ +import json +import logging +import os +import urllib +import webbrowser + +import requests + from .base import ( handle_response, ) -import urllib from .endpoints import auth_endpoints_v1, default_global_server_address -import os -import webbrowser -import requests -import json -import logging -import time logger = logging.getLogger(__name__) @@ -23,7 +24,6 @@ def write_token(token, filepath, key, overwrite=True): - if os.path.exists(filepath): with open(filepath, "r") as f: secrets = json.load(f) @@ -45,26 +45,6 @@ def write_token(token, filepath, key, overwrite=True): class AuthClient(object): - """Client to find and use auth tokens to access the dynamic annotation framework services. - - Parameters - ---------- - token_file : str, optional - Path to a JSON key:value file holding your auth token. - By default, "~/.cloudvolume/secrets/cave-secret.json" - (will check deprecated token name "chunkedgraph-secret.json" as well) - token_key : str, optional - Key for the token in the token_file. - By default, "token" - - token : str or None, optional - Direct entry of the token as a string. If provided, overrides the files. - If None, attempts to use the file paths. - - server_address : str, optional, - URL to the auth server. By default, uses a default server address. - """ - def __init__( self, token_file=None, @@ -72,6 +52,25 @@ def __init__( token=None, server_address=default_global_server_address, ): + """Client to find and use auth tokens to access the dynamic annotation framework services. + + Parameters + ---------- + token_file : str, optional + Path to a JSON key:value file holding your auth token. + By default, "~/.cloudvolume/secrets/cave-secret.json" + (will check deprecated token name "chunkedgraph-secret.json" as well) + token_key : str, optional + Key for the token in the token_file. + By default, "token" + + token : str or None, optional + Direct entry of the token as a string. If provided, overrides the files. + If None, attempts to use the file paths. + + server_address : str, optional, + URL to the auth server. By default, uses a default server address. 
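A minimal usage sketch for these parameters, assuming only the constructor arguments documented above; the token string is a placeholder:

```python
from caveclient.auth import AuthClient

# Default behavior: read the token from ~/.cloudvolume/secrets/cave-secret.json
# under the "token" key
auth = AuthClient()

# Or pass a token directly; a token given here overrides any token file
auth = AuthClient(token="my-secret-token")
print(auth.token)
```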
+ """ if token_file is None: server = urllib.parse.urlparse(server_address).netloc server_file = server + "-cave-secret.json" diff --git a/caveclient/chunkedgraph.py b/caveclient/chunkedgraph.py index 25a28dae..f0fe2d22 100644 --- a/caveclient/chunkedgraph.py +++ b/caveclient/chunkedgraph.py @@ -1,28 +1,27 @@ """PyChunkedgraph service python interface""" -from typing import Iterable -from urllib.parse import urlencode import datetime import json +import logging +from typing import Iterable, Tuple, Union +from urllib.parse import urlencode + +import networkx as nx import numpy as np import pandas as pd import pytz + +from .auth import AuthClient +from .base import BaseEncoder, ClientBase, _api_endpoints, handle_response from .endpoints import ( chunkedgraph_api_versions, chunkedgraph_endpoints_common, default_global_server_address, ) -from .base import ( - _api_endpoints, - ClientBase, - BaseEncoder, - handle_response, -) -from .auth import AuthClient -import networkx as nx - SERVER_KEY = "cg_server_address" +logger = logging.getLogger(__name__) + def package_bounds(bounds): if bounds.shape != (3, 2): @@ -82,12 +81,12 @@ def root_id_int_list_check( root_id, make_unique=False, ): - if isinstance(root_id, int) or isinstance(root_id, np.uint64) or isinstance(root_id, np.int64): + if isinstance(root_id, (int, np.uint64, np.int64)): root_id = [root_id] elif isinstance(root_id, str): try: root_id = np.uint64(root_id) - except ValueError as esc: + except ValueError: raise ValueError( "When passing a string for 'root_id' make sure the string can be converted to a uint64" ) @@ -198,25 +197,26 @@ def _process_timestamp(self, timestamp): if self._default_timestamp is not None: return self._default_timestamp else: - return datetime.datetime.utcnow() + return datetime.datetime.now(datetime.timezone.utc) else: return timestamp - def get_roots(self, supervoxel_ids, timestamp=None, stop_layer=None): - """Get the root id for a specified supervoxel + def get_roots(self, supervoxel_ids, timestamp=None, stop_layer=None) -> np.ndarray: + """Get the root ID for a list of supervoxels. Parameters ---------- - supervoxel_ids : np.array(np.uint64) - Supervoxel ids values + supervoxel_ids : list or np.array of int + Supervoxel IDs to look up. timestamp : datetime.datetime, optional - UTC datetime to specify the state of the chunkedgraph at which to query, by default None. If None, uses the current time. + UTC datetime to specify the state of the chunkedgraph at which to query, by + default None. If None, uses the current time. stop_layer : int or None, optional - If True, looks up ids only up to a given stop layer. Default is None. + If True, looks up IDs only up to a given stop layer. Default is None. Returns ------- - np.array(np.uint64) + np.array of np.uint64 Root IDs containing each supervoxel. """ @@ -230,19 +230,20 @@ def get_roots(self, supervoxel_ids, timestamp=None, stop_layer=None): handle_response(response, as_json=False) return np.frombuffer(response.content, dtype=np.uint64) - def get_root_id(self, supervoxel_id, timestamp=None, level2=False): - """Get the root id for a specified supervoxel + def get_root_id(self, supervoxel_id, timestamp=None, level2=False) -> np.int64: + """Get the root ID for a specified supervoxel. Parameters ---------- - supervoxel_id : np.uint64 + supervoxel_id : int Supervoxel id value timestamp : datetime.datetime, optional - UTC datetime to specify the state of the chunkedgraph at which to query, by default None. If None, uses the current time. 
+ UTC datetime to specify the state of the chunkedgraph at which to query, by + default None. If None, uses the current time. Returns ------- - np.uint64 + np.int64 Root ID containing the supervoxel. """ endpoint_mapping = self.default_url_mapping @@ -256,13 +257,13 @@ def get_root_id(self, supervoxel_id, timestamp=None, level2=False): response = self.session.get(url, params=query_d) return np.int64(handle_response(response, as_json=True)["root_id"]) - def get_merge_log(self, root_id): - """Get the merge log (splits and merges) for an object + def get_merge_log(self, root_id) -> list: + """Get the merge log (splits and merges) for an object. Parameters ---------- - root_id : np.uint64 - Object root id to look up + root_id : int + Object root ID to look up. Returns ------- @@ -276,18 +277,36 @@ def get_merge_log(self, root_id): response = self.session.get(url) return handle_response(response) - def get_change_log(self, root_id, filtered=True): - """Get the change log (splits and merges) for an object + def get_change_log(self, root_id, filtered=True) -> dict: + """Get the change log (splits and merges) for an object. Parameters ---------- - root_id : np.uint64 - Object root id to look up + root_id : int + Object root ID to look up. + filtered : bool + Whether to filter the change log to only include splits and merges which + affect the final state of the object (`filtered=True`), as opposed to + including edit history for objects which as some point were split from + the query object `root_id` (`filtered=False`). Defaults to True. Returns ------- - list - List of split and merge events in the object history + dict + Dictionary summarizing split and merge events in the object history, + containing the following keys: + + "n_merges": int + Number of merges + "n_splits": int + Number of splits + "operations_ids": list of int + Identifiers for each operation + "past_ids": list of int + Previous root ids for this object + "user_info": dict of dict + Dictionary keyed by user (string) to a dictionary specifying how many + merges and splits that user performed on this object """ endpoint_mapping = self.default_url_mapping endpoint_mapping["root_id"] = root_id @@ -303,15 +322,36 @@ def get_user_operations( timestamp_start: datetime.datetime, include_undo: bool = True, timestamp_end: datetime.datetime = None, - ): - """get operation details for a user_id + ) -> pd.DataFrame: + """ + Get operation details for a user ID. Currently, this is only available to + admins. + + + Parameters + ---------- + user_id : int + User ID to query (use 0 for all users (admin only)). + timestamp_start : datetime.datetime, optional + Timestamp to start filter (UTC). + include_undo : bool, optional + Whether to include undos. Defaults to True. + timestamp_end : datetime.datetime, optional + Timestamp to end filter (UTC). Defaults to now. - Args: - user_id (int): userID to query (use 0 for all users [admin only]) - timestamp_start (datetime.datetime, optional): timestamp to start filter (UTC). - include_undo (bool, optional): whether to include undos. Defaults to True. - timestamp_end (datetime.datetime, optional): timestamp to end filter (UTC). Defaults to now. + Returns + ------- + pd.DataFrame + DataFrame including the following columns: + + "operation_id": int + Identifier for the operation. + "timestamp": datetime.datetime + Timestamp of the operation. + "user_id": int + User who performed the operation. 
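A short usage sketch under stated assumptions: the client must hold an admin token, the one-week window is arbitrary, and the datastack name is the one used in the tutorial elsewhere in this repository.

```python
import datetime

from caveclient import CAVEclient

client = CAVEclient("flywire_fafb_production")

# Operations by all users (user_id=0 means all users; admin only) over the past week
start = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=7)
ops_df = client.chunkedgraph.get_user_operations(user_id=0, timestamp_start=start)
print(ops_df.head())
```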
""" + endpoint_mapping = self.default_url_mapping url = self._endpoints["user_operations"].format_map(endpoint_mapping) @@ -338,17 +378,44 @@ def get_user_operations( ) return df - def get_tabular_change_log(self, root_ids, filtered=True): - """Get a detailed changelog for neurons + def get_tabular_change_log(self, root_ids, filtered=True) -> dict: + """Get a detailed changelog for neurons. Parameters ---------- - root_ids : list of np.uint64 - Object root ids to look up + root_ids : list of int + Object root IDs to look up. + filtered : bool + Whether to filter the change log to only include splits and merges which + affect the final state of the object (`filtered=True`), as opposed to + including edit history for objects which as some point were split from + the query objects in `root_ids` (`filtered=False`). Defaults to True. Returns ------- - dict of dataframe + dict of pd.DataFrame + The keys are the root IDs, and the values are DataFrames with the + following columns and datatypes: + + "operation_id": int + Identifier for the operation. + "timestamp": int + Timestamp of the operation, provided in *milliseconds*. To convert to + datetime, use ``datetime.datetime.utcfromtimestamp(timestamp/1000)``. + "user_id": int + User who performed the operation. + "before_root_ids: list of int + Root IDs of objects that existed before the operation. + "after_root_ids: list of int + Root IDs of objects created by the operation. Note that this only + records the root id that was kept as part of the query object, so there + will only be one in this list. + "is_merge": bool + Whether the operation was a merge. + "user_name": str + Name of the user who performed the operation. + "user_affiliation": str + Affiliation of the user who performed the operation. """ root_ids = [int(r) for r in np.unique(root_ids)] @@ -367,24 +434,24 @@ def get_tabular_change_log(self, root_ids, filtered=True): return changelog_dict - def get_leaves(self, root_id, bounds=None, stop_layer: int = None): - """Get all supervoxels for a root_id + def get_leaves(self, root_id, bounds=None, stop_layer: int = None) -> np.ndarray: + """Get all supervoxels for a root ID. Parameters ---------- - root_id : np.uint64 - Root id to query + root_id : int + Root ID to query. bounds: np.array or None, optional - If specified, returns supervoxels within a 3x2 numpy array of bounds [[minx,maxx],[miny,maxy],[minz,maxz]] - If None, finds all supervoxels. + If specified, returns supervoxels within a 3x2 numpy array of bounds + ``[[minx,maxx],[miny,maxy],[minz,maxz]]``. If None, finds all supervoxels. stop_layer: int, optional - If specified, returns chunkedgraph nodes at layer =stop_layer - default will be stop_layer=1 (supervoxels) + If specified, returns chunkedgraph nodes at layer `stop_layer` + default will be `stop_layer=1` (supervoxels). Returns ------- - list - List of supervoxel ids (or nodeids if stop_layer>1) + np.array of np.int64 + Array of supervoxel IDs (or node ids if `stop_layer>1`). """ endpoint_mapping = self.default_url_mapping endpoint_mapping["root_id"] = root_id @@ -397,14 +464,19 @@ def get_leaves(self, root_id, bounds=None, stop_layer: int = None): response = self.session.get(url, params=query_d) return np.int64(handle_response(response)["leaf_ids"]) - def do_merge(self, supervoxels, coords, resolution=(4, 4, 40)): - """Perform a merge on the chunkeded graph + def do_merge(self, supervoxels, coords, resolution=(4, 4, 40)) -> None: + """Perform a merge on the chunked graph. 
- Args: - supervoxels (iterable): a N long list of supervoxels to merge - coords (np.array): a Nx3 array of coordinates of the supervoxels in units of resolution - resolution (tuple, optional): what to multiple the coords by to get nm. Defaults to (4,4,40). + Parameters + ---------- + supervoxels : iterable + An N-long list of supervoxels to merge. + coords : np.array + An Nx3 array of coordinates of the supervoxels in units of `resolution`. + resolution : tuple, optional + What to multiply `coords` by to get nanometers. Defaults to (4,4,40). """ + endpoint_mapping = self.default_url_mapping url = self._endpoints["do_merge"].format_map(endpoint_mapping) @@ -422,12 +494,19 @@ def do_merge(self, supervoxels, coords, resolution=(4, 4, 40)): ) handle_response(response) - def undo_operation(self, operation_id): - """Undo an operation + def undo_operation(self, operation_id) -> dict: + """Undo an operation. - Args: - operation_id (int): operation id to undo + Parameters + ---------- + operation_id : int + Operation ID to undo. + + Returns + ------- + dict """ + # TODO clarify what the return is here endpoint_mapping = self.default_url_mapping url = self._endpoints["undo"].format_map(endpoint_mapping) @@ -449,7 +528,7 @@ def execute_split( root_id, source_supervoxels=None, sink_supervoxels=None, - ): + ) -> Tuple[int, list]: """Execute a multicut split based on points or supervoxels. Parameters @@ -459,18 +538,22 @@ def execute_split( sink_points : array or list Mx3 list or array of 3d points in nm coordinates for sink points (blue). root_id : int - root id of object to do split preview. + Root ID of object to do split preview. source_supervoxels : array, list or None, optional - If providing source supervoxels, an N-length array of supervoxel ids or Nones matched to source points. If None, treats as a full array of Nones. By default None + If providing source supervoxels, an N-length array of supervoxel IDs or + Nones matched to source points. If None, treats as a full array of Nones. + By default None. sink_supervoxels : array, list or None, optional - If providing sink supervoxels, an M-length array of supervoxel ids or Nones matched to source points. If None, treats as a full array of Nones. By default None + If providing sink supervoxels, an M-length array of supervoxel IDs or Nones + matched to source points. If None, treats as a full array of Nones. + By default None. Returns ------- - operation_id - Unique id of the split operation - new_root_ids - List of new root ids resulting from the split operation. + operation_id : int + Unique ID of the split operation + new_root_ids : list of int + List of new root IDs resulting from the split operation. """ endpoint_mapping = self.default_url_mapping url = self._endpoints["execute_split"].format_map(endpoint_mapping) @@ -496,7 +579,7 @@ def preview_split( source_supervoxels=None, sink_supervoxels=None, return_additional_ccs=False, - ): + ) -> Tuple[list, list, bool, list]: """Get supervoxel connected components from a preview multicut split. Parameters @@ -506,24 +589,31 @@ def preview_split( sink_points : array or list Mx3 list or array of 3d points in nm coordinates for sink points (blue). root_id : int - root id of object to do split preview. + Root ID of object to do split preview. source_supervoxels : array, list or None, optional - If providing source supervoxels, an N-length array of supervoxel ids or Nones matched to source points. If None, treats as a full array of Nones. 
By default None + If providing source supervoxels, an N-length array of supervoxel IDs or + Nones matched to source points. If None, treats as a full array of Nones. + By default None. sink_supervoxels : array, list or None, optional - If providing sink supervoxels, an M-length array of supervoxel ids or Nones matched to source points. If None, treats as a full array of Nones. By default None + If providing sink supervoxels, an M-length array of supervoxel IDs or Nones + matched to source points. If None, treats as a full array of Nones. + By default None. return_additional_ccs : bool, optional - If True, returns any additional connected components beyond the ones with source and sink points. In most situations, this can be ignored. By default, False. + If True, returns any additional connected components beyond the ones with + source and sink points. In most situations, this can be ignored. + By default, False. Returns ------- - source_connected_component - List of supervoxel ids in the component with the most source points. - sink_connected_component - List of supervoxel ids in the component with the most sink points. - successful_split - Boolean value that is True if the split worked. - other_connected_components (optional) - List of lists of supervoxel ids for any other resulting connected components. Only returned if `return_additional_ccs` is True. + source_connected_component : list + Supervoxel IDs in the component with the most source points. + sink_connected_component : list + Supervoxel IDs in the component with the most sink points. + successful_split : bool + True if the split worked. + other_connected_components (optional) : list of lists of int + List of lists of supervoxel IDs for any other resulting connected components. + Only returned if `return_additional_ccs` is True. """ endpoint_mapping = self.default_url_mapping url = self._endpoints["preview_split"].format_map(endpoint_mapping) @@ -552,18 +642,18 @@ def preview_split( else: return source_cc, sink_cc, success - def get_children(self, node_id): - """Get the children of a node in the hierarchy + def get_children(self, node_id) -> np.ndarray: + """Get the children of a node in the chunked graph hierarchy. Parameters ---------- - node_id : np.uint64 - Node id to query + node_id : int + Node ID to query. Returns ------- - list - List of np.uint64 ids of child nodes. + np.array of np.int64 + IDs of child nodes. """ endpoint_mapping = self.default_url_mapping endpoint_mapping["root_id"] = node_id @@ -572,17 +662,19 @@ def get_children(self, node_id): response = self.session.get(url) return np.array(handle_response(response)["children_ids"], dtype=np.int64) - def get_contact_sites(self, root_id, bounds, calc_partners=False): - """Get contacts for a root id + def get_contact_sites(self, root_id, bounds, calc_partners=False) -> dict: + """Get contacts for a root ID. Parameters ---------- - root_id : np.uint64 - Object root id + root_id : int + Root ID to query. bounds: np.array - Bounds within a 3x2 numpy array of bounds [[minx,maxx],[miny,maxy],[minz,maxz]] for which to find contacts. Running this query without bounds is too slow. + Bounds within a 3x2 numpy array of bounds + ``[[minx,maxx],[miny,maxy],[minz,maxz]]`` for which to find contacts. + Running this query without bounds is too slow. calc_partners : bool, optional - If True, get partner root ids. By default, False. + If True, get partner root IDs. By default, False. 
Returns ------- dict @@ -599,20 +691,35 @@ def get_contact_sites(self, root_id, bounds, calc_partners=False): contact_d = handle_response(response) return {int(k): v for k, v in contact_d.items()} - def find_path(self, root_id, src_pt, dst_pt, precision_mode=False): - """find a path between two locations on a root_id using the supervoxel lvl2 graph. + def find_path( + self, root_id, src_pt, dst_pt, precision_mode=False + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Find a path between two locations on a root ID using the level 2 chunked + graph. - Args: - root_id (np.int64): the root id to search on - src_pt (np.array): len(3) xyz location of the start location in nm - dst_pt ([type]): len(3) xyz location of the end location in nm - precision_mode (bool, optional): Whether to perform the search in precision mode. Defaults to False. - Returns: - centroids_list: centroids - l2_path: l2_path - failed_l2_ids: failed_l2_ids + Parameters + ---------- + root_id : int + Root ID to query. + src_pt : np.array + 3-element array of xyz coordinates in nm for the source point. + dst_pt : np.array + 3-element array of xyz coordinates in nm for the destination point. + precision_mode : bool, optional + Whether to perform the search in precision mode. Defaults to False. + + Returns + ------- + centroids_list : np.array + Array of centroids along the path. + l2_path : np.array of int + Array of level 2 chunk IDs along the path. + failed_l2_ids : np.array of int + Array of level 2 chunk IDs that failed to find a path. """ + endpoint_mapping = self.default_url_mapping endpoint_mapping["root_id"] = root_id url = self._endpoints["find_path"].format_map(endpoint_mapping) @@ -634,13 +741,28 @@ def find_path(self, root_id, src_pt, dst_pt, precision_mode=False): return centroids, l2_path, failed_l2_ids - def get_subgraph(self, root_id, bounds): - """Get subgraph of root id within a bounding box + def get_subgraph( + self, root_id, bounds + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Get subgraph of root id within a bounding box. - Args: - root_id ([int64]): root (or seg_id/node_id) of chunkedgraph to query - bounds ([np.array]): 3x2 bounding box (x,y,z)x (min,max) in chunkedgraph coordinates + Parameters + ---------- + root_id : int + Root (or any node ID) of chunked graph to query. + bounds : np.array + 3x2 bounding box (x,y,z) x (min,max) in chunked graph coordinates. + + Returns + ------- + np.array of np.int64 + Node IDs in the subgraph. + np.array of np.double + Affinities of edges in the subgraph. + np.array of np.int32 + Areas of nodes in the subgraph. """ + endpoint_mapping = self.default_url_mapping endpoint_mapping["root_id"] = root_id url = self._endpoints["get_subgraph"].format_map(endpoint_mapping) @@ -652,8 +774,10 @@ def get_subgraph(self, root_id, bounds): rd = handle_response(response) return np.int64(rd["nodes"]), np.double(rd["affinities"]), np.int32(rd["areas"]) - def level2_chunk_graph(self, root_id): - """Get graph of level 2 chunks, the smallest agglomeration level above supervoxels. + def level2_chunk_graph(self, root_id) -> list: + """ + Get graph of level 2 chunks, the smallest agglomeration level above supervoxels. + Parameters ---------- @@ -662,8 +786,9 @@ def level2_chunk_graph(self, root_id): Returns ------- - edge_list : list - Edge array of level 2 ids + list of list + Edge list for level 2 chunked graph. Each element of the list is an edge, + and each edge is a list of two node IDs (source and target). 
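An illustrative sketch of consuming this return value; the root ID is one of the example neurons from the tutorial notebook, and the datastack name is an assumption taken from that tutorial.

```python
import networkx as nx
from caveclient import CAVEclient

client = CAVEclient("flywire_fafb_production")

# Edge list of [source, target] level 2 node IDs for one object
l2_edges = client.chunkedgraph.level2_chunk_graph(720575940627197566)

# Build an undirected graph of level 2 chunks and inspect its size
l2_graph = nx.Graph()
l2_graph.add_edges_from(l2_edges)
print(l2_graph.number_of_nodes(), l2_graph.number_of_edges())
```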
""" endpoint_mapping = self.default_url_mapping endpoint_mapping["root_id"] = root_id @@ -671,13 +796,13 @@ def level2_chunk_graph(self, root_id): r = handle_response(self.session.get(url)) return r["edge_graph"] - def remesh_level2_chunks(self, chunk_ids): + def remesh_level2_chunks(self, chunk_ids) -> None: """Submit specific level 2 chunks to be remeshed in case of a problem. Parameters ---------- chunk_ids : list - List of level 2 chunk ids. + List of level 2 chunk IDs. """ endpoint_mapping = self.default_url_mapping @@ -686,15 +811,41 @@ def remesh_level2_chunks(self, chunk_ids): r = self.session.post(url, json=data) r.raise_for_status() - def get_operation_details(self, operation_ids: Iterable[int]): - """get the details of a list of operations + def get_operation_details(self, operation_ids: Iterable[int]) -> dict: + """Get the details of a list of operations. - Args: - operation_ids (Iterable[int]): list of operation IDss + Parameters + ---------- + operation_ids: Iterable of int + List/array of operation IDs. - Returns: - dict: a dict of dictss of operation info, keys are operationids - values are a dictionary of operation info for the operation + Returns + ------- + dict of str to dict + A dict of dicts of operation info, keys are operation IDs (as strings), + values are a dictionary of operation info for the operation. These + dictionaries contain the following keys: + + "added_edges"/"removed_edges": list of list of int + List of edges added (if a merge) or removed (if a split) by this + operation. Each edge is a list of two supervoxel IDs (source and + target). + "roots": list of int + List of root IDs that were created by this operation. + "sink_coords": list of list of int + List of sink coordinates for this operation. The sink is one of the + points placed by the user when specifying the operation. Each sink + coordinate is a list of three integers (x, y, z), corresponding to + spatial coordinates in segmentation voxel space. + "source_coords": list of list of int + List of source coordinates for this operation. The source is one of the + points placed by the user when specifying the operation. Each source + coordinate is a list of three integers (x, y, z), corresponding to + spatial coordinates in segmentation voxel space. + "timestamp": str + Timestamp of the operation. + "user": str + User ID number who performed the operation (as a string). """ if isinstance(operation_ids, np.ndarray): operation_ids = operation_ids.tolist() @@ -709,25 +860,65 @@ def get_operation_details(self, operation_ids: Iterable[int]): return r.json() def get_lineage_graph( - self, root_id, timestamp_past=None, timestamp_future=None, as_nx_graph=False - ): - """Returns the lineage graph for a root id, optionally cut off in the past or the future. + self, + root_id, + timestamp_past=None, + timestamp_future=None, + as_nx_graph=False, + exclude_links_to_future=False, + exclude_links_to_past=False, + ) -> Union[dict, nx.DiGraph]: + """ + Returns the lineage graph for a root ID, optionally cut off in the past or + the future. + + Each change in the chunked graph creates a new root ID for the object after + that change. This function returns a graph of all root IDs for a given object, + tracing the history of the object in terms of merges and splits. + Parameters ---------- root_id : int - Object root id + Object root ID. timestamp_past : datetime.datetime or None, optional Cutoff for the lineage graph backwards in time. By default, None. 
timestamp_future : datetime.datetime or None, optional Cutoff for the lineage graph going forwards in time. By default, None. as_nx_graph: bool - if True, a networkx graph is returned + If True, a NetworkX graph is returned. + exclude_links_to_future: bool + If True, links from nodes before `timestamp_future` to after + `timestamp_future` are removed. If False, the link(s) which has one node + before timestamp and one node after timestamp is kept. + exclude_links_to_past: bool + If True, links from nodes before `timestamp_past` to after `timestamp_past` + are removed. If False, the link(s) which has one node before timestamp and + one node after timestamp is kept. Returns ------- dict - Dictionary describing the lineage graph and operations for the root id. + Dictionary describing the lineage graph and operations for the root ID. Not + returned if `as_nx_graph` is True. The dictionary contains the following + keys: + + "directed" : bool + Whether the graph is directed. + "graph" : dict + Dictionary of graph attributes. + "links" : list of dict + Each element of the list is a dictionary describing an edge in the + lineage graph as "source" and "target" keys. + "multigraph" : bool + Whether the graph is a multigraph. + "nodes" : list of dict + Each element of the list is a dictionary describing a node in the + lineage graph, usually with "id", "timestamp", and "operation_id" + keys. + nx.DiGraph + NetworkX directed graph of the lineage graph. Only returned if `as_nx_graph` + is True. """ root_id = root_id_int_list_check(root_id, make_unique=True) @@ -743,56 +934,114 @@ def get_lineage_graph( data = json.dumps({"root_ids": root_id}, cls=BaseEncoder) r = handle_response(self.session.post(url, data=data, params=params)) + if exclude_links_to_future or exclude_links_to_past: + bad_ids = [] + for node in r["nodes"]: + node_ts = datetime.datetime.fromtimestamp(node["timestamp"]) + node_ts = node_ts.astimezone(datetime.timezone.utc) + if ( + exclude_links_to_past and (node_ts < timestamp_past) + if timestamp_past is not None + else False + ): + bad_ids.append(node["id"]) + if ( + exclude_links_to_future and (node_ts > timestamp_future) + if timestamp_future is not None + else False + ): + bad_ids.append(node["id"]) + + r["nodes"] = [node for node in r["nodes"] if node["id"] not in bad_ids] + r["links"] = [ + link + for link in r["links"] + if link["source"] not in bad_ids and link["target"] not in bad_ids + ] + if as_nx_graph: return nx.node_link_graph(r) else: return r - def get_latest_roots(self, root_id, timestamp_future=None): - """Returns root ids that are the latest successors of a given root id. + def get_latest_roots( + self, root_id, timestamp=None, timestamp_future=None + ) -> np.ndarray: + """ + Returns root IDs that are related to the given `root_id` at a given + timestamp. Can be used to find the "latest" root IDs associated with an object. + Parameters ---------- root_id : int - Object root id + Object root ID. + timestamp : datetime.datetime or None, optional + Timestamp of where to query IDs from. If None then will assume you want + till now. timestamp_future : datetime.datetime or None, optional - Cutoff for the search going forwards in time. By default, None. + DEPRECATED name, use `timestamp` instead. + Timestamp to suggest IDs from (note can be in the past relative to the + root). By default, None. Returns ------- np.ndarray - 1d array with all latest successors + 1d array with all latest successors. 
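Editorial usage sketch (not part of the patch) for the lineage graph described above, assuming a hypothetical datastack name and root ID.

    import datetime
    from caveclient import CAVEclient

    client = CAVEclient("my_datastack")          # hypothetical datastack name
    root_id = 864691135000000000                 # hypothetical root ID

    # Lineage as a NetworkX DiGraph, cut off at a hypothetical past timestamp
    past = datetime.datetime(2023, 1, 1, tzinfo=datetime.timezone.utc)
    g = client.chunkedgraph.get_lineage_graph(
        root_id, timestamp_past=past, as_nx_graph=True
    )
    print(g.number_of_nodes(), g.number_of_edges())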
""" root_id = root_id_int_list_check(root_id, make_unique=True) - timestamp_past = self.get_root_timestamps(root_id).min() - - lineage_graph = self.get_lineage_graph( - root_id, - timestamp_past=timestamp_past, - timestamp_future=timestamp_future, - as_nx_graph=True, - ) + timestamp_root = self.get_root_timestamps(root_id).min() + if timestamp_future is not None: + logger.warning("timestamp_future is deprecated, use timestamp instead") + timestamp = timestamp_future - out_degree_dict = dict(lineage_graph.out_degree) - nodes = np.array(list(out_degree_dict.keys())) - out_degrees = np.array(list(out_degree_dict.values())) - return nodes[out_degrees == 0] + if timestamp is None: + timestamp = datetime.datetime.now(datetime.timezone.utc) + elif timestamp.tzinfo is None: + timestamp = timestamp.replace(tzinfo=datetime.timezone.utc) + + # or if timestamp_root is less than timestamp_future + if (timestamp is None) or (timestamp_root < timestamp): + lineage_graph = self.get_lineage_graph( + root_id, + timestamp_past=timestamp_root, + timestamp_future=timestamp, + exclude_links_to_future=True, + as_nx_graph=True, + ) + # then we want the leaves of the tree + out_degree_dict = dict(lineage_graph.out_degree) + nodes = np.array(list(out_degree_dict.keys())) + out_degrees = np.array(list(out_degree_dict.values())) + return nodes[out_degrees == 0] + else: + # then timestamp is in fact in the past + lineage_graph = self.get_lineage_graph( + root_id, + timestamp_future=timestamp_root, + timestamp_past=timestamp, + as_nx_graph=True, + ) + in_degree_dict = dict(lineage_graph.in_degree) + nodes = np.array(list(in_degree_dict.keys())) + in_degrees = np.array(list(in_degree_dict.values())) + return nodes[in_degrees == 0] - def get_original_roots(self, root_id, timestamp_past=None): - """Returns root ids that are the latest successors of a given root id. + def get_original_roots(self, root_id, timestamp_past=None) -> np.ndarray: + """Returns root IDs that are the latest successors of a given root ID. Parameters ---------- root_id : int - Object root id + Object root ID. timestamp_past : datetime.datetime or None, optional Cutoff for the search going backwards in time. By default, None. Returns ------- np.ndarray - 1d array with all latest successors + 1d array with all latest successors. """ root_id = root_id_int_list_check(root_id, make_unique=True) @@ -810,16 +1059,21 @@ def get_original_roots(self, root_id, timestamp_past=None): in_degrees = np.array(list(in_degree_dict.values())) return nodes[in_degrees == 0] - def is_latest_roots(self, root_ids, timestamp=None): - """Check whether these root_ids are still a root at this timestamp + def is_latest_roots(self, root_ids, timestamp=None) -> np.ndarray: + """Check whether these root IDs are still a root at this timestamp. Parameters ---------- - root_ids ([type]): root ids to check - timestamp (datetime.dateime, optional): timestamp to check whether these IDs are valid root_ids. Defaults to None (assumes now). + root_ids : list or array of int + Root IDs to check. + timestamp : datetime.datetime, optional + Timestamp to check whether these IDs are valid root IDs in the chunked + graph. Defaults to None (assumes now). - Returns: - np.array[bool]: boolean array of whether these are valid root_ids + Returns + ------- + np.array of bool + Array of whether these are valid root IDs. 
""" root_ids = root_id_int_list_check(root_ids, make_unique=False) @@ -848,28 +1102,36 @@ def suggest_latest_roots( return_all=False, return_fraction_overlap=False, ): - """Suggest latest roots for a given root id, based on overlap of component chunk ids. - Note that edits change chunk ids, and so this effectively measures the fraction of unchanged chunks - at a given chunk layer, which sets the size scale of chunks. Higher layers are coarser. + """ + Suggest latest roots for a given root id, based on overlap of component + chunk IDs. Note that edits change chunk IDs, and so this effectively measures + the fraction of unchanged chunks at a given chunk layer, which sets the size + scale of chunks. Higher layers are coarser. + Parameters ---------- - root_id : int64 - Root id of the potentially outdated object. + root_id : int + Root ID of the potentially outdated object. timestamp : datetime, optional - Datetime at which "latest" roots are being computed, by default None. If None, the current time is used. - Note that this has to be a timestamp after the creation of the root_id. + Datetime at which "latest" roots are being computed, by default None. If + None, the current time is used. Note that this has to be a timestamp after + the creation of the `root_id`. stop_layer : int, optional Chunk level at which to compute overlap, by default None. - No value will take the 4th from the top layer, which emphasizes speed and works well for larger objects. - Lower values are slower but more fine-grained. - Values under 2 (i.e. supervoxels) are not recommended except in extremely fine grained scenarios. + No value will take the 4th from the top layer, which emphasizes speed and + works well for larger objects. Lower values are slower but more + fine-grained. Values under 2 (i.e. supervoxels) are not recommended except + in extremely fine grained scenarios. return_all : bool, optional - If True, return all current ids sorted from most overlap to least, by default False. If False, only the top is returned. + If True, return all current IDs sorted from most overlap to least, by + default False. If False, only the top is returned. return_fraction_overlap : bool, optional - If True, return all fractions sorted by most overlap to least, by default False. If False, only the topmost value is returned. + If True, return all fractions sorted by most overlap to least, by default + False. If False, only the top value is returned. 
""" - curr_ids = self.get_latest_roots(root_id, timestamp_future=timestamp) + curr_ids = self.get_latest_roots(root_id, timestamp=timestamp) + if root_id in curr_ids: if return_all: if return_fraction_overlap: @@ -884,10 +1146,21 @@ def suggest_latest_roots( delta_layers = 4 if stop_layer is None: - stop_layer = self.segmentation_info.get("graph", {}).get("n_layers", 6) - delta_layers + stop_layer = ( + self.segmentation_info.get("graph", {}).get("n_layers", 6) + - delta_layers + ) stop_layer = max(1, stop_layer) - + chunks_orig = self.get_leaves(root_id, stop_layer=stop_layer) + while len(chunks_orig) == 0: + stop_layer -= 1 + if stop_layer == 1: + raise ValueError( + f"There were no children for root_id={root_id} at level 2, something is wrong with the chunkedgraph" + ) + chunks_orig = self.get_leaves(root_id, stop_layer=stop_layer) + chunk_list = np.array( [ len( @@ -909,21 +1182,31 @@ def suggest_latest_roots( else: return curr_ids[order] - def is_valid_nodes(self, node_ids, start_timestamp=None, end_timestamp=None): - """Check whether nodes are valid for given timestamp range + def is_valid_nodes( + self, node_ids, start_timestamp=None, end_timestamp=None + ) -> np.ndarray: + """Check whether nodes are valid for given timestamp range. - Valid is defined as existing in the chunkedgraph. This makes no statement + Valid is defined as existing in the chunked graph. This makes no statement about these IDs being roots, supervoxel or anything in-between. It also - does not take into account whether a root id has since been edited. + does not take into account whether a root ID has since been edited. + Parameters ---------- - node ids ([type]): node ids to check - start_timestamp (datetime.dateime, optional): timestamp to check whether these IDs were valid after this timestamp. Defaults to None (assumes now). - end_timestamp (datetime.dateime, optional): timestamp to check whether these IDs were valid before this timestamp. Defaults to None (assumes now). + node_ids : list or array of int + Node IDs to check. + start_timestamp : datetime.datetime, optional + Timestamp to check whether these IDs were valid after this timestamp. + Defaults to None (assumes now). + end_timestamp : datetime.datetime, optional + Timestamp to check whether these IDs were valid before this timestamp. + Defaults to None (assumes now). - Returns: - np.array[np.Boolean]: boolean array of whether these are valid IDs + Returns + ------- + np.array of bool + Array of whether these are valid IDs. """ node_ids = root_id_int_list_check(node_ids, make_unique=False) @@ -960,17 +1243,18 @@ def is_valid_nodes(self, node_ids, start_timestamp=None, end_timestamp=None): return np.isin(node_ids, valid_ids) - def get_root_timestamps(self, root_ids): + def get_root_timestamps(self, root_ids) -> np.ndarray: """Retrieves timestamps when roots where created. Parameters ---------- - root_ids: Iterable, - Iterable of seed root ids. + root_ids: Iterable of int + Iterable of root IDs to query. Returns ------- - + np.array of datetime.datetime + Array of timestamps when `root_ids` were created. """ root_ids = root_id_int_list_check(root_ids, make_unique=False) @@ -986,23 +1270,30 @@ def get_root_timestamps(self, root_ids): [datetime.datetime.fromtimestamp(ts, pytz.UTC) for ts in r["timestamp"]] ) - def get_past_ids(self, root_ids, timestamp_past=None, timestamp_future=None): - """For a set of root ids, get the list of ids at a past or future time point that could contain parts of the same object. 
+ def get_past_ids( + self, root_ids, timestamp_past=None, timestamp_future=None + ) -> dict: + """ + For a set of root IDs, get the list of IDs at a past or future time point + that could contain parts of the same object. + Parameters ---------- - root_ids: Iterable, - Iterable of seed root ids. + root_ids : Iterable of int + Iterable of root IDs to query. timestamp_past : datetime.datetime or None, optional Time of a point in the past for which to look up root ids. Default is None. timestamp_future : datetime.datetime or None, optional - Time of a point in the future for which to look up root ids. Not implemented on the server currently. Default is None. + Time of a point in the future for which to look up root ids. Not + implemented on the server currently. Default is None. Returns ------- dict - Dict with keys `future_id_map` and `past_id_map`. Each is a dict whose keys are the supplied root ids and whose values - are the list of related root ids at the past/future time stamp. + Dict with keys "future_id_map" and "past_id_map". Each is a dict whose keys + are the supplied `root_ids` and whose values are the list of related + root IDs at `timestamp_past`/`timestamp_future`. """ root_ids = root_id_int_list_check(root_ids, make_unique=True) @@ -1035,19 +1326,31 @@ def get_past_ids(self, root_ids, timestamp_past=None, timestamp_future=None): def get_delta_roots( self, timestamp_past: datetime.datetime, - timestamp_future: datetime.datetime = datetime.datetime.utcnow(), - ): - """get the list of roots that have changed between timetamp_past and timestamp_future + timestamp_future: datetime.datetime = datetime.datetime.now( + datetime.timezone.utc + ), + ) -> Tuple[np.ndarray, np.ndarray]: + """ + Get the list of roots that have changed between `timetamp_past` and + `timestamp_future`. - Args: - timestamp_past (datetime.datetime): past timepoint to query - timestamp_future (datetime.datetime, optional): future timepoint to query. Defaults to datetime.datetime.utcnow(). + Parameters + ---------- + timestamp_past : datetime.datetime + Past timepoint to query + timestamp_future : datetime.datetime, optional + Future timepoint to query. Defaults to + ``datetime.datetime.now(datetime.timezone.utc)``. - Returns: - old_roots (np.ndarray): roots that have expired in that interval - new_roots (np.ndarray): roots that are new in that interval + Returns + ------- + old_roots : np.ndarray of np.int64 + Roots that have expired in that interval. + new_roots : np.ndarray of np.int64 + Roots that are new in that interval. """ + endpoint_mapping = self.default_url_mapping params = package_timestamp(timestamp_past, name="timestamp_past") params.update(package_timestamp(timestamp_future, name="timestamp_future")) @@ -1056,13 +1359,13 @@ def get_delta_roots( r = handle_response(self.session.get(url, params=params)) return np.array(r["old_roots"]), np.array(r["new_roots"]) - def get_oldest_timestamp(self): - """Get the oldest timestamp in the database + def get_oldest_timestamp(self) -> datetime.datetime: + """Get the oldest timestamp in the database. Returns ------- datetime.datetime - Oldest timestamp in the database + Oldest timestamp in the database. 
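Editorial usage sketch (not part of the patch) for get_past_ids, get_delta_roots, and get_oldest_timestamp; the datastack name, root IDs, and timestamps are hypothetical.

    import datetime
    from caveclient import CAVEclient

    client = CAVEclient("my_datastack")          # hypothetical datastack name

    # Map some root IDs back to their ancestors at a past timestamp
    past = datetime.datetime(2023, 1, 1, tzinfo=datetime.timezone.utc)
    id_maps = client.chunkedgraph.get_past_ids(
        [864691135000000000, 864691135000000001],   # hypothetical root IDs
        timestamp_past=past,
    )
    past_ids = id_maps["past_id_map"]

    # All roots that expired or appeared since that timestamp (future defaults to now)
    old_roots, new_roots = client.chunkedgraph.get_delta_roots(timestamp_past=past)

    # Earliest timestamp in the chunked graph
    t0 = client.chunkedgraph.get_oldest_timestamp()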
""" endpoint_mapping = self.default_url_mapping url = self._endpoints["oldest_timestamp"].format_map(endpoint_mapping) diff --git a/caveclient/endpoints.py b/caveclient/endpoints.py index e3e6bd0f..d31c85ba 100644 --- a/caveclient/endpoints.py +++ b/caveclient/endpoints.py @@ -61,14 +61,14 @@ "join_query": mat_v3_api + "/datastack/{datastack_name}/version/{version}/query", "table_count": mat_v2_api + "/datastack/{datastack_name}/version/{version}/table/{table_name}/count", - "versions": mat_v2_api + "/datastack/{datastack_name}/versions", - "version_metadata": mat_v2_api + "/datastack/{datastack_name}/version/{version}", + "versions": mat_v3_api + "/datastack/{datastack_name}/versions", + "version_metadata": mat_v3_api + "/datastack/{datastack_name}/version/{version}", "tables": mat_v2_api + "/datastack/{datastack_name}/version/{version}/tables", "metadata": mat_v3_api + "/datastack/{datastack_name}/version/{version}/table/{table_name}/metadata", "all_tables_metadata": mat_v3_api + "/datastack/{datastack_name}/version/{version}/tables/metadata", - "versions_metadata": mat_v2_api + "/datastack/{datastack_name}/metadata", + "versions_metadata": mat_v3_api + "/datastack/{datastack_name}/metadata", "ingest_annotation_table": mat_v2_api + "/materialize/run/ingest_annotations/datastack/{datastack_name}/{table_name}", "segmentation_metadata": mat_v3_api @@ -85,6 +85,8 @@ + "/datastack/{datastack_name}/version/{version}/views/{view_name}/schema", "view_schemas": mat_v3_api + "/datastack/{datastack_name}/version/{version}/views/schemas", + "unique_string_values": mat_v3_api + + "/datastack/{datastack_name}/table/{table_name}/unique_string_values", } materialization_api_versions = { @@ -260,6 +262,17 @@ l2cache_endpoints_v1 = { "l2cache_data": l2cache_v1 + "/table/{table_id}/attributes", "l2cache_meta": l2cache_v1 + "/attribute_metadata", + "l2cache_table_mapping": l2cache_v1 + "/table_mapping", } l2cache_api_versions = {1: l2cache_endpoints_v1} + +# ------------------------------- +# ------ Neuroglancer endpoints +# ------------------------------- + +fallback_ngl_endpoint = "https://neuroglancer.neuvue.io/" +ngl_endpoints_common = { + 'get_info': "{ngl_url}/version.json", + 'fallback_ngl_url': fallback_ngl_endpoint, +} \ No newline at end of file diff --git a/caveclient/jsonservice.py b/caveclient/jsonservice.py index 94c8e451..3ea66418 100644 --- a/caveclient/jsonservice.py +++ b/caveclient/jsonservice.py @@ -10,14 +10,33 @@ jsonservice_common, jsonservice_api_versions, default_global_server_address, + ngl_endpoints_common, ) +import os import requests +import numpy as np +import numbers import json import re server_key = "json_server_address" +def neuroglancer_json_encoder(obj): + """JSON encoder for neuroglancer states. + Differs from normal in that it expresses ints as strings""" + if isinstance(obj, numbers.Integral): + return str(obj) + if isinstance(obj, np.integer): + return str(obj) + elif isinstance(obj, np.floating): + return float(obj) + elif isinstance(obj, np.ndarray): + return list(obj) + elif isinstance(obj, (set, frozenset)): + return list(obj) + raise TypeError + def JSONService( server_address=None, auth_client=None, @@ -118,6 +137,36 @@ def ngl_url(self): def ngl_url(self, new_ngl_url): self._ngl_url = new_ngl_url + def get_neuroglancer_info(self, ngl_url=None): + """Get the info field from a Neuroglancer deployment + + Parameters + ---------- + ngl_url : str (optional) + URL to a Neuroglancer deployment. + If None, defaults to the value for the datastack or the client. 
+ + Returns + ------- + dict + JSON-formatted info field from the Neuroglancer deployment + """ + if ngl_url is None: + ngl_url = self.ngl_url + + url_mapping = self.default_url_mapping + url_mapping["ngl_url"] = ngl_url + url = ngl_endpoints_common.get('get_info').format_map(url_mapping) + response = self.session.get(url) + # Not all neuroglancer deployments have a version.json, + # so return empty if not found rather than throw error. + if response.status_code == 404: + return {} + + handle_response(response, as_json=False) + return json.loads(response.content) + + def get_state_json(self, state_id): """Download a Neuroglancer JSON state @@ -144,7 +193,7 @@ def upload_state_json(self, json_state, state_id=None, timestamp=None): Parameters ---------- json_state : dict - JSON-formatted Neuroglancer state + Dict representation of a neuroglancer state state_id : int ID of a JSON state uploaded to the state service. Using a state_id is an admin feature. @@ -165,15 +214,48 @@ def upload_state_json(self, json_state, state_id=None, timestamp=None): url_mapping["state_id"] = state_id url = self._endpoints["upload_state_w_id"].format_map(url_mapping) - response = self.session.post(url, data=json.dumps(json_state, cls=BaseEncoder)) + response = self.session.post( + url, + data=json.dumps( + json_state, + default=neuroglancer_json_encoder, + ) + ) handle_response(response, as_json=False) response_re = re.search(".*\/(\d+)", str(response.content)) return int(response_re.groups()[0]) - def build_neuroglancer_url(self, state_id, ngl_url=None): + def save_state_json_local(self, json_state, filename, overwrite=False): + """Save a Neuroglancer JSON state to a JSON file locally. + + Parameters + ---------- + json_state : dict + Dict representation of a neuroglancer state + filename : str + Filename to save the state to + overwrite : bool + Whether to overwrite the file if it exists. Default False. + + Returns + ------- + None + """ + if os.path.exists(filename) and not overwrite: + raise ValueError("File exists and overwrite is False") + with open(filename, "w") as f: + json.dump(json_state, f, default=neuroglancer_json_encoder) + + def build_neuroglancer_url( + self, + state_id, + ngl_url=None, + target_site=None, + static_url=False, + ): """Build a URL for a Neuroglancer deployment that will automatically retrieve specified state. If the datastack is specified, this is prepopulated from the info file field "viewer_site". - If no ngl_url is specified in either the function or the client, only the JSON state url is returned. + If no ngl_url is specified in either the function or the client, a fallback neuroglancer deployment is used. Parameters ---------- @@ -181,7 +263,13 @@ def build_neuroglancer_url(self, state_id, ngl_url=None): State id to retrieve ngl_url : str Base url of a neuroglancer deployment. If None, defaults to the value for the datastack or the client. - If no value is found, only the URL to the JSON state is returned. + As a fallback, a default deployment is used. + target_site : 'seunglab' or 'cave-explorer' or 'mainline' or None + Set this to 'seunglab' for a seunglab deployment, or either 'cave-explorer'/'mainline' for a google main branch deployment. + If None, checks the info field of the neuroglancer endpoint to determine which to use. + Default is None. + static_url : bool + If True, treats "state_id" as a static URL directly to the JSON and does not use the state service. 
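Editorial usage sketch (not part of the patch) for uploading a state, building a viewer link, and keeping a local copy. It assumes the JSON state client is exposed as client.state (as in the framework client) and uses a hypothetical datastack name and state dict.

    from caveclient import CAVEclient

    client = CAVEclient("my_datastack")          # hypothetical datastack name
    state = {"layers": []}                       # hypothetical neuroglancer state dict

    # Upload the state and build a link; when target_site is None, the deployment's
    # version.json is used to decide between seunglab and cave-explorer URL styles
    state_id = client.state.upload_state_json(state)
    url = client.state.build_neuroglancer_url(state_id)

    # Or save the state to a local JSON file
    client.state.save_state_json_local(state, "my_state.json", overwrite=True)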
Returns ------- @@ -189,19 +277,41 @@ def build_neuroglancer_url(self, state_id, ngl_url=None): The full URL requested """ if ngl_url is None: - ngl_url = self.ngl_url - if ngl_url is None: - ngl_url = "" - parameter_text = "" - elif ngl_url[-1] == "/": - parameter_text = "?json_url=" + if self.ngl_url is not None: + ngl_url = self.ngl_url + else: + ngl_url = ngl_endpoints_common['fallback_ngl_url'] + + if target_site is None and ngl_url is not None: + ngl_info = self.get_neuroglancer_info(ngl_url) + if len(ngl_info) > 0: + target_site = 'cave-explorer' + else: + target_site = "seunglab" + + if target_site == "seunglab": + if ngl_url[-1] == "/": + parameter_text = "?json_url=" + else: + parameter_text = "/?json_url=" + auth_text = "" + elif target_site == "cave-explorer" or target_site == "mainline": + if ngl_url[-1] == "/": + parameter_text = "#!" + else: + parameter_text = "/#!" + auth_text = "middleauth+" else: - parameter_text = "/?json_url=" + target_site_error = "A specified target_site must be one of 'seunglab', 'cave-explorer' or 'mainline'" + raise ValueError(target_site_error) - url_mapping = self.default_url_mapping - url_mapping["state_id"] = state_id - get_state_url = self._endpoints["get_state"].format_map(url_mapping) - url = ngl_url + parameter_text + get_state_url + if static_url: + url = ngl_url + parameter_text + state_id + else: + url_mapping = self.default_url_mapping + url_mapping["state_id"] = state_id + get_state_url = self._endpoints["get_state"].format_map(url_mapping) + url = ngl_url + parameter_text + auth_text + get_state_url return url diff --git a/caveclient/l2cache.py b/caveclient/l2cache.py index 10a74703..0a77a86a 100644 --- a/caveclient/l2cache.py +++ b/caveclient/l2cache.py @@ -1,10 +1,15 @@ -from .base import ClientBase, _api_endpoints, handle_response, BaseEncoder +import json +from urllib.parse import urlparse +from warnings import warn + +from requests.exceptions import HTTPError + +from .auth import AuthClient +from .base import BaseEncoder, ClientBase, _api_endpoints, handle_response from .endpoints import ( l2cache_api_versions, l2cache_endpoints_common, ) -from .auth import AuthClient -import json server_key = "l2cache_server_address" @@ -83,16 +88,21 @@ def default_url_mapping(self): return self._default_url_mapping.copy() def get_l2data(self, l2_ids, attributes=None): - """Gets the attributed statistics data for L2 ids. + """ + Gets the attributed statistics data for L2 ids. - Args: - l2_ids (list or np.ndarray): a list of level 2 ids - attributes (list, optional): a list of attributes to retrieve. - Defaults to None which will return all that are available. - Available stats are ['area_nm2', 'chunk_intersect_count', 'max_dt_nm', 'mean_dt_nm', 'pca', 'pca_val', 'rep_coord_nm', 'size_nm3']. See docs for more description. + Parameters + ---------- + l2_ids : list or np.ndarray + a list of level 2 ids + attributes : list, optional + a list of attributes to retrieve. Defaults to None which will return all that are available. + Available stats are ['area_nm2', 'chunk_intersect_count', 'max_dt_nm', 'mean_dt_nm', 'pca', 'pca_val', 'rep_coord_nm', 'size_nm3']. See docs for more description. - Returns: - dict: keys are l2 ids, values are data + Returns + ------- + dict + keys are l2 ids, values are data """ query_d = {"int64_as_str": False} @@ -135,6 +145,50 @@ def attributes(self): self._available_attributes = list(self.cache_metadata().keys()) return self._available_attributes + def table_mapping(self): + """Retrieves table mappings for l2 cache. 
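Editorial usage sketch (not part of the patch) for get_l2data, assuming the L2 cache client is exposed as client.l2cache (as in the framework client); the datastack name and level 2 IDs are hypothetical.

    from caveclient import CAVEclient

    client = CAVEclient("my_datastack")                     # hypothetical datastack name
    l2_ids = [160532044186583283, 160532044186583284]       # hypothetical level 2 IDs

    # Request only the attributes you need; omit `attributes` to get all available stats
    stats = client.l2cache.get_l2data(l2_ids, attributes=["size_nm3", "rep_coord_nm"])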
+ Parameters + ---------- + + Returns + ------- + dict + keys are pcg table names, values are dicts with fields `l2cache_id` and `cv_path`. + """ + endpoint_mapping = self.default_url_mapping + url = self._endpoints["l2cache_table_mapping"].format_map(endpoint_mapping) + response = self.session.get(url) + return handle_response(response) + + def has_cache(self, datastack_name=None): + """Checks if the l2 cache is available for the dataset + + Parameters + ---------- + datastack_name : str, optional + The name of the datastack to check, by default None (if None, uses the client's datastack) + + Returns + ------- + bool + True if the l2 cache is available, False otherwise + """ + seg_source = self.fc.info.segmentation_source(datastack_name=datastack_name) + if urlparse(seg_source).scheme != "graphene": + return False + table_name = self.fc.chunkedgraph.table_name + try: + table_mapping = self.table_mapping() + except HTTPError as e: + if e.response.status_code == 404: + warn( + f"L2cache deployment '{self.server_address}/l2cache' does not have a l2 cache table mapping. Assuming no cache." + ) + return False + else: + raise e + return table_name in table_mapping + client_mapping = { 1: L2CacheClientLegacy, diff --git a/caveclient/materializationengine.py b/caveclient/materializationengine.py index c4f53bbf..a30f2ef8 100644 --- a/caveclient/materializationengine.py +++ b/caveclient/materializationengine.py @@ -1,27 +1,29 @@ +import itertools +import json +import logging import re -from urllib.error import HTTPError import warnings -import pytz -import pandas as pd -from IPython.display import HTML -from .mytimer import MyTimeIt -from typing import Union, Iterable -import itertools -import pyarrow as pa from datetime import datetime, timezone +from typing import Iterable, Optional, Union +from urllib.error import HTTPError + import numpy as np -import json -from .endpoints import materialization_api_versions, materialization_common +import pandas as pd +import pyarrow as pa +import pytz +from cachetools import TTLCache, cached +from IPython.display import HTML + from .auth import AuthClient from .base import ( - ClientBase, BaseEncoder, + ClientBase, _api_endpoints, handle_response, ) -from cachetools import cached, TTLCache +from .endpoints import materialization_api_versions, materialization_common +from .mytimer import MyTimeIt from .tools.table_manager import TableManager, ViewManager -import logging logger = logging.getLogger(__name__) @@ -29,6 +31,7 @@ DEFAULT_COMPRESSION = "zstd" + def deserialize_query_response(response): """Deserialize pyarrow responses""" content_type = response.headers.get("Content-Type") @@ -110,7 +113,7 @@ def concatenate_position_columns(df, inplace=False): def convert_timestamp(ts: datetime): if ts == "now": - ts = datetime.utcnow() + ts = datetime.now(timezone.utc) if isinstance(ts, datetime): if ts.tzinfo is None: @@ -279,30 +282,49 @@ def version(self, x): else: raise ValueError("Version not in materialized database") - def most_recent_version(self, datastack_name=None): - """get the most recent version of materialization - for this datastack name + def most_recent_version(self, datastack_name=None) -> np.int: + """ + Get the most recent version of materialization for this datastack name - Args: - datastack_name (str, optional): datastack name to find most - recent materialization of. + Parameters + ---------- + datastack_name : str or None, optional + Name of the datastack, by default None. If None, uses the one specified in the client. 
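Editorial usage sketch (not part of the patch) for the new has_cache and table_mapping methods; the datastack name is hypothetical.

    from caveclient import CAVEclient

    client = CAVEclient("my_datastack")          # hypothetical datastack name

    # table_mapping() maps pcg table names to their l2cache_id and cv_path;
    # has_cache() uses it to report whether this datastack has an L2 cache at all
    if client.l2cache.has_cache():
        mapping = client.l2cache.table_mapping()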
+ Will be set correctly if you are using the framework_client + + Returns + ------- + np.int + Most recent version of materialization for this datastack name """ + versions = self.get_versions(datastack_name=datastack_name) return np.max(np.array(versions)) - def get_versions(self, datastack_name=None): - """get versions available + def get_versions(self, datastack_name=None, expired=False): + """Get the versions available - Args: - datastack_name ([type], optional): [description]. Defaults to None. + Parameters + ---------- + datastack_name : str or None, optional + Name of the datastack, by default None. + If None, uses the one specified in the client. + expired : bool, optional + Whether to include expired versions, by default False. + + Returns + ------- + dict + Dictionary of versions available """ if datastack_name is None: datastack_name = self.datastack_name endpoint_mapping = self.default_url_mapping endpoint_mapping["datastack_name"] = datastack_name url = self._endpoints["versions"].format_map(endpoint_mapping) - response = self.session.get(url) + query_args = {"expired": expired} + response = self.session.get(url, params=query_args) self.raise_for_status(response) return response.json() @@ -327,8 +349,9 @@ def get_tables(self, datastack_name=None, version=None): Name of the datastack, by default None. If None, uses the one specified in the client. Will be set correctly if you are using the framework_client - version: int or None, optional + version : int or None, optional the version to query, else get the tables in the most recent version + Returns ------- list @@ -349,21 +372,6 @@ def get_tables(self, datastack_name=None, version=None): return response.json() def get_annotation_count(self, table_name: str, datastack_name=None, version=None): - """Get number of annotations in a table - - Parameters - ---------- - table_name (str): - name of table to mark for deletion - datastack_name: str or None, optional, - Name of the datastack_name. If None, uses the one specified in the client. - version: int or None, optional - the version to query, else get the tables in the most recent version - Returns - ------- - int - number of annotations - """ if datastack_name is None: datastack_name = self.datastack_name if version is None: @@ -380,11 +388,19 @@ def get_annotation_count(self, table_name: str, datastack_name=None, version=Non return response.json() def get_version_metadata(self, version: int = None, datastack_name: str = None): - """get metadata about a version + """Get metadata about a version - Args: - version (int, optional): version number to get metadata about. Defaults to client default version. - datastack_name (str, optional): datastack to query. Defaults to client default datastack. + Parameters + ---------- + version : int or None, optional + Materialization version, by default None. If None, defaults to the value set in the client. + datastack_name : str or None, optional + Datastack name, by default None. If None, defaults to the value set in the client. 
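Editorial usage sketch (not part of the patch) for version lookups with the new `expired` flag, assuming the materialization client is exposed as client.materialize (as in the framework client); the datastack name is hypothetical.

    from caveclient import CAVEclient

    client = CAVEclient("my_datastack")          # hypothetical datastack name

    latest = client.materialize.most_recent_version()
    all_versions = client.materialize.get_versions(expired=True)   # include expired versions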
+ + Returns + ------- + dict + Dictionary of metadata about the version """ if datastack_name is None: datastack_name = self.datastack_name @@ -421,21 +437,29 @@ def get_timestamp(self, version: int = None, datastack_name: str = None): return convert_timestamp(meta["time_stamp"]) @cached(cache=TTLCache(maxsize=100, ttl=60 * 60 * 12)) - def get_versions_metadata(self, datastack_name=None): - """get the metadata for all the versions that are presently available and valid + def get_versions_metadata(self, datastack_name=None, expired=False): + """Get the metadata for all the versions that are presently available and valid - Args: - datastack_name (str, optional): datastack to query. If None, defaults to the value set in the client. + Parameters + ---------- + datastack_name : str or None, optional + Datastack name, by default None. If None, defaults to the value set in the client. + expired : bool, optional + Whether to include expired versions, by default False. - Returns: - list[dict]: a list of metadata dictionaries + Returns + ------- + list[dict] + List of metadata dictionaries """ + if datastack_name is None: datastack_name = self.datastack_name endpoint_mapping = self.default_url_mapping endpoint_mapping["datastack_name"] = datastack_name url = self._endpoints["versions_metadata"].format_map(endpoint_mapping) - response = self.session.get(url) + query_args = {"expired": expired} + response = self.session.get(url, params=query_args) d = handle_response(response) for md in d: md["time_stamp"] = convert_timestamp(md["time_stamp"]) @@ -452,21 +476,23 @@ def get_table_metadata( ): """Get metadata about a table - Args: - table_name (str): - name of table to mark for deletion - datastack_name: str or None, optional, - Name of the datastack_name. - If None, uses the one specified in the client. - version (int, optional): - version to get. If None, uses the one specified in the client. - log_warning (bool, optional): - whether to print out warnings to the logger. - Defaults to True. - - Returns: - dict: metadata dictionary for table + Parameters + ---------- + table_name : str + name of table to mark for deletion + datastack_name : str or None, optional + Name of the datastack_name. If None, uses the one specified in the client. + version : int, optional + Version to get. If None, uses the one specified in the client. + log_warning : bool, optional + Whether to print out warnings to the logger. Defaults to True. + + Returns + ------- + dict + Metadata dictionary for table """ + if datastack_name is None: datastack_name = self.datastack_name if version is None: @@ -622,55 +648,70 @@ def query_table( get_counts: bool = False, random_sample: int = None, ): - """generic query on materialization tables - - Args: - table: 'str' - - filter_in_dict (dict , optional): - keys are column names, values are allowed entries. - Defaults to None. - filter_out_dict (dict, optional): - keys are column names, values are not allowed entries. - Defaults to None. - filter_equal_dict (dict, optional): - inner layer: keys are column names, values are specified entry. - Defaults to None. - filter_spatial (dict, optional): - inner layer: keys are column names, values are bounding boxes - as [[min_x, min_y,min_z],[max_x, max_y, max_z]] - Expressed in units of the voxel_resolution of this dataset. 
- filter_regex_dict (dict, optional): - inner layer: keys are column names, values are regex strings - offset (int, optional): offset in query result - limit (int, optional): maximum results to return (server will set upper limit, see get_server_config) - select_columns (list of str, optional): columns to select. Defaults to None. - suffixes: (list[str], optional): suffixes to use on duplicate columns - offset (int, optional): result offset to use. Defaults to None. - will only return top K results. - datastack_name (str, optional): datastack to query. - If None defaults to one specified in client. - return_df (bool, optional): whether to return as a dataframe - default True, if False, data is returned as json (slower) - split_positions (bool, optional): whether to break position columns into x,y,z columns - default False, if False data is returned as one column with [x,y,z] array (slower) - materialization_version (int, optional): version to query. - If None defaults to one specified in client. - timestamp (datetime.datetime, optional): timestamp to query - If passsed will do a live query. Error if also passing a materialization version - metadata: (bool, optional) : toggle to return metadata (default True) - If True (and return_df is also True), return table and query metadata in the df.attr dictionary. - merge_reference: (bool, optional) : toggle to automatically join reference table - If True, metadata will be queries and if its a reference table it will perform a join - on the reference table to return the rows of that - desired_resolution: (Iterable[float], Optional) : desired resolution you want all spatial points returned in - If None, defaults to one specified in client, if that is None then points are returned - as stored in the table and should be in the resolution specified in the table metadata - random_sample: (int, optional) : if given, will do a tablesample of the table to return that many annotations - Returns: - pd.DataFrame: a pandas dataframe of results of query + """Generic query on materialization tables + + Parameters + ---------- + table : str + Table to query + filter_in_dict : dict, optional + Keys are column names, values are allowed entries, by default None + filter_out_dict : dict, optional + Keys are column names, values are not allowed entries, by default None + filter_equal_dict : dict, optional + Keys are column names, values are specified entry, by default None + filter_spatial_dict : dict, optional + Keys are column names, values are bounding boxes expressed in units of the + voxel_resolution of this dataset. Bounding box is [[min_x, min_y,min_z],[max_x, max_y, max_z]], by default None + filter_regex_dict : dict, optional + Keys are column names, values are regex strings, by default None + select_columns : list of str, optional + Columns to select, by default None + offset : int, optional + Result offset to use, by default None. Will only return top K results. + limit : int, optional + Maximum results to return (server will set upper limit, + see get_server_config), by default None + datastack_name : str, optional + Datastack to query, by default None. If None, defaults to one + specified in client. + return_df : bool, optional + Whether to return as a dataframe, by default True. If False, data is + returned as json (slower). + split_positions : bool, optional + Whether to break position columns into x,y,z columns, by default False. 
+ If False data is returned as one column with [x,y,z] array (slower) + materialization_version : int, optional + Version to query, by default None. + If None, defaults to one specified in client. + timestamp : datetime.datetime, optional + Timestamp to query, by default None. If passsed will do a live query. + Error if also passing a materialization version + metadata : bool, optional + Toggle to return metadata (default True), by default True. If True + (and return_df is also True), return table and query metadata in the + df.attr dictionary. + merge_reference : bool, optional + Toggle to automatically join reference table, by default True. If True, + metadata will be queries and if its a reference table it will perform a + join on the reference table to return the rows of that + desired_resolution : Iterable[float], optional + Desired resolution you want all spatial points returned in, by default None. + If None, defaults to one specified in client, if that is None then points + are returned as stored in the table and should be in the resolution + specified in the table metadata + get_counts : bool, optional + Whether to get counts of the query, by default False + random_sample : int, optional + If given, will do a tablesample of the of the table to return that many + annotations + Returns + ------- + pd.DataFrame + A pandas dataframe of results of query """ + if desired_resolution is None: desired_resolution = self.desired_resolution if timestamp is not None: @@ -743,7 +784,6 @@ def query_table( df = deserialize_query_response(response) if desired_resolution is not None: if not response.headers.get("dataframe_resolution", None): - if len(desired_resolution) != 3: raise ValueError( "desired resolution needs to be of length 3, for xyz" @@ -804,58 +844,65 @@ def join_query( desired_resolution: Iterable = None, random_sample: int = None, ): - """generic query on materialization tables - - Args: - tables: list of lists with length 2 or 'str' - list of two lists: first entries are table names, second - entries are the columns used for the join - filter_in_dict (dict of dicts, optional): - outer layer: keys are table names - inner layer: keys are column names, values are allowed entries. - Defaults to None. - filter_out_dict (dict of dicts, optional): - outer layer: keys are table names - inner layer: keys are column names, values are not allowed entries. - Defaults to None. - filter_equal_dict (dict of dicts, optional): - outer layer: keys are table names - inner layer: keys are column names, values are specified entry. - Defaults to None. - filter_spatial (dict of dicts, optional): - outer layer: keys are table names: - inner layer: keys are column names, values are bounding boxes - as [[min_x, min_y,min_z],[max_x, max_y, max_z]] - Expressed in units of the voxel_resolution of this dataset. - Defaults to None - filter_regex_dict (dict of dicts, optional): - outer layer: keys are table names: - inner layer: keys are column names, values are regex strings - Defaults to None - select_columns (dict of lists of str, optional): keys are table names,values are the list of columns from that table. - Defaults to None, which will select all tables. Will be passed to server as select_column_maps. - Passing a list will be passed as select_columns which is deprecated. - offset (int, optional): result offset to use. Defaults to None. - will only return top K results. 
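Editorial usage sketch (not part of the patch) for query_table with the parameters documented above; the table name, column names, and filter values are hypothetical.

    from caveclient import CAVEclient

    client = CAVEclient("my_datastack")          # hypothetical datastack name

    # Filtered query against a hypothetical annotation table, with positions
    # converted to a chosen resolution and split into x/y/z columns
    df = client.materialize.query_table(
        "cell_type_table",
        filter_equal_dict={"cell_type": "pyramidal"},
        select_columns=["id", "pt_root_id", "pt_position", "cell_type"],
        desired_resolution=[1, 1, 1],
        split_positions=True,
        limit=1000,
    )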
- limit (int, optional): maximum results to return (server will set upper limit, see get_server_config) - suffixes (dict, optional): suffixes to use for duplicate columns, keys are table names, values are the suffix - datastack_name (str, optional): datastack to query. - If None defaults to one specified in client. - return_df (bool, optional): whether to return as a dataframe - default True, if False, data is returned as json (slower) - split_positions (bool, optional): whether to break position columns into x,y,z columns - default False, if False data is returned as one column with [x,y,z] array (slower) - materialization_version (int, optional): version to query. - If None defaults to one specified in client. - metadata: (bool, optional) : toggle to return metadata - If True (and return_df is also True), return table and query metadata in the df.attr dictionary. - desired_resolution (Iterable, optional): - What resolution to convert position columns to. Defaults to None will use defaults. - random_sample: (int, optional) : if given, will do a tablesample of the table to return that many annotations - Returns: - pd.DataFrame: a pandas dataframe of results of query + """Generic query on materialization tables + Parameters + ---------- + tables : list of lists with length 2 or 'str' + list of two lists: first entries are table names, second entries are the + columns used for the join. + filter_in_dict : dict of dicts, optional + outer layer: keys are table names + inner layer: keys are column names, values are allowed entries, by default None + filter_out_dict : dict of dicts, optional + outer layer: keys are table names + inner layer: keys are column names, values are not allowed entries, by default None + filter_equal_dict : dict of dicts, optional + outer layer: keys are table names + inner layer: keys are column names, values are specified entry, by default None + filter_spatial_dict : dict of dicts, optional + outer layer: keys are table names, inner layer: keys are column names. + Values are bounding boxes as [[min_x, min_y,min_z],[max_x, max_y, max_z]], + expressed in units of the voxel_resolution of this dataset. Defaults to None. + filter_regex_dict : dict of dicts, optional + outer layer: keys are table names. inner layer: keys are column names, + values are regex strings. Defaults to None + select_columns : dict of lists of str, optional + keys are table names,values are the list of columns from that table. + Defaults to None, which will select all tables. Will be passed to server + as select_column_maps. Passing a list will be passed as select_columns + which is deprecated. + offset : int, optional + result offset to use. Defaults to None. Will only return top K results. + limit : int, optional + maximum results to return (server will set upper limit, see get_server_config) + suffixes : dict, optional + suffixes to use for duplicate columns, keys are table names, values are the suffix + datastack_name : str, optional + datastack to query. If None defaults to one specified in client. + return_df : bool, optional + whether to return as a dataframe default True, if False, data is returned + as json (slower) + split_positions : bool, optional + whether to break position columns into x,y,z columns default False, if False + data is returned as one column with [x,y,z] array (slower) + materialization_version : int, optional + version to query. If None defaults to one specified in client. 
+ metadata : bool, optional + toggle to return metadata If True (and return_df is also True), return + table and query metadata in the df.attr dictionary. + desired_resolution : Iterable, optional + What resolution to convert position columns to. Defaults to None will use + defaults. + random_sample : int, optional + if given, will do a tablesample of the table to return that many annotations + + Returns + ------- + pd.DataFrame + a pandas dataframe of results of query """ + if materialization_version is None: materialization_version = self.version if datastack_name is None: @@ -924,16 +971,24 @@ def join_query( return concatenate_position_columns(df, inplace=True) def map_filters(self, filters, timestamp, timestamp_past): - """translate a list of filter dictionaries - from a point in the future, to a point in the past + """Translate a list of filter dictionaries from a point in the + future to a point in the past - Args: - filters (list[dict]): filter dictionaries with - timestamp ([type]): [description] - timestamp_past ([type]): [description] + Parameters + ---------- + filters : list[dict] + filter dictionaries with root_ids + timestamp : datetime.datetime + timestamp to query + timestamp_past : datetime.datetime + timestamp to query from - Returns: - [type]: [description] + Returns + ------- + list[dict] + filter dictionaries with past root_ids + dict + mapping of future root_ids to past root_ids """ timestamp = convert_timestamp(timestamp) timestamp_past = convert_timestamp(timestamp_past) @@ -1077,15 +1132,19 @@ def ingest_annotation_table( table_name: str, datastack_name: str = None, ): - """Trigger supervoxel lookup and rootID looksup of new annotations in a table. - + """Trigger supervoxel lookup and root ID lookup of new annotations in a table. - Args: - table_name (str): table to drigger - datastack_name (str, optional): datastack to trigger it. Defaults to what is set in client. + Parameters + ---------- + table_name : str + Table to trigger + datastack_name : str, optional + Datastack to trigger it. Defaults to what is set in client. - Returns: - response: status code of response from server + Returns + ------- + dict + Status code of response from server """ if datastack_name is None: datastack_name = self.datastack_name @@ -1102,18 +1161,24 @@ def lookup_supervoxel_ids( table_name: str, annotation_ids: list = None, datastack_name: str = None, - ): + ) -> dict: """Trigger supervoxel lookups of new annotations in a table. - Args: - table_name (str): table to drigger - annotation_ids: (list, optional): list of annotation ids to lookup. Default is None, - which will trigger lookup of entire table. - datastack_name (str, optional): datastack to trigger it. Defaults to what is set in client. + Parameters + ---------- + table_name : str + Table to trigger + annotation_ids : list, optional + List of annotation ids to lookup. Default is None, which will trigger + lookup of entire table. + datastack_name : str, optional + Datastack to trigger it. Defaults to what is set in client. - Returns: - response: status code of response from server + Returns + ------- + dict + Status code of response from server """ if datastack_name is None: datastack_name = self.datastack_name @@ -1156,70 +1221,103 @@ def live_live_query( allow_missing_lookups: bool = False, random_sample: int = None, ): - """Beta method for querying cave annotation tables with rootIDs and annotations at a particular - timestamp. 
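Editorial usage sketch (not part of the patch) for join_query as documented above; the table names, join columns, filters, and suffixes are hypothetical.

    from caveclient import CAVEclient

    client = CAVEclient("my_datastack")          # hypothetical datastack name

    # Join two hypothetical tables on their join columns; filter dicts and
    # suffixes are nested/keyed by table name for join queries
    df = client.materialize.join_query(
        [["proofreading_table", "target_id"], ["cell_type_table", "id"]],
        filter_equal_dict={"cell_type_table": {"cell_type": "pyramidal"}},
        suffixes={"proofreading_table": "_pr", "cell_type_table": "_ct"},
    )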
Note: this method requires more explicit mapping of filters and selection to table - as its designed to test a more general endpoint that should eventually support complex joins. - - Args: - table (str): principle table to query - timestamp (datetime): timestamp to use for querying - joins (list): a list of joins, where each join is a list of [table1,column1, table2, column2] - filter_in_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and list values to accept . Defaults to None. - filter_out_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and list values to reject. Defaults to None. - filter_equal_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and values to equate. Defaults to None. - filter_spatial_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and values of 2x3 list of bounds. Defaults to None. - filter_regex_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and values of regex strings. Defaults to None. - select_columns (_type_, optional): a dictionary with tables as keys, values are list of columns. Defaults to None. - offset (int, optional): value to offset query by. Defaults to None. - limit (int, optional): limit of query. Defaults to None. - datastack_name (str, optional): datastack to query. Defaults to set by client. - split_positions (bool, optional): whether to split positions into seperate columns, True is faster. Defaults to False. - metadata (bool, optional): whether to attach metadata to dataframe. Defaults to True. - suffixes (dict, optional): what suffixes to use on joins, keys are table_names, values are suffixes. Defaults to None. - desired_resolution (Iterable, optional): What resolution to convert position columns to. Defaults to None will use defaults. - allow_missing_lookups (bool, optional): If there are annotations without supervoxels and rootids yet, allow results. Defaults to False. - random_sample: (int, optional) : if given, will do a tablesample of the table to return that many annotations - Example: - live_live_query("table_name",datetime.datetime.utcnow(), - joins=[[table_name, table_column, joined_table, joined_column], - [joined_table, joincol2, third_table, joincol_third]] - suffixes={ - "table_name":"suffix1", - "joined_table":"suffix2", - "third_table":"suffix3" - }, - select_columns= { - "table_name":[ "column","names"], - "joined_table":["joined_colum"] - }, - filter_in_dict= { - "table_name":{ - "column_name":[included,values] - } - }, - filter_out_dict= { - "table_name":{ - "column_name":[excluded,values] - } - }, - filter_equal_dict"={ - "table_name":{ - "column_name":value - }, - filter_spatial_dict"= { - "table_name": { - "column_name": [[min_x, min_y, min_z], [max_x, max_y, max_z]] - } - filter_regex_dict"= { - "table_name": { - "column_name": "regex_string" - } - Returns: - pd.DataFrame: result of query + """Beta method for querying cave annotation tables with rootIDs and annotations + at a particular timestamp. Note: this method requires more explicit mapping of + filters and selection to table as its designed to test a more general endpoint + that should eventually support complex joins. 
+ + Parameters + ---------- + table: + Principle table to query + timestamp: + Timestamp to query + joins: list of lists of str, optional + List of joins, where each join is a list of [table1,column1, table2, column2] + filter_in_dict: dict of dicts, optional + A dictionary with tables as keys, values are dicts with column keys and list + values to accept. + filter_out_dict: dict of dicts, optional + A dictionary with tables as keys, values are dicts with column keys and list + values to reject. + filter_equal_dict: dict of dicts, optional + A dictionary with tables as keys, values are dicts with column keys and values + to equate. + filter_spatial_dict: dict of dicts, optional + A dictionary with tables as keys, values are dicts with column keys and values + of 2x3 list of bounds. + filter_regex_dict: dict of dicts, optional + A dictionary with tables as keys, values are dicts with column keys and values + of regex strings. + select_columns: dict of lists of str, optional + A dictionary with tables as keys, values are lists of columns to select. + offset: + Value to offset query by. + limit: + Limit of query. + datastack_name: + Datastack to query. Defaults to set by client. + split_positions: + Whether to split positions into separate columns, True is faster. + metadata: + Whether to attach metadata to dataframe. + suffixes: + What suffixes to use on joins, keys are table_names, values are suffixes. + desired_resolution: + What resolution to convert position columns to. + allow_missing_lookups: + If there are annotations without supervoxels and root IDs yet, allow results. + random_sample: + If given, will do a table sample of the table to return that many annotations. + + Returns + ------- + : + Results of query + + Examples + -------- + >>> from caveclient import CAVEclient + >>> client = CAVEclient('minnie65_public_v117') + >>> live_live_query("table_name", datetime.datetime.now(datetime.timezone.utc), + >>> joins=[[table_name, table_column, joined_table, joined_column], + >>> [joined_table, joincol2, third_table, joincol_third]] + >>> suffixes={ + >>> "table_name":"suffix1", + >>> "joined_table":"suffix2", + >>> "third_table":"suffix3" + >>> }, + >>> select_columns= { + >>> "table_name":[ "column","names"], + >>> "joined_table":["joined_colum"] + >>> }, + >>> filter_in_dict= { + >>> "table_name":{ + >>> "column_name":[included,values] + >>> } + >>> }, + >>> filter_out_dict= { + >>> "table_name":{ + >>> "column_name":[excluded,values] + >>> } + >>> }, + >>> filter_equal_dict"={ + >>> "table_name":{ + >>> "column_name":value + >>> }, + >>> filter_spatial_dict"= { + >>> "table_name": { + >>> "column_name": [[min_x, min_y, min_z], [max_x, max_y, max_z]] + >>> } + >>> filter_regex_dict"= { + >>> "table_name": { + >>> "column_name": "regex_string" + >>> } """ + logging.warning( "Deprecation: this method is to facilitate beta testing of this feature, \ -it will likely get removed in future versions. " + it will likely get removed in future versions. 
" ) timestamp = convert_timestamp(timestamp) return_df = True @@ -1232,7 +1330,7 @@ def live_live_query( data = {} query_args = {} query_args["return_pyarrow"] = True - query_args['arrow_format'] = True + query_args["arrow_format"] = True query_args["merge_reference"] = False query_args["allow_missing_lookups"] = allow_missing_lookups if random_sample: @@ -1283,7 +1381,6 @@ def live_live_query( warnings.simplefilter(action="ignore", category=DeprecationWarning) df = deserialize_query_response(response) if desired_resolution is not None: - if len(desired_resolution) != 3: raise ValueError( "desired resolution needs to be of length 3, for xyz" @@ -1344,57 +1441,65 @@ def live_query( desired_resolution: Iterable = None, random_sample: int = None, ): - """generic query on materialization tables - - Args: - table: 'str' - timestamp (datetime.datetime): time to materialize (in utc) - pass datetime.datetime.utcnow() for present time - filter_in_dict (dict , optional): - keys are column names, values are allowed entries. - Defaults to None. - filter_out_dict (dict, optional): - keys are column names, values are not allowed entries. - Defaults to None. - filter_equal_dict (dict, optional): - inner layer: keys are column names, values are specified entry. - Defaults to None. - filter_spatial (dict, optional): - inner layer: keys are column names, values are bounding boxes - as [[min_x, min_y,min_z],[max_x, max_y, max_z]] - Expressed in units of the voxel_resolution of this dataset. - Defaults to None - filter_regex_dict (dict, optional): - inner layer: keys are column names, values are regex strings - offset (int, optional): offset in query result - limit (int, optional): maximum results to return (server will set upper limit, see get_server_config) - select_columns (list of str, optional): columns to select. Defaults to None. - suffixes: (list[str], optional): suffixes to use on duplicate columns - offset (int, optional): result offset to use. Defaults to None. - will only return top K results. - datastack_name (str, optional): datastack to query. - If None defaults to one specified in client. - split_positions (bool, optional): whether to break position columns into x,y,z columns - default False, if False data is returned as one column with [x,y,z] array (slower) - post_filter (bool, optional): whether to filter down the result based upon the filters specified - if false, it will return the query with present root_ids in the root_id columns, - but the rows will reflect the filters translated into their past IDs. - So if, for example, a cell had a false merger split off since the last materialization. - those annotations on that incorrect portion of the cell will be included if this is False, - but will be filtered down if this is True. (Default=True) - metadata: (bool, optional) : toggle to return metadata - If True (and return_df is also True), return table and query metadata in the df.attr dictionary. 
- merge_reference: (bool, optional) : toggle to automatically join reference table
- If True, metadata will be queries and if its a reference table it will perform a join
- on the reference table to return the rows of that
- desired_resolution: (Iterable[float], Optional) : desired resolution you want all spatial points returned in
- If None, defaults to one specified in client, if that is None then points are returned
- as stored in the table and should be in the resolution specified in the table metadata
- random_sample: (int, optional) : if given, will do a tablesample of the table to return that many annotations
- Returns:
- pd.DataFrame: a pandas dataframe of results of query
+ """Generic query on materialization tables
+
+ Parameters
+ ----------
+ table : str
+ Table to query
+ timestamp : datetime.datetime
+ Time to materialize (in utc). Pass
+ datetime.datetime.now(datetime.timezone.utc) for present time.
+ filter_in_dict : dict, optional
+ Keys are column names, values are allowed entries.
+ filter_out_dict : dict, optional
+ Keys are column names, values are not allowed entries.
+ filter_equal_dict : dict, optional
+ Keys are column names, values are the specified entry.
+ filter_spatial_dict : dict, optional
+ Keys are column names, values are bounding boxes expressed in units of the
+ voxel_resolution of this dataset. Bounding box is
+ [[min_x, min_y,min_z],[max_x, max_y, max_z]].
+ filter_regex_dict : dict, optional
+ Keys are column names, values are regex strings.
+ select_columns : list of str, optional
+ Columns to select.
+ offset : int, optional
+ Offset in query result.
+ limit : int, optional
+ Maximum results to return (server will set upper limit, see get_server_config).
+ datastack_name : str, optional
+ Datastack to query. If None, defaults to one specified in client.
+ split_positions : bool, optional
+ Whether to break position columns into x,y,z columns. If False data is
+ returned as one column with [x,y,z] array (slower).
+ post_filter : bool, optional
+ Whether to filter down the result based upon the filters specified. If False,
+ it will return the query with present root_ids in the root_id columns, but the
+ rows will reflect the filters translated into their past IDs. So if, for example,
+ a cell had a false merger split off since the last materialization, those
+ annotations on that incorrect portion of the cell will be included if this is
+ False, but will be filtered down if this is True.
+ metadata : bool, optional
+ Toggle to return metadata. If True (and return_df is also True), return table
+ and query metadata in the df.attr dictionary.
+ merge_reference : bool, optional
+ Toggle to automatically join reference table. If True, metadata will be queried
+ and, if it is a reference table, it will perform a join on the reference table to
+ return the rows of that table.
+ desired_resolution : Iterable, optional
+ Desired resolution you want all spatial points returned in. If None, defaults to
+ the one specified in the client; if that is None, then points are returned as stored
+ in the table and should be in the resolution specified in the table metadata.
+ random_sample : int, optional
+ If given, will do a table sample of the table to return that many annotations.
+ Returns + ------- + pd.DataFrame + A pandas dataframe of results of query """ + timestamp = convert_timestamp(timestamp) return_df = True if self.cg_client is None: @@ -1524,7 +1629,6 @@ def live_query( df = deserialize_query_response(response) if desired_resolution is not None: if not response.headers.get("dataframe_resolution", None): - if len(desired_resolution) != 3: raise ValueError( "desired resolution needs to be of length 3, for xyz" @@ -1584,7 +1688,7 @@ def synapse_query( self, pre_ids: Union[int, Iterable, np.ndarray] = None, post_ids: Union[int, Iterable, np.ndarray] = None, - bounding_box=None, + bounding_box: Optional[Union[list, np.ndarray]] = None, bounding_box_column: str = "post_pt_position", timestamp: datetime = None, remove_autapses: bool = True, @@ -1597,36 +1701,53 @@ def synapse_query( synapse_table: str = None, datastack_name: str = None, metadata: bool = True, - ): - """Convience method for quering synapses. Will use the synapse table specified in the info service by default. - It will also remove autapses by default. NOTE: This is not designed to allow querying of the entire synapse table. - A query with no filters will return only a limited number of rows (configured by the server) and will do so in a non-deterministic fashion. - Please contact your dataset administrator if you want access to the entire table. - - Args: - pre_ids (Union[int, Iterable, optional): pre_synaptic cell(s) to query. Defaults to None. - post_ids (Union[int, Iterable, optional): post synaptic cell(s) to query. Defaults to None. - timestamp (datetime.datetime, optional): timestamp to query (optional). - If passed recalculate query at timestamp, do not pass with materialization_verison - bounding_box: [[min_x, min_y, min_z],[max_x, max_y, max_z]] bounding box to filter - synapse locations. Expressed in units of the voxel_resolution of this dataset (optional) - bounding_box_column (str, optional): which synapse location column to filter by (Default to "post_pt_position") - remove_autapses (bool, optional): post-hoc filter out synapses. Defaults to True. - include_zeros (bool, optional): whether to include synapses to/from id=0 (out of segmentation). Defaults to True. - limit (int, optional): number of synapses to limit, Defaults to None (server side limit applies) - offset (int, optional): number of synapses to offset query, Defaults to None (no offset). - split_positions (bool, optional): whether to return positions as seperate x,y,z columns (faster) - defaults to False - desired_resolution : Iterable[float] or None, optional - If given, should be a list or array of the desired resolution you want queries returned in - useful for materialization queries. - synapse_table (str, optional): synapse table to query. If None, defaults to self.synapse_table. - datastack_name: (str, optional): datastack to query - materialization_version (int, optional): version to query. - defaults to self.materialization_version if not specified - metadata: (bool, optional) : toggle to return metadata - If True (and return_df is also True), return table and query metadata in the df.attr dictionary. + ) -> pd.DataFrame: + """Convenience method for querying synapses. + + Will use the synapse table + specified in the info service by default. It will also remove autapses by + default. NOTE: This is not designed to allow querying of the entire synapse table. + A query with no filters will return only a limited number of rows (configured + by the server) and will do so in a non-deterministic fashion. 
Please contact + your dataset administrator if you want access to the entire table. + + Parameters + ---------- + pre_ids: + Pre-synaptic cell(s) to query. + post_ids: + Post-synaptic cell(s) to query. + bounding_box: + [[min_x, min_y, min_z],[max_x, max_y, max_z]] bounding box to filter + synapse locations. Expressed in units of the voxel_resolution of this dataset. + bounding_box_column: + Which synapse location column to filter by. + timestamp: + Timestamp to query. If passed recalculate query at timestamp, do not pass + with materialization_version. + remove_autapses: + Whether to remove autapses from query results. + include_zeros: + Whether to include synapses to/from id=0 (out of segmentation). + limit: + Number of synapses to limit. Server-side limit still applies. + offset: + Number of synapses to offset query. + split_positions: + Whether to split positions into separate columns, True is faster. + desired_resolution: + List or array of the desired resolution you want queries returned in + useful for materialization queries. + materialization_version: + Version to query. If passed, do not pass timestamp. Defaults to + `self.materialization_version` if not specified. + metadata: + Whether to attach metadata to dataframe in the df.attr dictionary. + Returns + ------- + : + Results of query. """ filter_in_dict = {} filter_equal_dict = {} @@ -1767,21 +1888,22 @@ def get_tables_metadata( datastack_name=None, version: int = None, log_warning: bool = True, - ): - """Get metadata about a table + ) -> dict: + """Get metadata about tables - Args: - datastack_name: str or None, optional, - Name of the datastack_name. - If None, uses the one specified in the client. - version (int, optional): - version to get. If None, uses the one specified in the client. - log_warning (bool, optional): - whether to print out warnings to the logger. - Defaults to True. - - Returns: - dict: metadata dictionary for table + Parameters + ---------- + datastack_name : str or None, optional + Name of the datastack_name. If None, uses the one specified in the client. + version : + Version to get. If None, uses the one specified in the client. + log_warning : + Whether to print out warnings to the logger. Defaults to True. + + Returns + ------- + : + Metadata dictionary for table """ if datastack_name is None: datastack_name = self.datastack_name @@ -1822,69 +1944,105 @@ def live_live_query( desired_resolution: Iterable = None, allow_missing_lookups: bool = False, allow_invalid_root_ids: bool = False, + random_sample: int = None, ): - """Beta method for querying cave annotation tables with rootIDs and annotations at a particular - timestamp. Note: this method requires more explicit mapping of filters and selection to table - as its designed to test a more general endpoint that should eventually support complex joins. - - Args: - table (str): principle table to query - timestamp (datetime): timestamp to use for querying - joins (list): a list of joins, where each join is a list of [table1,column1, table2, column2] - filter_in_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and list values to accept . Defaults to None. - filter_out_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and list values to reject. Defaults to None. - filter_equal_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and values to equate. Defaults to None. 
- filter_spatial_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and values of 2x3 list of bounds. Defaults to None.
- filter_regex_dict (dict, optional): a dictionary with tables as keys, values are dicts with column keys and values of regex strings. Defaults to None.
- select_columns (_type_, optional): a dictionary with tables as keys, values are list of columns. Defaults to None.
- offset (int, optional): value to offset query by. Defaults to None.
- limit (int, optional): limit of query. Defaults to None.
- datastack_name (str, optional): datastack to query. Defaults to set by client.
- split_positions (bool, optional): whether to split positions into seperate columns, True is faster. Defaults to False.
- metadata (bool, optional): whether to attach metadata to dataframe. Defaults to True.
- suffixes (dict, optional): what suffixes to use on joins, keys are table_names, values are suffixes. Defaults to None.
- desired_resolution (Iterable, optional): What resolution to convert position columns to. Defaults to None will use defaults.
- allow_missing_lookups (bool, optional): If there are annotations without supervoxels and rootids yet, allow results. Defaults to False.
- allow_invalid_root_ids (bool, optional): If True, ignore root ids not valid at the given timestamp, otherwise raise an Error. Defaults to False.
- Example:
- live_live_query("table_name",datetime.datetime.utcnow(),
- joins=[[table_name, table_column, joined_table, joined_column],
- [joined_table, joincol2, third_table, joincol_third]]
- suffixes={
- "table_name":"suffix1",
- "joined_table":"suffix2",
- "third_table":"suffix3"
- },
- select_columns= {
- "table_name":[ "column","names"],
- "joined_table":["joined_colum"]
- },
- filter_in_dict= {
- "table_name":{
- "column_name":[included,values]
- }
- },
- filter_out_dict= {
- "table_name":{
- "column_name":[excluded,values]
- }
- },
- filter_equal_dict"={
- "table_name":{
- "column_name":value
- },
- filter_spatial_dict"= {
- "table_name": {
- "column_name": [[min_x, min_y, min_z], [max_x, max_y, max_z]]
- }
- filter_regex_dict"= {
- "table_name": {
- "column_name": "regex"
- }
- }
- Returns:
- pd.DataFrame: result of query
+ """Beta method for querying CAVE annotation tables with root IDs and annotations
+ at a particular timestamp. Note: this method requires more explicit mapping of
+ filters and selection to tables, as it is designed to test a more general endpoint
+ that should eventually support complex joins.
+
+ Parameters
+ ----------
+ table:
+ Principal table to query
+ timestamp:
+ Timestamp to query
+ joins: list of lists of str, optional
+ List of joins, where each join is a list of [table1, column1, table2, column2]
+ filter_in_dict: dict of dicts, optional
+ A dictionary with tables as keys, values are dicts with column keys and list
+ values to accept.
+ filter_out_dict: dict of dicts, optional
+ A dictionary with tables as keys, values are dicts with column keys and list
+ values to reject.
+ filter_equal_dict: dict of dicts, optional
+ A dictionary with tables as keys, values are dicts with column keys and values
+ to equate.
+ filter_spatial_dict: dict of dicts, optional
+ A dictionary with tables as keys, values are dicts with column keys and values
+ of 2x3 list of bounds.
+ filter_regex_dict: dict of dicts, optional
+ A dictionary with tables as keys, values are dicts with column keys and values
+ of regex strings. 
+ select_columns: dict of lists of str, optional
+ A dictionary with tables as keys, values are lists of columns to select.
+ offset:
+ Value to offset query by.
+ limit:
+ Limit of query.
+ datastack_name:
+ Datastack to query. Defaults to the datastack set by the client.
+ split_positions:
+ Whether to split positions into separate columns, True is faster.
+ metadata:
+ Whether to attach metadata to dataframe.
+ suffixes:
+ What suffixes to use on joins, keys are table_names, values are suffixes.
+ desired_resolution:
+ What resolution to convert position columns to.
+ allow_missing_lookups:
+ If there are annotations without supervoxels and root IDs yet, allow results.
+ allow_invalid_root_ids:
+ If True, ignore root ids not valid at the given timestamp, otherwise raise
+ an error.
+ random_sample:
+ If given, will do a table sample of the table to return that many annotations.
+
+ Returns
+ -------
+ :
+ Results of query
+
+ Examples
+ --------
+ >>> import datetime
+ >>> from caveclient import CAVEclient
+ >>> client = CAVEclient('minnie65_public_v117')
+ >>> client.materialize.live_live_query("table_name", datetime.datetime.now(datetime.timezone.utc),
+ >>>     joins=[[table_name, table_column, joined_table, joined_column],
+ >>>            [joined_table, joincol2, third_table, joincol_third]],
+ >>>     suffixes={
+ >>>         "table_name":"suffix1",
+ >>>         "joined_table":"suffix2",
+ >>>         "third_table":"suffix3"
+ >>>     },
+ >>>     select_columns={
+ >>>         "table_name":["column","names"],
+ >>>         "joined_table":["joined_column"]
+ >>>     },
+ >>>     filter_in_dict={
+ >>>         "table_name":{
+ >>>             "column_name":[included,values]
+ >>>         }
+ >>>     },
+ >>>     filter_out_dict={
+ >>>         "table_name":{
+ >>>             "column_name":[excluded,values]
+ >>>         }
+ >>>     },
+ >>>     filter_equal_dict={
+ >>>         "table_name":{
+ >>>             "column_name":value
+ >>>         }
+ >>>     },
+ >>>     filter_spatial_dict={
+ >>>         "table_name": {
+ >>>             "column_name": [[min_x, min_y, min_z], [max_x, max_y, max_z]]
+ >>>         }
+ >>>     },
+ >>>     filter_regex_dict={
+ >>>         "table_name": {
+ >>>             "column_name": "regex_string"
+ >>>         }
+ >>>     })
 """
+
 logging.warning(
 "Deprecation: this method is to facilitate beta testing of this feature, \
 it will likely get removed in future versions. "
 )
@@ -1899,10 +2057,12 @@ def live_live_query(
 data = {}
 query_args = {}
 query_args["return_pyarrow"] = True
- query_args['arrow_format'] = True
+ query_args["arrow_format"] = True
 query_args["merge_reference"] = False
 query_args["allow_missing_lookups"] = allow_missing_lookups
 query_args["allow_invalid_root_ids"] = allow_invalid_root_ids
+ if random_sample:
+ query_args["random_sample"] = random_sample
 data["table"] = table
 data["timestamp"] = timestamp
@@ -1954,7 +2114,6 @@ def live_live_query(
 df = deserialize_query_response(response)
 if desired_resolution is not None:
 if not response.headers.get("dataframe_resolution", None):
-
 if len(desired_resolution) != 3:
 raise ValueError(
 "desired resolution needs to be of length 3, for xyz"
@@ -2000,16 +2159,20 @@ def live_live_query(
 return df
 
 def get_views(self, version: int = None, datastack_name: str = None):
- """get all available views for a version
- Args:
- version (int, optional): version to query.
- Defaults to None. (will version set by client)
- datastack_name (str, optional): datastack to query. Defaults to None.
- (will use datastack set by client)
+ """
+ Get all available views for a version
+
+ Parameters
+ ----------
+ version :
+ Version to query. If None, uses the one specified in the client.
+ datastack_name :
+ Datastack to query. If None, uses the one specified in the client. 
- Returns:
- list[str]: a list of views
+ Returns
+ -------
+ list
+ List of views
 """
 if datastack_name is None:
 datastack_name = self.datastack_name
@@ -2030,16 +2193,21 @@ def get_view_metadata(
 datastack_name: str = None,
 log_warning: bool = True,
 ):
- """get metadata for a view
-
- Args:
- view_name (str): name of view to query
- materialization_version (int, optional): version to query.
- Defaults to None. (will use version set by client)
- log_warning (bool, optional): whether to log warnings. Defaults to True.
- Returns:
- dict: metadata of view
+ """Get metadata for a view
+
+ Parameters
+ ----------
+ view_name :
+ Name of view to query.
+ materialization_version :
+ Version to query. If None, will use version set by client.
+ log_warning :
+ Whether to log warnings.
+
+ Returns
+ -------
+ dict
+ Metadata of view
 """
 if datastack_name is None:
 datastack_name = self.datastack_name
@@ -2063,15 +2231,21 @@ def get_view_schema(
 datastack_name: str = None,
 log_warning: bool = True,
 ):
- """get schema for a view
-
- Args:
- view_name (str): name of view to query
- materialization_version (int, optional): version to query.
- Defaults to None. (will version set by client)
- log_warning (bool, optional): whether to log warnings. Defaults to True.
- Returns:
- dict: schema of view
+ """Get schema for a view
+
+ Parameters
+ ----------
+ view_name:
+ Name of view to query.
+ materialization_version:
+ Version to query. If None, will use version set by client.
+ log_warning:
+ Whether to log warnings.
+
+ Returns
+ -------
+ dict
+ Schema of view.
 """
 if datastack_name is None:
 datastack_name = self.datastack_name
@@ -2094,14 +2268,19 @@ def get_view_schemas(
 datastack_name: str = None,
 log_warning: bool = True,
 ):
- """get schemas for all views
-
- Args:
- materialization_version (int, optional): version to query.
- Defaults to None. (will version set by client)
- log_warning (bool, optional): whether to log warnings. Defaults to True.
- Returns:
- dict: schemas of all views
+ """Get schemas for all views
+
+ Parameters
+ ----------
+ materialization_version:
+ Version to query. If None, will use version set by client.
+ log_warning:
+ Whether to log warnings.
+
+ Returns
+ -------
+ dict
+ Schemas of all views.
 """
 if datastack_name is None:
 datastack_name = self.datastack_name
@@ -2138,51 +2317,65 @@ def query_view(
 get_counts: bool = False,
 random_sample: int = None,
 ):
- """generic query on a view
-
- Args:
- table: 'str'
-
- filter_in_dict (dict , optional):
- keys are column names, values are allowed entries.
- Defaults to None.
- filter_out_dict (dict, optional):
- keys are column names, values are not allowed entries.
- Defaults to None.
- filter_equal_dict (dict, optional):
- inner layer: keys are column names, values are specified entry.
- Defaults to None.
- filter_spatial (dict, optional):
- inner layer: keys are column names, values are bounding boxes
- as [[min_x, min_y,min_z],[max_x, max_y, max_z]]
- Expressed in units of the voxel_resolution of this dataset.
- filter_regex_dict (dict, optional):
- inner layer: keys are column names, values are regex strings.
- offset (int, optional): offset in query result
- limit (int, optional): maximum results to return (server will set upper limit, see get_server_config)
- select_columns (list of str, optional): columns to select. Defaults to None.
- suffixes: (list[str], optional): suffixes to use on duplicate columns
- offset (int, optional): result offset to use. Defaults to None.
- will only return top K results. 
- datastack_name (str, optional): datastack to query.
- If None defaults to one specified in client.
- return_df (bool, optional): whether to return as a dataframe
- default True, if False, data is returned as json (slower)
- split_positions (bool, optional): whether to break position columns into x,y,z columns
- default False, if False data is returned as one column with [x,y,z] array (slower)
- materialization_version (int, optional): version to query.
- If None defaults to one specified in client.
- metadata: (bool, optional) : toggle to return metadata (default True)
- If True (and return_df is also True), return table and query metadata in the df.attr dictionary.
- merge_reference: (bool, optional) : toggle to automatically join reference table
- If True, metadata will be queries and if its a reference table it will perform a join
- on the reference table to return the rows of that
- desired_resolution: (Iterable[float], Optional) : desired resolution you want all spatial points returned in
- If None, defaults to one specified in client, if that is None then points are returned
- as stored in the table and should be in the resolution specified in the table metadata
- random_sample: (int, optional) : if given, will do a tablesample of the table to return that many annotations
- Returns:
- pd.DataFrame: a pandas dataframe of results of query
+ """Generic query on a view
+
+ Parameters
+ ----------
+ view_name : str
+ View to query
+ filter_in_dict : dict, optional
+ Keys are column names, values are allowed entries, by default None
+ filter_out_dict : dict, optional
+ Keys are column names, values are not allowed entries, by default None
+ filter_equal_dict : dict, optional
+ Keys are column names, values are the specified entry, by default None
+ filter_spatial_dict : dict, optional
+ Keys are column names, values are bounding boxes expressed in units of the
+ voxel_resolution of this dataset. Bounding box is [[min_x, min_y,min_z],[max_x, max_y, max_z]], by default None
+ filter_regex_dict : dict, optional
+ Keys are column names, values are regex strings, by default None
+ select_columns : list of str, optional
+ Columns to select, by default None
+ offset : int, optional
+ Result offset to use, by default None. Will only return top K results.
+ limit : int, optional
+ Maximum results to return (server will set upper limit,
+ see get_server_config), by default None
+ datastack_name : str, optional
+ Datastack to query, by default None. If None, defaults to one
+ specified in client.
+ return_df : bool, optional
+ Whether to return as a dataframe, by default True. If False, data is
+ returned as json (slower).
+ split_positions : bool, optional
+ Whether to break position columns into x,y,z columns, by default False.
+ If False data is returned as one column with [x,y,z] array (slower)
+ materialization_version : int, optional
+ Version to query, by default None.
+ If None, defaults to one specified in client.
+ metadata : bool, optional
+ Toggle to return metadata, by default True. If True
+ (and return_df is also True), return table and query metadata in the
+ df.attr dictionary.
+ merge_reference : bool, optional
+ Toggle to automatically join reference table, by default True. If True,
+ metadata will be queried and, if it is a reference table, it will perform a
+ join on the reference table to return the rows of that table.
+ desired_resolution : Iterable[float], optional
+ Desired resolution you want all spatial points returned in, by default None. 
+ If None, defaults to one specified in client, if that is None then points
+ are returned as stored in the table and should be in the resolution
+ specified in the table metadata
+ get_counts : bool, optional
+ Whether to get counts of the query, by default False
+ random_sample : int, optional
+ If given, will do a table sample of the table to return that many
+ annotations
+
+ Returns
+ -------
+ pd.DataFrame
+ A pandas dataframe of results of query
 """
 
 if desired_resolution is None:
@@ -2259,6 +2452,33 @@ def query_view(
 else:
 return response.json()
 
+ def get_unique_string_values(self, table: str, datastack_name: str = None):
+ """Get unique string values for a table
+
+ Parameters
+ ----------
+ table :
+ Table to query
+ datastack_name :
+ Datastack to query. If None, uses the one specified in the client.
+
+ Returns
+ -------
+ dict[str]
+ A dictionary of column names and their unique values
+ """
+ if datastack_name is None:
+ datastack_name = self.datastack_name
+
+ endpoint_mapping = self.default_url_mapping
+ endpoint_mapping["datastack_name"] = datastack_name
+ endpoint_mapping["table_name"] = table
+
+ url = self._endpoints["unique_string_values"].format_map(endpoint_mapping)
+ response = self.session.get(url, verify=self.verify)
+ self.raise_for_status(response)
+ return response.json()
+
 client_mapping = {
 2: MaterializatonClientV2,
diff --git a/caveclient/tools/stage.py b/caveclient/tools/stage.py
index b4286d7c..d4b5d9c1 100644
--- a/caveclient/tools/stage.py
+++ b/caveclient/tools/stage.py
@@ -119,11 +119,11 @@ def add_dataframe(self, df):
 if len(missing_cols) > 0 or len(additional_cols) > 0:
 if len(missing_cols) == 0:
 raise ValueError(
- f"Schema needs columns not in dataframe: {additional_cols}."
+ f"Dataframe has columns that are not in schema: {additional_cols}."
 )
 if len(additional_cols) == 0:
 raise ValueError(
- f"Dataframe has columns that do not match fields: {missing_cols}."
+ f"Schema needs columns not in dataframe: {missing_cols}."
 )
 raise ValueError(
 f"Schema needs columns not in dataframe: {missing_cols} and dataframe has columns that do not match fields: {additional_cols}."
diff --git a/changelog.md b/changelog.md
deleted file mode 100644
index cffa932d..00000000
--- a/changelog.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# Changelog
-
-5.1.0
-----
-* added get_oldest_timestamp call to chunkedgraph
-
-5.0.1
------
-* Fixed bug with desired_resolution being set at the client level
-was being ignored in >5.0.0
-
-5.0.0
------
-* Added support for the new CAVE Materialization 3.0 API
- Includes support for the new materialization API, which allows for
- server side conversion of the units of position, and ensures that
- all positions are returned with the same units, even after joins.
-* Added support for querying databases that were materialized without merging
- tables together. This will allow for faster materializations.
-* Removed support for LiveLive query from the Materialization 2.0 API client.
- Note.. <5.0.0 clients interacting with MaterializationEngine >4.7.0 servers will
- use live live query but will doubly convert the units of position if you ask
- for a desired resolution, as the old client will also do a conversion server side.
-* Fixed interaction with api version querying of servers from individual
- clients to work with verify=False. (useful for testing)
-* Stored infromation from client about mapping between dataframe and table names
- and original column names. 
-* Added support for suffixes and select columns to be passed by dictionary rather than list - making the selection an application of suffixes more explicit when there are collisions - between column names in joined tables. - -Upgrade Notes -~~~~~~~~~~~~~ -Change all select_column calls to pass dictionaries rather than lists. -Change all suffix calls to pass dictionaries rather than lists. -Advocate for your server administrator to upgrade to MaterializationEngine 4.7.0 or later, -so you can use the new MaterializationEngine 3.0 API and client. - - - -### Added -- **JSONStateService**: Neuroglancer URL can be specified for the client under the property `ngl_url`. -For a FrameworkClient with a datastack name, the value is set using the `viewer_site` field from the info client. - -### Changed - -- **JSONStateService**: In `build_neuroglancer_url`, if `ngl_url` is None the url will be pulled from the default client value. -If there is the default value is None, only the URL to the JSON file will be returned. - -## [2.0.1] - 2020-10-20 - -### Fixed -- **AuthClient** : Token creation and setting is more robust. Directories are created if not previously present. - -## [2.0.0] - -### Added -- First release of the unified FrameworkClient and system-wide authentication. \ No newline at end of file diff --git a/doc_requirements.txt b/doc_requirements.txt new file mode 100644 index 00000000..02586e5b --- /dev/null +++ b/doc_requirements.txt @@ -0,0 +1,14 @@ +mkdocs +mkdocs-autorefs +mkdocs-git-authors-plugin +mkdocs-git-committers-plugin-2 +mkdocs-git-revision-date-localized-plugin +mkdocs-include-markdown-plugin +mkdocs-jupyter +mkdocs-macros-plugin +mkdocs-material +mkdocs-material-extensions +mkdocs-section-index +mkdocstrings[crystal,python] +mknotebooks +pymdown-extensions \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d4bb2cbb..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css deleted file mode 100644 index e8d4863a..00000000 --- a/docs/_static/css/custom.css +++ /dev/null @@ -1,4 +0,0 @@ -.wy-table-responsive table td, -.wy-table-responsive table th { - white-space: inherit; -} \ No newline at end of file diff --git a/docs/api/caveclient.rst b/docs/api/caveclient.rst deleted file mode 100644 index d995e00d..00000000 --- a/docs/api/caveclient.rst +++ /dev/null @@ -1,133 +0,0 @@ -caveclient package -================== - -Subpackages ------------ - -.. toctree:: - :maxdepth: 4 - - caveclient.tools - -Submodules ----------- - -caveclient.annotationengine module ----------------------------------- - -.. automodule:: caveclient.annotationengine - :members: - :undoc-members: - :show-inheritance: - -caveclient.auth module ----------------------- - -.. 
automodule:: caveclient.auth - :members: - :undoc-members: - :show-inheritance: - -caveclient.base module ----------------------- - -.. automodule:: caveclient.base - :members: - :undoc-members: - :show-inheritance: - -caveclient.chunkedgraph module ------------------------------- - -.. automodule:: caveclient.chunkedgraph - :members: - :undoc-members: - :show-inheritance: - -caveclient.emannotationschemas module -------------------------------------- - -.. automodule:: caveclient.emannotationschemas - :members: - :undoc-members: - :show-inheritance: - -caveclient.endpoints module ---------------------------- - -.. automodule:: caveclient.endpoints - :members: - :undoc-members: - :show-inheritance: - -caveclient.format\_utils module -------------------------------- - -.. automodule:: caveclient.format_utils - :members: - :undoc-members: - :show-inheritance: - -caveclient.frameworkclient module ---------------------------------- - -.. automodule:: caveclient.frameworkclient - :members: - :undoc-members: - :show-inheritance: - -caveclient.infoservice module ------------------------------ - -.. automodule:: caveclient.infoservice - :members: - :undoc-members: - :show-inheritance: - -caveclient.jsonservice module ------------------------------ - -.. automodule:: caveclient.jsonservice - :members: - :undoc-members: - :show-inheritance: - -caveclient.l2cache module -------------------------- - -.. automodule:: caveclient.l2cache - :members: - :undoc-members: - :show-inheritance: - -caveclient.materializationengine module ---------------------------------------- - -.. automodule:: caveclient.materializationengine - :members: - :undoc-members: - :show-inheritance: - -caveclient.session\_config module ---------------------------------- - -.. automodule:: caveclient.session_config - :members: - :undoc-members: - :show-inheritance: - -caveclient.timeit module ------------------------- - -.. automodule:: caveclient.timeit - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: caveclient - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/caveclient.tools.rst b/docs/api/caveclient.tools.rst deleted file mode 100644 index 53dbc0e8..00000000 --- a/docs/api/caveclient.tools.rst +++ /dev/null @@ -1,21 +0,0 @@ -caveclient.tools package -======================== - -Submodules ----------- - -caveclient.tools.stage module ------------------------------ - -.. automodule:: caveclient.tools.stage - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: caveclient.tools - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/modules.rst b/docs/api/modules.rst deleted file mode 100644 index f4e8ce48..00000000 --- a/docs/api/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -caveclient -========== - -.. toctree:: - :maxdepth: 4 - - caveclient diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 00000000..5a68d24a --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,41 @@ +--- +title: Changelog +--- + +## 5.1.0 + +- Added get_oldest_timestamp call to chunkedgraph + +## 5.0.1 + +- Fixed bug with desired_resolution being set at the client level + was being ignored in >5.0.0 + +## 5.0.0 + +- Added support for the new CAVE Materialization 3.0 API + Includes support for the new materialization API, which allows for + server side conversion of the units of position, and ensures that + all positions are returned with the same units, even after joins. 
+- Added support for querying databases that were materialized without merging + tables together. This will allow for faster materializations. +- Removed support for LiveLive query from the Materialization 2.0 API client. + Note.. <5.0.0 clients interacting with MaterializationEngine >4.7.0 servers will + use live live query but will doubly convert the units of position if you ask + for a desired resolution, as the old client will also do a conversion server side. +- Fixed interaction with api version querying of servers from individual + clients to work with verify=False. (useful for testing) +- Stored infromation from client about mapping between dataframe and table names + and original column names. +- Added support for suffixes and select columns to be passed by dictionary rather than list + making the selection an application of suffixes more explicit when there are collisions + between column names in joined tables. + +--- + +## Older Upgrade Notes + +Change all select_column calls to pass dictionaries rather than lists. +Change all suffix calls to pass dictionaries rather than lists. +Advocate for your server administrator to upgrade to MaterializationEngine 4.7.0 or later, +so you can use the new MaterializationEngine 3.0 API and client. diff --git a/docs/client_api/annotation.md b/docs/client_api/annotation.md new file mode 100644 index 00000000..0744d38f --- /dev/null +++ b/docs/client_api/annotation.md @@ -0,0 +1,16 @@ +--- +title: client.annotation +--- + +!!! note + The functionality described here will be accurate if the datastack you are using is using the most up-to-date version of the annotation service. If something seems wrong with the documentation here, try checking the version of the annotation engine returned by your client: + ```python + type(client.annotation) + ``` + Extended documentation for all versions of the annotation client can be found + [here](../extended_api/annotation.md). + +::: caveclient.annotationengine.AnnotationClientV2 + options: + heading_level: 2 + show_bases: false diff --git a/docs/client_api/auth.md b/docs/client_api/auth.md new file mode 100644 index 00000000..9a0c80c6 --- /dev/null +++ b/docs/client_api/auth.md @@ -0,0 +1,10 @@ +--- +title: client.auth +--- + +::: caveclient.auth.AuthClient + options: + heading_level: 2 + show_bases: false + filters: ["!__init__"] + merge_init_into_class: false diff --git a/docs/client_api/chunkedgraph.md b/docs/client_api/chunkedgraph.md new file mode 100644 index 00000000..a4494244 --- /dev/null +++ b/docs/client_api/chunkedgraph.md @@ -0,0 +1,20 @@ +--- +title: client.chunkedgraph +--- + +Note: the functionality described here will be accurate if the datastack you are using +is using the most up-to-date version of the chunkedgraph engine. If something seems +wrong with the documentation here, try checking the version of the chunkedgraph +engine returned by your client: + +```python +type(client.chunkedgraph) +``` + +Extended documentation for all versions of the chunkedgraph client can be found +[here](../extended_api/chunkedgraph.md). 
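+For example, to look up the root id for a supervoxel (a minimal sketch: the datastack
+name below is a placeholder, and the optional UTC `timestamp` pins the lookup to a
+fixed time for reproducible analysis):
+
+```python
+import datetime
+from caveclient import CAVEclient
+
+client = CAVEclient("my_datastack")  # placeholder datastack name
+
+# current root id for a supervoxel
+root_id = client.chunkedgraph.get_root_id(supervoxel_id=104200755619042523)
+
+# the same lookup pinned to a fixed moment in time
+ts = datetime.datetime(2023, 1, 1, tzinfo=datetime.timezone.utc)
+root_id_then = client.chunkedgraph.get_root_id(supervoxel_id=104200755619042523, timestamp=ts)
+```
+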
+ +::: caveclient.chunkedgraph.ChunkedGraphClientV1 + options: + heading_level: 2 + show_bases: false diff --git a/docs/client_api/index.md b/docs/client_api/index.md new file mode 100644 index 00000000..9c6b02bc --- /dev/null +++ b/docs/client_api/index.md @@ -0,0 +1,10 @@ +--- +title: Overview +--- + +The most common method of interacting with the CAVE Framework is by instantiating a +client (`caveclient.CAVEclient`) and then using that client to interact with various +services. Under the hood, the CAVEclient is a collection of individual clients, which +can be accessed via properties. For example, to access the materialization client, +you can use `client.materialize`, which (up to the exact version) will actually return a +[caveclient.materializationengine.MaterializatonClientV3][] object. diff --git a/docs/client_api/l2cache.md b/docs/client_api/l2cache.md new file mode 100644 index 00000000..b99bd658 --- /dev/null +++ b/docs/client_api/l2cache.md @@ -0,0 +1,19 @@ +--- +title: client.l2cache +--- + +Note: the functionality described here will be accurate if the datastack you are using +is using the most up-to-date version of the level-2 cache. If something seems +wrong with the documentation here, try checking the version of the level-2 cache returned by your client: + +```python +type(client.l2cache) +``` + +Extended documentation for all versions of the level-2 cache client can be found +[here](../extended_api/l2cache.md). + +::: caveclient.l2cache.L2CacheClientLegacy + options: + heading_level: 2 + show_bases: false diff --git a/docs/client_api/materialize.md b/docs/client_api/materialize.md new file mode 100644 index 00000000..0eab42df --- /dev/null +++ b/docs/client_api/materialize.md @@ -0,0 +1,20 @@ +--- +title: client.materialize +--- + +Note: the functionality described here will be accurate if the datastack you are using +is using the most up-to-date version of the materialization engine. If something seems +wrong with the documentation here, try checking the version of the materialization +engine returned by your client: + +```python +type(client.materialize) +``` + +Extended documentation for all versions of the materialization client can be found +[here](../extended_api/materialization.md). + +::: caveclient.materializationengine.MaterializatonClientV3 + options: + heading_level: 2 + show_bases: false diff --git a/docs/client_api/state.md b/docs/client_api/state.md new file mode 100644 index 00000000..018ba39c --- /dev/null +++ b/docs/client_api/state.md @@ -0,0 +1,20 @@ +--- +title: client.state +--- + +Note: the functionality described here will be accurate if the datastack you are using +is using the most up-to-date version of the state engine. If something seems +wrong with the documentation here, try checking the version of the state +engine returned by your client: + +```python +type(client.state) +``` + +Extended documentation for all versions of the state client can be found +[here](../extended_api/state.md). + +::: caveclient.jsonservice.JSONServiceV1 + options: + heading_level: 2 + show_bases: false diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 6a011476..00000000 --- a/docs/conf.py +++ /dev/null @@ -1,71 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. 
For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import sphinx_rtd_theme -import os -import sys - -sys.path.insert(0, os.path.abspath("..")) - -# -- Project information ----------------------------------------------------- - -project = "CAVEclient" -copyright = "2020, Casey Schneider-Mizell, Forrest Collman, Sven Dorkenwald" -author = "Casey Schneider-Mizell, Forrest Collman, Sven Dorkenwald" - -# The full version, including alpha/beta/rc tags -release = "1.0" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.coverage", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx_rtd_theme", -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -# These folders are copied to the documentation's HTML output -html_static_path = ["_static"] - -# These paths are either relative to html_static_path -# or fully qualified paths (eg. https://...) -html_css_files = [ - "css/custom.css", -] - -master_doc = "index" - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 00000000..fb82c161 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,112 @@ +--- +title: Contributing +--- + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. + +You can contribute in many ways: + +## Types of Contributions + +### Report Bugs + +Report bugs to our [issues page](https://github.com/{{ config.repo_name }}/issues). + +If you are reporting a bug, please include: + +- Your operating system name and version. +- Any details about your local setup that might be helpful in troubleshooting. +- Detailed steps to reproduce the bug, in the form of a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example). + +### Fix Bugs + +Look through the GitHub issues for bugs. Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it. + +### Implement Features + +Look through the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it. 
+
+### Write Documentation
+
+caveclient could always use more documentation, whether as part of the
+official caveclient docs, in docstrings, or even on the web in blog posts,
+articles, and such.
+
+### Submit Feedback
+
+The best way to send feedback is to [create an issue](https://github.com/{{ config.repo_name }}/issues/new) on GitHub.
+
+If you are proposing a feature:
+
+- Explain in detail how it would work.
+- Keep the scope as narrow as possible, to make it easier to implement.
+- Remember that while contributions are welcome, developer/maintainer time is limited.
+
+## Get Started
+
+Ready to contribute? Here's how to set up `{{ names.package }}` for local development.
+
+- [Fork](https://github.com/{{ config.repo_name }}/fork) the repo on GitHub.
+- Clone your fork locally:
+
+```console
+git clone git@github.com:your_name_here/{{ names.repo_title }}.git
+```
+
+- Ensure [pip](https://pip.pypa.io/en/stable/installation/) is installed.
+- Create a virtual environment.
+- Start your virtualenv:
+
+  ```console
+  source .venv/bin/activate
+  ```
+
+- Create a branch for local development:
+
+  ```console
+  git checkout -b name-of-your-bugfix-or-feature
+  ```
+
+- Make your changes locally.
+- When you're done making changes, check that your changes pass the
+  tests by running [pytest](https://docs.pytest.org/en/):
+
+  ```console
+  pytest tests
+  ```
+
+  Note that once you submit your pull request, GitHub Actions will also run the tests,
+  including on multiple operating systems and Python versions. Your pull request will
+  have to pass on all of these before it can be merged.
+
+- Commit your changes and push your branch to GitHub:
+
+  ```console
+  git add .
+  git commit -m "Your detailed description of your changes."
+  git push origin name-of-your-bugfix-or-feature
+  ```
+
+- [Submit a pull request](https://github.com/{{ config.repo_name }}/compare) through the GitHub website.
+
+## Pull Request Guidelines
+
+Before you submit a pull request, check that it meets these guidelines:
+
+- The pull request should include tests if adding a new feature.
+- The docs should be updated with whatever changes you have made. Put
+  your new functionality into a function with a docstring, and make sure the new
+  functionality is documented after building the documentation.
+
+## Documentation style
+
+We use [mkdocs](https://www.mkdocs.org/) to build the documentation. In particular, we
+use the [mkdocs-material](https://squidfunk.github.io/mkdocs-material/) theme, and a
+variety of other extensions.
+
+!!! note
+    More information codifying our documentation style and principles coming soon. For
+    now, just try to follow the style of the existing documentation. 
diff --git a/docs/extended_api/annotationengine.md b/docs/extended_api/annotationengine.md new file mode 100644 index 00000000..dceb18ea --- /dev/null +++ b/docs/extended_api/annotationengine.md @@ -0,0 +1,5 @@ +--- +title: annotationengine +--- + +::: caveclient.annotationengine \ No newline at end of file diff --git a/docs/extended_api/auth.md b/docs/extended_api/auth.md new file mode 100644 index 00000000..e96123b8 --- /dev/null +++ b/docs/extended_api/auth.md @@ -0,0 +1,5 @@ +--- +title: auth +--- + +::: caveclient.auth diff --git a/docs/extended_api/base.md b/docs/extended_api/base.md new file mode 100644 index 00000000..423c5104 --- /dev/null +++ b/docs/extended_api/base.md @@ -0,0 +1,5 @@ +--- +title: base +--- + +::: caveclient.base \ No newline at end of file diff --git a/docs/extended_api/chunkedgraph.md b/docs/extended_api/chunkedgraph.md new file mode 100644 index 00000000..15575a44 --- /dev/null +++ b/docs/extended_api/chunkedgraph.md @@ -0,0 +1,5 @@ +--- +title: chunkedgraph +--- + +::: caveclient.chunkedgraph \ No newline at end of file diff --git a/docs/extended_api/datastack_lookup.md b/docs/extended_api/datastack_lookup.md new file mode 100644 index 00000000..552017b6 --- /dev/null +++ b/docs/extended_api/datastack_lookup.md @@ -0,0 +1,5 @@ +--- +title: datastack_lookup +--- + +::: caveclient.datastack_lookup diff --git a/docs/extended_api/emannotationschemas.md b/docs/extended_api/emannotationschemas.md new file mode 100644 index 00000000..6efbd3a5 --- /dev/null +++ b/docs/extended_api/emannotationschemas.md @@ -0,0 +1,5 @@ +--- +title: emannotationschemas +--- + +::: caveclient.emannotationschemas diff --git a/docs/extended_api/framework.md b/docs/extended_api/framework.md new file mode 100644 index 00000000..4ff8e756 --- /dev/null +++ b/docs/extended_api/framework.md @@ -0,0 +1,5 @@ +--- +title: framework +--- + +::: caveclient.frameworkclient diff --git a/docs/extended_api/index.md b/docs/extended_api/index.md new file mode 100644 index 00000000..760497b0 --- /dev/null +++ b/docs/extended_api/index.md @@ -0,0 +1,7 @@ +--- +title: Extended API documentation +--- + +This page contains extended documentation for the entire CAVEclient API. This includes +versions of the API that may not be the latest version, but may still be in use by your +particular datastack. 
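+
+For example (a minimal sketch assuming a configured client; the datastack name is a
+placeholder), you can check which versioned client a property resolves to before
+picking the matching page here:
+
+```python
+from caveclient import CAVEclient
+
+client = CAVEclient("my_datastack")  # placeholder datastack name
+
+# the concrete class tells you which versioned API documentation applies
+print(type(client.materialize))
+print(type(client.chunkedgraph))
+```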
\ No newline at end of file diff --git a/docs/extended_api/infoservice.md b/docs/extended_api/infoservice.md new file mode 100644 index 00000000..69532e4a --- /dev/null +++ b/docs/extended_api/infoservice.md @@ -0,0 +1,5 @@ +--- +title: infoservice +--- + +::: caveclient.infoservice \ No newline at end of file diff --git a/docs/extended_api/jsonservice.md b/docs/extended_api/jsonservice.md new file mode 100644 index 00000000..bdf77780 --- /dev/null +++ b/docs/extended_api/jsonservice.md @@ -0,0 +1,5 @@ +--- +title: jsonservice +--- + +::: caveclient.jsonservice \ No newline at end of file diff --git a/docs/extended_api/l2cache.md b/docs/extended_api/l2cache.md new file mode 100644 index 00000000..294a22db --- /dev/null +++ b/docs/extended_api/l2cache.md @@ -0,0 +1,5 @@ +--- +title: l2cache +--- + +::: caveclient.l2cache diff --git a/docs/extended_api/materialization.md b/docs/extended_api/materialization.md new file mode 100644 index 00000000..03fc6926 --- /dev/null +++ b/docs/extended_api/materialization.md @@ -0,0 +1,5 @@ +--- +title: materializationengine +--- + +::: caveclient.materializationengine \ No newline at end of file diff --git a/docs/extended_api/session_config.md b/docs/extended_api/session_config.md new file mode 100644 index 00000000..61df510b --- /dev/null +++ b/docs/extended_api/session_config.md @@ -0,0 +1,5 @@ +--- +title: session_config +--- + +::: caveclient.session_config diff --git a/docs/glossary.md b/docs/glossary.md new file mode 100644 index 00000000..e7c80eac --- /dev/null +++ b/docs/glossary.md @@ -0,0 +1,15 @@ +--- +title: Glossary +--- + +!!! warning + This glossary is a work in progress; for now we are documenting the commonly used + terms that we need to define. Please feel free to contribute definitions or + additional terms. + +- Datastack +- Voxel resolution +- Segmentation resolution +- MIP +- Segmentation +- View diff --git a/docs/guide/annotation.rst b/docs/guide/annotation.rst deleted file mode 100644 index c073c098..00000000 --- a/docs/guide/annotation.rst +++ /dev/null @@ -1,180 +0,0 @@ -AnnotationEngine -================ - -The AnnotationClient is used to interact with the AnnotationEngine -service to create tables from existing schema, upload new data, and -download existing annotations. Note that annotations in the -AnnotationEngine are not linked to any particular segmentation, and thus -do not include any root ids. An annotation client is accessed with -``client.annotation``. - -Getting existing tables -^^^^^^^^^^^^^^^^^^^^^^^ - -A list of the existing tables for the datastack can be found at with -:func:`~caveclient.annotationengine.AnnotationClientV2.get_tables`. - -.. code:: python - - all_tables = client.annotation.get_tables() - all_tables[0] - -Each table has three main properties that can be useful to know: - -* ``table_name`` : The table name, used to refer to it when uploading or downloading annotations. This is also passed through to the table in the Materialized database. -* ``schema_name`` : The name of the table’s schema from EMAnnotationSchemas (see below). -* ``max_annotation_id`` : An upper limit on the number of annotations already contained in the table. - -Downloading annotations -^^^^^^^^^^^^^^^^^^^^^^^ - -You can download the JSON representation of a data point through the -:func:`~caveclient.annotationengine.AnnotationClientV2.get_annotation` method. This can be useful if you need to look up -information on unmaterialized data, or to see what a properly templated -annotation looks like. - -.. 
code:: python - - table_name = all_tables[0]['table_name'] # 'ais_analysis_soma' - annotation_id = 100 - client.annotation.get_annotation(annotation_ids=annotation_id, table_name=table_name) - -Create a new table -^^^^^^^^^^^^^^^^^^ - -One can create a new table with a specified schema with the -:func:`~caveclient.annotationengine.AnnotationClientV2.create_table` method: - -.. code:: python - - client.annotation.create_table(table_name='test_table', - schema_name='microns_func_coreg', - voxel_resolution = [1,1,1], - description="some text to describe your table") -The voxel resolution is the units your position columns will be uploaded in. -[1,1,1] would imply a nm location, where as [4,4,40] would correspond to voxels of that size. -If you are uploading points from a neuroglancer session, you want this to match the units of that neuroglancer view. - -Note there are some optional metadata parameters to :func:`~caveclient.annotationengine.AnnotationClientV2.create_table`. - -* ``notice_text`` : This is text that will show up to users who access this data as a warning. This could be used to warn users that the data is not complete or checked yet, or to advertise that a particular publication should be cited when using this table. -* ``read_permission`` : one of "PRIVATE" which means only you can read data in this table. "PUBLIC" (default) which means anyone can read this table that has read permissions to this dataset. So if and only if you can read the segmentation results of this data, you can read this table. "GROUP" which means that you must share a common group with this user for them to be able to read. We need to make a way to discover what groups you are in and who you share groups with. -* ``write_permission``: one of "PRIVATE" (default), which means only you can write to this table. "PUBLIC" which means anyone can write to this table that has write permissions to this dataset. Note although this means anyone can add data, no annotations are ever truly overwritten. "GROUP" which means that you must share a common group with this user for them to be able to write. We need to make a way to discover what groups you are in and who you share groups with. - -If you change your mind about what you want for metadata, some but not all fields can be updated with :func:`~caveclient.annotationengine.AnnotationClientV2.update_metadata`. This includes the description, the notice_text, and the permissions, but not the name, schema or voxel resolution. - -.. code:: python - - # to update description - client.annotation.update_metadata(table_name='test_table', - description="a new description for my table") - - # to make your table readable by anybody who can read this dataset - client.annotation.update_metadata(table_name='test_table', - notice_text="This table isn't done yet, don't trust it. Contact me") - - # to make your table readable by anybody who can read this dataset - client.annotation.update_metadata(table_name='test_table', - read_permisison="PUBLIC") - -New data can be generated as a dict or list of dicts following the -schema and uploaded with ``post_annotation``. For example, a -``microns_func_coreg`` point needs to have: \* ``type`` set to -``microns_func_coreg`` \* ``pt`` set to a dict with ``position`` as a -key and the xyz location as a value. \* ``func_id`` set to an integer. - -The following could would create a new annotation and then upload it to the service. Note that you get back the annotation id(s) of what you uploaded. - -.. 
code:: python - - new_data = {'type': 'microns_func_coreg', - 'pt': {'position': [1,2,3]}, - 'func_id': 0} - client.annotation.post_annotation(table_name='test_table', data=[new_data]) - -There are methods to simplify annotation uploads if you have a pandas dataframe -whose structure mirrors the struction of the annotation schema you want to upload - -.. code:: python - - import pandas as pd - - df = pd.DataFrame([{'id':0, - 'type': 'microns_func_coreg', - 'pt_position': [1,2,3]}, - 'func_id': 0}, - {'id':1, - 'type': 'microns_func_coreg', - 'pt_position': [3,2,1]}, - 'func_id': 2}]) - client.annotation.post_annotation_df('test_table', df) - -Note that here I specified the IDs of my annotations, which you can do, -but then its up to you to assure that the IDs don't collide with other IDs. -If you leave them blank then the service will assign the IDs for you. - -There is a similar method for updating -:func:`~caveclient.annotationengine.AnnotationClientV2.update_annotation_df` - -Staged Annotations -^^^^^^^^^^^^^^^^^^ - -Staged anotations help ensure that the annotations you post follow the appropriate schema, both by providing guides to the field names and locally validating against a schema before uploading. -The most common use case for staged annotations is to create a StagedAnnotation object for a given table, then add annotations to it individually or as a group, and finally upload to the annotation table. - -To get a StagedAnnotation object, you can start with either a table name or a schema name. Here, we'll assume that there's already a table called "my_table" that is running a "cell_type_local" schema. -If we want to add new annotations to the table, we simply use the table name with :func:`~caveclient.annotationengine.AnnotationClientV2.stage_annotations`. - -.. code:: python - - stage = client.annotation.stage_annotations("my_table") - -This ``stage`` object retrieves the schema for the table and hosts a local collection of annotations. Every time you add an annotation, it is immediately validated against the schema. To add an annotation, use the ``add`` method: - -.. code:: python - - stage.add( - cell_type = "pyramidal_cell", - classification_system="excitatory", - pt_position=[100,100,10], - ) - -The argument names derive from fields in the schema and you must provide all required fields. Any number of annotations can be added to the stage. -A dataframe of annotations can also be added with ``stage.add_dataframe``, and requires an exact match between column names and schema fields. -The key difference between this and posting a dataframe directly is that annotations added to a StagedAnnotations are validated locally, allowing any issues to be caught before uploading. - -You can see the annotations as a list of dictionary records with ``stage.annotation_list`` or as a Pandas dataframe with ``stage.annotation_dataframe``. -Finally, if you initialized the stage with a table name, this information is stored in the ``stage`` and you can simply upload it from the client. - -.. code:: python - - client.annotation.upload_staged_annotations(stage) - -Updating annotations requires knowing the annotation id of the annotation you are updating, which is not required in the schema otherwise. In order to stage updated annotations, set the ``update`` parameter to ``True`` when creating the stage. - -.. 
code:: python - - update_stage = client.annotation.stage_annotations("my_table", update=True) - update_stage.add( - id=1, - cell_type = "stellate_cell", - classification_system="excitatory", - pt_position=[100,100,10], - ) - -The ``update`` also informs the framework client to treat the annotations as an update and it will use the appropriate methods automatically when uploading ``client.annotation.upload_staged_annotations``. - -If you want to specify ids when posting new annotations, ``id_field`` can be set to True when creating the StagedAnnotation object. This will enforce an ``id`` column but still post the data as new annotations. - -If you might be adding spatial data in coordinates that might be different than the resolution for the table, you can also set the ``annotation_resolution`` when creating the stage. -The stage will convert between the resolution you specify for your own annotations and the resolution that the table expects. - -.. code:: python - - stage = client.annotation.stage_annotations("my_table", annotation_resolution=[8,8,40]) - stage.add( - cell_type='pyramidal_cell', - classification_system="excitatory", - pt_position=[50,50,10], - ) - diff --git a/docs/guide/authentication.rst b/docs/guide/authentication.rst deleted file mode 100644 index acbdc562..00000000 --- a/docs/guide/authentication.rst +++ /dev/null @@ -1,73 +0,0 @@ -Authentication Service -====================== - -Authentication tokens are generally needed for programmatic access to -our services. The AuthClient handles storing and -loading your token or tokens and inserting it into requests in other -clients. - -We can access the auth client from ``client.auth``. Once you have saved -a token, you probably won’t interact with this client very often, -however it has some convenient features for saving new tokens the first -time. Let’s see if you have a token already. Probably not. - -.. code:: python - - client = CAVEclient() - auth = client.auth - print(f"My current token is: {auth.token}") - -.. _new-token: - -Getting a new token -^^^^^^^^^^^^^^^^^^^ - -To get a new token, you will need to manually aquire it. For convenience, -the function ``get_new_token()`` provides instructions for how to get and -save the token. - -By default, the token is saved to -``~/.cloudvolume/secrets/cave-secret.json`` as a string under -the key ``token``. This makes it compatible by default with -`Cloudvolume `_ projects, which -can come in handy. The following steps will save a token to the default -location. - -.. code:: python - - auth.get_new_token() - -.. code:: python - - new_token = 'abcdef1234567890' #This is the text you see after you visit the website. - auth.save_token(token=new_token) - print(f"My token is now: {auth.token}") - -Note that requesting a new token will invalidate your previous token on the -same project. If you want to use the same token across different computers, -you will need to share the same token information. - -Loading saved tokens -^^^^^^^^^^^^^^^^^^^^ - -Try opening ``~/.cloudvolume/secrets/cave-secret.json`` to see -what we just created. - -If we had wanted to use a different file or a different json key, we -could have specified that in auth.save_token. - -Because we used the default values, this token is used automatically -when we intialize a new CAVEclient. If we wanted to use a different -token file, token key, or even directly specify a token we could do so -here. - -.. 
code:: python - - client = CAVEclient(datastack_name) - print(f"Now my basic token is: {client.auth.token}") - - client_direct = CAVEclient(datastack_name, auth_token='another_fake_token_678') - print(f"A directly specified token is: {client_direct.auth.token}") - -If you use a CAVEclient, the AuthClient and its token will be -automatically applied to any other services without further use. \ No newline at end of file diff --git a/docs/guide/chunkedgraph.rst b/docs/guide/chunkedgraph.rst deleted file mode 100644 index 50e93625..00000000 --- a/docs/guide/chunkedgraph.rst +++ /dev/null @@ -1,93 +0,0 @@ -ChunkedGraph -============ -The chunkedgraph is a dynamic oct-tree connected components supervoxel graph. - -.. figure:: images/2880px-Octree2.svg.png - :width: 600px - -A visual representation of an oct-tree (`Wikipedia (WhiteTimberwolf) `_ `CC BY-SA 3.0 `_) - -As with any oct-tree, it is organized in hierarchical levels, with the bottom level 1 -corresponding to the supervoxels of the segmentations, and the top level being the unique -connected components of the supervoxel graph. - -.. figure:: images/PCG_oct_tree.png - :width: 700px - -A figure illustrating the spatial chunking and editing of a the chunkedgraph. From `Dorkenwald et. al 2021 `_ - -The ChunkedGraph client allows one to interact with the ChunkedGraph service, -which stores and updates the supervoxel agglomeration graph. This is -most often useful for looking up an object root id of a supervoxel or -looking up supervoxels belonging to a root id. The ChunkedGraph client -is at ``client.chunkedgraph``. - -Look up a supervoxel -^^^^^^^^^^^^^^^^^^^^ - -Usually in Neuroglancer, one never notices supervoxel ids, but they are -important for programmatic work. In order to look up the root id for a -location in space, one needs to use the supervoxel segmentation to get -the associated supervoxel id. The ChunkedGraph client makes this easy -using the :func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.get_root_id` method. - -.. code:: python - - sv_id = 104200755619042523 - client.chunkedgraph.get_root_id(supervoxel_id=sv_id) - -However, as proofreading occurs, the root id that a supervoxel belongs -to can change. By default, this function returns the current state, -however one can also provide a UTC timestamp to get the root id at a -particular moment in history. This can be useful for reproducible -analysis. Note below that the root id for the same supervoxel is -different than it is now. - -.. code:: python - - import datetime - - # I looked up the UTC POSIX timestamp from a day in early 2019. - timestamp = datetime.datetime.utcfromtimestamp(1546595253) - - sv_id = 104200755619042523 - client.chunkedgraph.get_root_id(supervoxel_id=sv_id, timestamp=timestamp) - -If you are doing this across lots of supervoxels (or any nodes) -then you can do it more efficently in one request with -:func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.get_roots` - -.. code:: python - - node_ids = [104200755619042523, 104200755619042524,104200755619042525] - root_ids = client.chunkedgraph.get_roots(node_ids) - -Getting supervoxels for a root id -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A root id is associated with a particular agglomeration of supervoxels, -which can be found with the :func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.get_leaves` method. A new root id is -generated for every new change in the chunkedgraph, so time stamps do -not apply. - -.. 
code:: python - - root_id = 648518346349541252 - client.chunkedgraph.get_leaves(root_id) - -You can also query the chunkedgraph not all the way to the bottom, using the stop_layer -option - -.. code:: python - - root_id = 648518346349541252 - client.chunkedgraph.get_leaves(root_id,stop_layer=2) - -This will get all the level 2 IDs for this root, which correspond to the lowest chunk of the heirachy. -An analogous option exists for :func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.get_roots`. -This is useful to help find nodes to query within the :doc:`l2cache`, amongst other things. - -Other functions -^^^^^^^^^^^^^^^ - -There are a variety of other interesting functions to explore in the :class:`~caveclient.chunkedgraph.ChunkedGraphClientV1` diff --git a/docs/guide/framework.rst b/docs/guide/framework.rst deleted file mode 100644 index 60f15999..00000000 --- a/docs/guide/framework.rst +++ /dev/null @@ -1,85 +0,0 @@ -CAVEclient: One client for all services -============================================= - -The CAVE Framework consists of a number of different -services, each with a specific set of tasks that it can perform through -REST endpoints. -The CAVEclient is designed to ease programmatic interaction -with all of the various endpoints. -In addition, most programmatic access requires the use of authentication tokens. -In order to collect a given server, datastack name, and user token together into a coherent package that can be used -on multiple endpoints, the CAVEclient builds -appropriately configured clients for each of the specific services. -Each of the individual services has their own specific documentation as well. - -Global and Local Services -~~~~~~~~~~~~~~~~~~~~~~~~~ - -There are two categories of data in CAVE: Global and local. -Local services are associated with a single so-called "datastack", which refers to a precise collection of imagery and segmentation data that function together. -For example, EM imagery and a specific pychunkedgraph segmentation would be one datastack, while the same EM imagery but an initial static segmentation would be another. -Datastacks are refered to by a short name, for instance ``pinky100_public_flat_v185``. - -Global services are those that are potentially shared across multiple different specific datastacks. -These include the info service, which can describe the properties of all available datastacks, -the authentication service, and the state service that hosts neuroglancer states. -Global services are associated with a particular URL (by default ``http://globalv1.daf-apis.com``), -but not a single datastack. - -Initializing a CAVEclient -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Assuming that the services are on ``http://globalv1.daf-apis.com`` -and authentication tokens are either not being used or set up with -default values (see :doc:`authentication`), a simple CAVEclient that can -only access global services can be initialized: - -.. code:: python - - from caveclient import CAVEclient - - client = CAVEclient() - -Just to confirm that this works, let’s see if we can get the EM image -source from the InfoService. -If you get a list of names of datastacks, all is good. If you have not yet set up an -authentication token or you get an authentication error, look at :ref:`new-token` -for information about how to set up your auth token. - -.. code:: python - - client.info.get_datastacks() - -If you have a specific datastack you want to use, you can inititialize your CAVEclient with it. 
-This gives you access to the full range of client functions. - -.. code:: python - - client = CAVEclient(datastack_name='my_datastack') - -Using Other Server Addresses -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If your data is hosted by a different global server, you specify its address when initializing the client. - -.. code:: python - - client = CAVEclient(datastack_name='my_datastack', server_address='http://global.myserver.com') - -By default, if you pass both a server address and a datastack, the client will store the mapping from datastack to server address -in the same location as the default for authentication tokens. -Once stored, the client will automatically use the correct server address for the datastack if none is provided. -You can override storing the server address by passing ``write_server_address=False``. -Datastacks can be removed from the cache using `caveclient.datastack_lookup.reset_server_address_cache(datastack_name)`. - - -Accessing specific clients -~~~~~~~~~~~~~~~~~~~~~~~~~~ -Each client can be acccessed as a property of the main client. See the documentation at left for the capabilities of each. Assuming your client is named ``client``, the subclients for each service are: - -* Authentication Service : ``client.auth`` -* AnnotationEngine : ``client.annotation`` -* PyChunkedGraph : ``client.chunkedgraph`` -* InfoService : ``client.info`` -* EM Annotation Schemas : ``client.schemas`` -* JSON Neuroglancer State Service : ``client.state`` diff --git a/docs/guide/info.rst b/docs/guide/info.rst deleted file mode 100644 index d73eced6..00000000 --- a/docs/guide/info.rst +++ /dev/null @@ -1,49 +0,0 @@ - -Info Service -============ - -A datastack has a number of complex paths to various data sources that -together comprise a datastack. Rather than hardcode these paths, the -InfoService allows one to query the location of each data source. This -is also convenient in case data sources change. - -An InfoClient is accessed at ``client.info``. - -.. code:: python - - client = CAVEclient(datastack_name) - print(f"This is an info client for {client.info.datastack_name} on {client.info.server_address}") - -Accessing datastack information -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -All of the information accessible for the datastack can be seen as a dict -using ``get_datastack_info()``. - -.. code:: python - - info.get_datastack_info() - -Individual entries can be found as well. Use tab autocomplete to see the -various possibilities. - -.. code:: python - - info.graphene_source() - -Adjusting formatting -^^^^^^^^^^^^^^^^^^^^ - -Because of the way neuroglancer looks up data versus cloudvolume, -sometimes one needs to convert between ``gs://`` style paths to -``https://storage.googleapis.com/`` stype paths. All of the path sources -in the info client accept a ``format_for`` argument that can handle -this, and correctly adapts to graphene vs precomputed data sources. - -.. 
code:: python - - neuroglancer_style_source = info.image_source(format_for='neuroglancer') - print(f"With gs-style: { neuroglancer_style_source }") - - cloudvolume_style_source = info.image_source(format_for='cloudvolume') - print(f"With https-style: { cloudvolume_style_source }") diff --git a/docs/guide/intro.rst b/docs/guide/intro.rst deleted file mode 100644 index f74556a3..00000000 --- a/docs/guide/intro.rst +++ /dev/null @@ -1,26 +0,0 @@ -Getting Started -=============== - -AnnotationFramework client is a package for simplifying interactions with HTML services associated with the CAVE (Connectome Annotation Versioning Engine), which includes: - -- `pychunkedgraph `_ (For tracking dynamic segmentations) -- `NeuroglancerJsonServer `_ (For posting/getting neuroglancer json states) -- `AnnotationFrameworkInfoService `_ (For storing datastack metadata information) -- `EmAnnotationSchemas `_ (For storing an extensible set of schemas for annotating EM data) -- `AnnotatationEngine `_ (For storing annotations on EM data) - -Installation -~~~~~~~~~~~~ - -The CAVEclient can be installed with pip: - -.. code-block:: bash - - $ pip install caveclient - -Assumptions -~~~~~~~~~~~ - -The code is setup to work flexibly with any deployment of these services, but you need to specify the server_address if that address is not -https://globalv1.daf-apis.com/ for each client when initializing it. -Similarly, the clients can query the info service for metadata to simplify the interaction with a datastack, but you have to specify a datastack name. diff --git a/docs/guide/l2cache.rst b/docs/guide/l2cache.rst deleted file mode 100644 index 4d6342d6..00000000 --- a/docs/guide/l2cache.rst +++ /dev/null @@ -1,78 +0,0 @@ -Level 2 Cache -============= -To understand the level 2 cache, you must understand the structure of the chunkedgraph so see :doc:`chunkedgraph`. - -Nodes on the second level or layer of the graph, corresponds to all the supervoxels that are locally connected to one another within a single level 2 spatial "chunk" of the data. -The Level 2 Cache, is a service whose job it is to track and update relevant statistics about every level 2 node within the a chunkedgraph. The source code of this service can be found `here `_. - -Finding Level 2 Nodes -^^^^^^^^^^^^^^^^^^^^^ -The chunkedgraph can be used to find the level2 nodes of a rootID using a ``stop_layer=2`` keyword argument on the :func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.get_leaves`. Conversely the level 2 node of a supervoxel can be found using the same keyword argument of :func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.get_roots`. Note if you don't specify a timestamp it will give you the level2 node that is presently associated with the object. - -Statistics -^^^^^^^^^^ -The statistics that are available are - -.. list-table:: Level 2 Stats - :widths: 25 50 - :header-rows: 1 - - * - Statistic - - Description - * - area_nm2 - - The surface area of the object in square nanometers. - Does not include border touching voxels - * - size_nm3 - - The volume of the object in cubic nanometers, based on counting voxels in the object. - * - max_dt_nm - - The maximum edge distance transform of that object in nanometers. Meant to capture the maximum 'thickness' of the voxels in the node. - * - mean_dt_nm - - The average edge distance transform of that object in nanometers. Meant to capture the average 'thickness' of voxels in that node. 
- * - rep_coord_nm - - A list of x,y,z coordinates in nanometers that represent a point within the object that is designed to be close to the 'center' of the object. This is the location of the max_dt_nm value. - * - chunk_intersect_count - - A 2 x 3 matrix representing the 6 sides of the chunk, and whose values represent how many voxels border that side of the chunk. Meant to help understand significant the borders with other chunks are. Ordering is the [[x_bottom, y_bottom, z_bottom],[x_top, y_top, z_top]] where {xyz}_bottom refers to the face which has the smallest values for that dimension, and {xyz}_top refers to the face which has the largest. - * - pca - - A 3x3 matrix representing the principal components of the xyz point cloud of voxels for this object. Ordering is NxD where N is the components and D are the xyz dimensions. Meant to help desribe the orientation of the level 2 chunk. Note that this is not calculated for very small objects and so might not be present for all level 2 nodes. You will see that its availability correlates strongly with size_nm3. - * - pca_val - - The 3 principal component values for the PCA components. - -Retrieving Level 2 Statistics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Level 2 stats about nodes can be retreived using the :func:`~caveclient.l2cache.L2CacheClientLegacy.get_l2data` method. It simply takes a list of level 2 nodes you want to retrieve. Optionally you can specify only the attributes that you are interested in retrieving which will speed up the request. - -Missing Data -^^^^^^^^^^^^ -The service is constantly watching for changes made to objects and recalculating stats on new level2 nodes that are created, in order to keep its database of statistics current. This however takes some time, and is subject to sporadic rare failures. If you request stats on a level 2 node which are not in the database, you will receive an empty dictionary for that node. This will immediately trigger the system to recalculate the statistics of that missing data, and so it should be available shortly (on the order of seconds) if systems are operational. Please note that PCA is not calculated for very small objects because it is not meaningful. So if you are interested in differentiating whether PCA is not available because it hasn't been calculated, vs when its not available because it is not possible to calculate, you should ask for at least one other non PCA statistic as well. You will see that its availability correlates strongly with size_nm3. - -Use Cases -^^^^^^^^^ - -Calculate Total Area and Volume of Cells -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Say you want to calculate the total surface area and volume of a object in the dataset. -The areas and volume of each component can simply be added together to do this. - -.. code:: python - - import pandas as pd - root_id = 648518346349541252 - lvl2nodes = client.chunkedgraph.get_leaves(root_id,stop_layer=2) - l2stats=client.l2cache.get_l2data(lvl2nodes, attributes=['size_nm3','area_nm2']) - l2df = pd.DataFrame(l2stats).T - total_area_um2=l2df.area_nm2.sum()/(1000*1000) - total_volume_um3 = l2df.size_nm3.sum()/(1000*1000*1000) - -By utilizing the bounds argument of get_leaves, you can also do simple spatially restricted analysis of objects. In fact, because you have data on each level2 node individually, you can segregate the neuron using any labelling of its topology. 
- -Skeletonization -~~~~~~~~~~~~~~~ -Level 2 nodes have 'cross chunk' edges within the chunkedgraph which represent what level 2 nodes that object is locally connected to. This forms a graph between the level 2 nodes of the object that can be retreived using the chunkedgraph function :func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.level2_chunk_graph`. This graph represents a topological representation of the neuron at the resolution of individual chunks, and is gaurunteed to be fully connected, unlike a voxel or mesh representation of the neuron which can have gaps where there are defects in the segmentation volume or incorrectly inferred edges at self contact locations. - -The level 2 graph can be turned into a skeleton representation of a neuron using a graph based TEASAR like algorithm as described for skeletonizing meshes in this `MeshParty Documentation `_. There is an implementation of this approach that utilizes the chunkedgraph and the L2cache if available `here `_ and on pypi as ``pcg-skel``. In this implementation the l2cache is used to more accurately place the level 2 nodes in space using the ``rep_coord_nm`` value. - -Trajectory Distributions -~~~~~~~~~~~~~~~~~~~~~~~~~ -If one is interested in the bulk direction of processes in a region of the brain, -one can start with supervoxels in a region, find level 2 nodes that correspond to them, filter out components based on size, (or other criteria such as whether they are part of objects that have components in some other brain area) and look at the distribution of PCA components to understand the directions that those processes are moving within that region of space. diff --git a/docs/guide/materialization.rst b/docs/guide/materialization.rst deleted file mode 100644 index 92a85177..00000000 --- a/docs/guide/materialization.rst +++ /dev/null @@ -1,333 +0,0 @@ -Materialization -================ - -The Materialization client allows one to interact with the materialized -annotation tables, that were posted to the annotation service (see -:doc:`annotation`). - -To see the entire class visit the API doc :class:`~caveclient.materializationengine.MaterializatonClientV2` - -The service regularly looks up all annotations and the segids underneath -all the boundspatialpoints. You can then query these tables to find out -the IDs that underlie the annotations, or the annotations that now intersect -with certain IDs. - -For example, one common pattern is that you have idenfied a cell based on -the location of its cell body, and you have an annotation there. - -You want to know what are the inputs onto the cell, so you first query the -annotation table with your soma annotation, asking for the current ID underneath -that soma. Then you query a synapse table for all synapse annotations that -have a post-synaptic ID equal to the ID from your soma annotation. - -In this way your code stays the same, as the proofreading changes and you can -track the connectivity of your cell over time. - -Initializing the client -^^^^^^^^^^^^^^^^^^^^^^^ -By default when you initialize the overall client, it will choose the most recent -materialization version available. This may or may not be desirable depending on your -use case. If your code involves using specific IDs then you should be using a -specific version that is tied to a timepoint where those IDs are valid. - -To see what versions are available, use the :func:`~caveclient.materializationengine.MaterializatonClientV2.get_versions` - -.. 
code:: python - - client.materialize.get_versions() - -Each version has a timestamp it was run on as well as a date when it will expire. -You can query all this metadata for a specific version using -:func:`~caveclient.materializationengine.MaterializatonClientV2.get_version_metadata` -or all versions using -:func:`~caveclient.materializationengine.MaterializatonClientV2.get_versions_metadata` - - -To change the default version, alter the .version property of the materialization client. - -.. code:: python - - client.materialize.version = 9 - -or specify the version when making a particular call. - -Browsing versions -^^^^^^^^^^^^^^^^^ -To see what tables are available in a version you can use -:func:`~caveclient.materializationengine.MaterializatonClientV2.get_tables` - -If you want to read about the description of what that table is, use the annotationengine client -:func:`~caveclient.annotationengine.AnnotationClientV2.get_table_metadata` - -If you want to read more about the schema for the annotation table use the schema service -:func:`~caveclient.emannotationschemas.SchemaClientLegacy.schema_definition` - -Note, the materialization service has a human readable webpage that links to the other services -that might be more convienent for you to browse, -to get a link there in ipython display ``client.materialize.homepage`` - -for some important tables, the info service has a pointer to which table you should use in -the metadata for the datastack. ```client.info.get_datastack_info()['synapse_table']``` -and ```client.info.get_datastack_info()['soma_table']```. - -To see how many annotations are in a particular table use - -.. code:: python - - nannotations=client.materialize.get_annotation_count('my_table') - -Querying tables -^^^^^^^^^^^^^^^ -To query a small table, you can just download the whole thing using -:func:`~caveclient.materializationengine.MaterializatonClientV2.query_table` -which will return a dataframe of the table. - -Note however, some tables, such as the synapse table might be very large 200-300 million rows -and the service will only return the first 200,000 results, and not in a deterministic manner. -**NOTE! This API is not designed to enable enmass downloading of the entire synapse table -there are more efficent ways of doing this. Contact your dataset administrator for more information -if this is what you are looking to do.** - -To just get a preview, use the limit argument (but note again that this won't be a reproducible set) - -.. code:: python - - df=client.materialize.query_table('my_table', limit=10) - -For most applications, you will want to filter the query in some way. - -We offer three kinds of filters you can apply: filter_equal, filter_in and filter_not_in. -For query_table each is specified as a dictionary where the keys are column names, -and the values are a list of values (or single value in the case of filter_equal). - -So for example to query a synapse table for all synapses onto a neuron in flywire you would use - -.. code:: python - - synapse_table = client.info.get_datastack_info()['synapse_table'] - df=client.materialize.query_table(synapse_table, - filter_equal_dict = {'post_pt_root_id': MYID}) - - -The speed of querying is affected by a number of factors, including the size of the data. -To improve the performance of results, you can reduce the number of columns returned using -select_colums. - -So for example, if you are only interested in the root_ids and locations of pre_synaptic terminals -you might limit the query with select_columns. 
Also, it is convient to return the -with positions as a column of np.array([x,y,z]) coordinates for many purposes. -However, sometimes you might prefer to have them split out as seperate _x, _y, _z columns. -To enable this option use split_columns=True. split_columns=True is faster, as combining them is an extra step. -You can recombine split-out position columns using :func:`~caveclient.materializationengine.concatenate_position_columns` - -.. code:: python - - synapse_table = client.info.get_datastack_info()['synapse_table'] - df=client.materialize.query_table(synapse_table, - filter_equal_dict = {'post_pt_root_id': MYID}, - select_columns=['id','pre_pt_root_id', 'pre_pt_position'], - split_columns=True) -Desired Resolution -^^^^^^^^^^^^^^^^^^ -Often you want to have position information in different units. -For example, to consider synapse locations or soma locations, you might want to have positions in nanometers or microns. - -To create neuroglancer views, you might want positions in integer voxels of a size that aligns with the resolution you are used to using Neuroglancer at. - -Annotation tables can be created and uploaded in varying resolutions according to whatever the user of the table felt was natural. -This information is available in the metadata for that table. In addition, you may pass *desired_resolution* as a keyword argument -which will automatically convert all spatial positions into voxels of that size in nanometers. - -So if you want positions in nanometers, you would pass desired_resolution=[1,1,1]. -If you want positions in microns you would pass desired_resolution=[1000,1000,1000]. -If you want positions in 4,4,40nm voxel coordinates to use with cloud-volume or neuroglancer you would pass desired_resolution=[4,4,40]. - - -Spatial Filters -^^^^^^^^^^^^^^^ -You can also filter columns that are associated with spatial locations based upon being within a 3d bounding box. - -This is done by adding a filter_spatial_dict argument to query_table. -The units of the bounding box should be in the units of the voxel_resolution of the table -(which can be obtained from :func:`~caveclient.materializationengine.MaterializatonClientV2.get_table_metadata`). - - -.. code:: python - - bounding_box = [[min_x, min_y, min_z], [max_x, max_y, max_z]] - synapse_table = client.info.get_datastack_info('synapse_table') - df=client.materialize.query_table(synapse_table, - filter_equal_dict = {'post_pt_root_id': MYID}, - filter_spatial_dict = {'post_pt_position': bounding_box}) - - -Synapse Query -^^^^^^^^^^^^^ -For synapses in particular, we have a simplified method for querying them with a reduced syntax. -:func:`~caveclient.materializationengine.MaterializatonClientV2.synapse_query` -lets you specify pre and post synaptic partners as keyword arguments and bounding boxes. -The defaults make reasonable assumptions about what you want to query, namely that the synapse_table is -the table that the info service advertises, and that if you specify a bounding box, that you want the post_pt_position. -These can be overridden of course, but the above bounding box query is simplified to. - -**NOTE! This API is not designed to enable enmass downloading of the entire synapse table -there are more efficent ways of doing this. Contact your dataset administrator for more information -if this is what you are looking to do.** - -.. 
code:: python - - bounding_box = [[min_x, min_y, min_z], [max_x, max_y, max_z]] - df=client.materialize.query_table(post_ids = MYID, - bounding_box=bounding_box) - - -Live Query -^^^^^^^^^^ -In order to query the materialized tables above you can only use IDs that were present at the -timestamp of the materialization. If you query the tables with an ID that is not valid during the -time of the materialization you will get empty results. - -To check if root_ids are valid at your materialization's timestamp, you can use -:func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.is_latest_roots` - -.. code:: python - - import numpy as np - mat_time = client.materialize.get_timestamp() - is_latest = client.chunkedgraph.is_latest_roots([MYID], timestamp=mat_time) - assert(np.all(is_latest)) - - -If you need to lookup what happened to that ID, you can use the chunkedgraph lineage tree, -to look into the future or the past, depending on your application you can use -:func:`~caveclient.chunkedgraph.ChunkedGraphClientV1.get_lineage_graph` - -Again, the ideal situation is that you have an annotation in the database which refers -to your objects of interest, and querying that table by the id column will return the -object in the most recent materialization. - -However, sometimes you might be browsing and proofreadding the data and get an ID -that is more recent that the most recent version available. For convience, you can use -:func:`~caveclient.materializationengine.MaterializatonClientV2.live_query`. - - -to automatically update the results of your query to a time in the future, such as now. -For example, to pass now, use ```datetime.datetime.utcnow```. Note all timestamps are in UTC -throughout the codebase. - -.. code:: python - - import datetime - synapse_table = client.info.get_datastack_info()['synapse_table'] - df=client.materialize.live_query(synapse_table, - datetime.datetime.utcnow(), - filter_equal_dict = {'post_pt_root_id': MYID}) - -This will raise an ValueError exception if the IDs passed in your filters are not valid at the timestamp given - -You can also pass a timestamp directly to query_table and it will call live_query automatically. - -.. code:: python - - import datetime - synapse_table = client.info.get_datastack_info()['synapse_table'] - df=client.materialize.query_table(synapse_table, - timestamp=datetime.datetime.utcnow(), - filter_equal_dict = {'post_pt_root_id': MYID}) - - -Also, keep in mind if you run multiple queries and at each time pass ``datetime.datetime.utcnow()``, -there is no gauruntee that the IDs will be consistent from query to query, as proofreading might be happening -at any time. For larger scale analysis constraining oneself to a materialized version will ensure consistent results. - -Versions have varying expiration times in order to support the tradeoff between recency and consistency, -so before undertakin an analysis project consider what version you want to query and what your plan will be to -update your analysis to future versions. - -Content-aware Interface (Experimental) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. warning:: - As of version 5.8.0, we have introduced a new interface to query tables and views. - This interface might have small but breaking changes in the near future. - -In order to make the querying interface more consistent across tables, we have introduced an additional alternative interface -to filtering and querying data via the ``client.materialize.tables`` object. 
-When you instantiate this object, this object finds all of the existing tables and the list of their columns and lets you filter -the tables as arguments in the function with suggestions. -Moreover, the filtering arguments and the querying arguments are separated into two. - -Let's see how this works with a simplest example — downloading a table called ``nucleus_detection_v0``. -First, we reference the table as a function and then we run the query — this is exactly the same as ``client.materialize.query_table('nucleus_detection_v0')``. - -.. code:: python - - client = CAVEclient('minnie65_public') - nuc_df = client.materialize.tables.nucleus_detection_v0().query() - -Where things differ is when we add filters. -If we want to query based on a set of values for the field "id", for example, we add that as an argument: - -.. code:: python - - my_ids = [373879, 111162] - nuc_df = client.materialize.tables.nucleus_detection_v0(id=my_ids).query() - -Where in this example the ``id=`` queries the column ``id`` based on the schema. -These values can be either individual elements (i.e. an integer or a string) or a list/array of elements, and any field can be used. -The tooling will automatically sort out how to format the filtering appropriately when running the query. -Importantly, the filtering is identical between querying all types of tables and queries. -To see the complete list of fields that can be queried, you can tab-autocomplete or in Jupyter or IPython -glance at the docstring with ``client.materialize.tables.nucleus_detection_v0?``. - -If you need to specify the table programmatically, you can also use a dictionary-style approach to getting the table filtering function. -For example, an equivalent version of the above line would be: - -.. code:: python - - my_ids = [373879, 111162] - my_table = 'nucleus_detection_v0' - nuc_df = client.materialize.tables[my_table](id=my_ids).query() - -The ``query`` function can also take arguments relating to timestamps or formatting where they act just like in the other query method. -In particular, the arguments that apply to ``query`` are: ``select_columns``, ``offset``, ``limit``, ``split_positions``, ``materialization_version``, -``timestamp``, ``metadata``, ``desired_resolution``, and ``get_counts``. -For example, to add a desired resolution and split positions in the above query, it would look like: - -.. code:: python - - my_ids = [373879, 111162] - nuc_df = client.materialize.tables.nucleus_detection_v0( - id=my_ids - ).query( - split_positions=True, - desired_resolution=[1,1,1], - ) - -If you want to do a live query instead of a materialized query, the filtering remains identifical but we use the ``live_query`` function instead. -The one required argument for ``live_query`` is the timestamp. - -.. code:: python - - my_ids = [373879, 111162] - nuc_df = client.materialize.tables.nucleus_detection_v0( - id=my_ids - ).live_query( - timestamp=datetime.datetime.utcnow(), - ) - -The live query functions have similar but slightly different arguments: ``timestamp`` (required), ``offset``, ``limit``, ``split_positions``, -``metadata``, ``desired_resolution``, and ``allow_missing_lookups``. - -Note that way that IPython handles docstrings means that while you can use ``?`` to get the docstring of the filtering part of the function, -you can't simply do something like ``client.materialize.tables.nucleus_detection_v0().query?``. 
It will tell you the function can't be found, -because technically the ``query`` function does not yet exist until the table filtering function is called. - -Instead, if you want to glimpse the docstring of the query or live_query functions, you need to split it into two lines: - -.. code:: python - - qry_func = client.materialize.tables.nucleus_detection_v0().query - qry_func? - -Finally, if the project you are working with has views, a similar interface is available to them via ``client.materialize.views``. -Currently views are not compatible with live query, and so only the ``.query`` function is available. diff --git a/docs/guide/state.rst b/docs/guide/state.rst deleted file mode 100644 index 51a21839..00000000 --- a/docs/guide/state.rst +++ /dev/null @@ -1,58 +0,0 @@ -JSON Neuroglancer State Service -=============================== - -We store the JSON description of a Neuroglancer state in a simple -database at the JSON Service. This is a convenient way to build states -to distribute to people, or pull states to parse work by individuals. -The JSON Client is at ``client.state`` - -.. code:: python - - client.state - -Retrieving a state -^^^^^^^^^^^^^^^^^^ - -JSON states are found simply by their ID, which you get when uploading a -state. You can download a state with ``get_state_json``. - -.. code:: python - - example_id = 4845531975188480 - example_state = client.state.get_state_json(test_id) - example_state['layers'][0] - -Uploading a state -^^^^^^^^^^^^^^^^^ - -You can also upload states with ``upload_state_json``. If you do this, -the state id is returned by the function. Note that there is no easy way -to query what you uploaded later, so be VERY CAREFUL with this state id -if you wish to see it again. - -*Note: If you are working with a Neuroglancer Viewer object or similar, -in order to upload, use viewer.state.to_json() to generate this -representation.* - -.. code:: python - - example_state['layers'][0]['name'] = 'example_name' - new_id = client.state.upload_state_json(example_state) - -.. code:: python - - test_state = client.state.get_state_json(new_id) - test_state['layers'][0]['name'] - -Generating a Neuroglancer URL -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Once you have a state ID, you want to turn it into a well-formatted link. -So you don't have to remember all the endpoints, we can do this from the state client. - -.. code:: python - - ngl_base = 'neuromancer-seung-import.appspot.com' - client.state.build_neuroglancer_url(new_id, ngl_base) - -Note that the neuroglancer base can be found in the info service under ``client.info.viewer_site()``. diff --git a/docs/images/logo-inverted.png b/docs/images/logo-inverted.png new file mode 100644 index 00000000..741e3d65 Binary files /dev/null and b/docs/images/logo-inverted.png differ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..925533ac --- /dev/null +++ b/docs/index.md @@ -0,0 +1,3 @@ +{% + include-markdown "../README.md" +%} diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 689cee40..00000000 --- a/docs/index.rst +++ /dev/null @@ -1,33 +0,0 @@ -Welcome to CAVEclient's documentation! -===================================================== - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - guide/intro - guide/framework - guide/authentication - guide/annotation - guide/chunkedgraph - guide/l2cache - guide/info - guide/schemas - guide/state - guide/materialization - -API ---- -.. 
toctree:: - :maxdepth: 2 - :caption: API: - - api/caveclient - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 00000000..1a133ea2 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,57 @@ +# Installation + +## Stable release + +To install `{{ names.package }}`, run this command in your +terminal: + +```console +pip install {{ names.package }} +``` + +This is the preferred method to install `{{ names.package }}`, as it will always +install the most recent stable release. + +You can also specify a particular version, e.g. + +```console +pip install {{ names.package }}==5.0.0 +``` + +If you don't have [pip][] installed, this [Python installation guide][] +can guide you through the process. + +## From source + +The source for `{{ names.package }}` can be downloaded from +the [Github repo][]. + +You can either clone the public repository: + +```console +git clone git://github.com/{{ config.repo_name }} +``` + +Or download the [tarball][]: + +```console +curl -OJL https://github.com/{{ config.repo_name }}/tarball/{{ names.main_branch }} +``` + +Once you have a copy of the source, you can install it with: + +```console +pip install . +``` + +Or in editable mode, it can be installed with: + +```console +pip install -e . +``` + +[pip]: https://pip.pypa.io +[Python installation guide]: http://docs.python-guide.org/en/latest/starting/installation/ + +[Github repo]: https://github.com/{{ config.repo_name }} +[tarball]: https://github.com/{{ config.repo_name }}/tarball/{{ names.main_branch }} diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 2119f510..00000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=. -set BUILDDIR=_build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 1835f9d5..00000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,100 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --output-file=requirements.out requirements.txt -# -appnope==0.1.3 - # via ipython -asttokens==2.2.1 - # via stack-data -attrs==23.1.0 - # via - # -r requirements.txt - # jsonschema - # referencing -backcall==0.2.0 - # via ipython -cachetools==5.3.1 - # via -r requirements.txt -certifi==2023.7.22 - # via requests -charset-normalizer==3.2.0 - # via requests -decorator==5.1.1 - # via ipython -executing==1.2.0 - # via stack-data -idna==3.4 - # via requests -ipython==8.14.0 - # via -r requirements.txt -jedi==0.19.0 - # via ipython -jsonschema==4.19.0 - # via -r requirements.txt -jsonschema-specifications==2023.7.1 - # via jsonschema -matplotlib-inline==0.1.6 - # via ipython -networkx==3.1 - # via -r requirements.txt -numpy==1.25.2 - # via - # -r requirements.txt - # pandas - # pyarrow -pandas==2.0.3 - # via -r requirements.txt -parso==0.8.3 - # via jedi -pexpect==4.8.0 - # via ipython -pickleshare==0.7.5 - # via ipython -prompt-toolkit==3.0.39 - # via ipython -ptyprocess==0.7.0 - # via pexpect -pure-eval==0.2.2 - # via stack-data -pyarrow==11.0.0 - # via -r requirements.txt -pygments==2.16.1 - # via ipython -python-dateutil==2.8.2 - # via pandas -pytz==2023.3 - # via pandas -referencing==0.30.2 - # via - # jsonschema - # jsonschema-specifications -requests==2.31.0 - # via -r requirements.txt -rpds-py==0.9.2 - # via - # jsonschema - # referencing -six==1.16.0 - # via python-dateutil -stack-data==0.6.2 - # via ipython -traitlets==5.9.0 - # via - # ipython - # matplotlib-inline -typing-extensions==4.7.1 - # via ipython -tzdata==2023.3 - # via pandas -urllib3==2.0.4 - # via - # -r requirements.txt - # requests -wcwidth==0.2.6 - # via prompt-toolkit -sphinx==5.3.0 -sphinx_rtd_theme==1.1.1 -readthedocs-sphinx-search==0.1.1 -sphinxcontrib-napoleon==0.7 \ No newline at end of file diff --git a/docs/tutorials/annotation.md b/docs/tutorials/annotation.md new file mode 100644 index 00000000..a74780b4 --- /dev/null +++ b/docs/tutorials/annotation.md @@ -0,0 +1,235 @@ +# AnnotationEngine + +The AnnotationClient is used to interact with the AnnotationEngine +service to create tables from existing schema, upload new data, and +download existing annotations. Note that annotations in the +AnnotationEngine are not linked to any particular segmentation, and thus +do not include any root ids. An annotation client is accessed with +`client.annotation`. + +## Getting existing tables + +A list of the existing tables for the datastack can be found with +[get_tables()]({{ client_api_paths.annotation }}.get_tables). + +```python +all_tables = client.annotation.get_tables() +all_tables[0] +``` + +Each table has three main properties that can be useful to know: + +- `table_name` : The table name, used to refer to it when uploading or + downloading annotations. This is also passed through to the table in + the Materialized database. +- `schema_name` : The name of the table's schema from + EMAnnotationSchemas (see below). 
+- `max_annotation_id` : An upper limit on the number of annotations
+  already contained in the table.
+
+## Downloading annotations
+
+You can download the JSON representation of a data point through the
+[get_annotation()]({{ client_api_paths.annotation }}.get_annotation)
+method. This can be useful if you need to look up
+information on unmaterialized data, or to see what a properly templated
+annotation looks like.
+
+```python
+table_name = all_tables[0]['table_name'] # 'ais_analysis_soma'
+annotation_id = 100
+client.annotation.get_annotation(annotation_ids=annotation_id, table_name=table_name)
+```
+
+## Create a new table
+
+One can create a new table with a specified schema with the
+[create_table()]({{ client_api_paths.annotation }}.create_table)
+method:
+
+```python
+client.annotation.create_table(table_name='test_table',
+                               schema_name='microns_func_coreg',
+                               voxel_resolution = [1,1,1],
+                               description="some text to describe your table")
+```
+
+The voxel resolution is the units your position columns will be uploaded
+in. [1,1,1] would imply a nm location, whereas [4,4,40] would
+correspond to voxels of that size. If you are uploading points from a
+neuroglancer session, you want this to match the units of that
+neuroglancer view.
+
+Note there are some optional metadata parameters to
+[create_table()]({{ client_api_paths.annotation }}.create_table):
+
+- `notice_text` : This is text that will show up to users who access
+  this data as a warning. This could be used to warn users that the
+  data is not complete or checked yet, or to advertise that a
+  particular publication should be cited when using this table.
+- `read_permission` : one of "PRIVATE", which means only you can read
+  data in this table; "PUBLIC" (default), which means anyone who has
+  read permissions to this dataset can read this table (so if and only
+  if you can read the segmentation results of this data, you can read
+  this table); or "GROUP", which means that a user must share a common
+  group with you to be able to read. We need to make a way to discover
+  what groups you are in and who you share groups with.
+- `write_permission`: one of "PRIVATE" (default), which means only
+  you can write to this table; "PUBLIC", which means anyone who has
+  write permissions to this dataset can write to this table (note that
+  although this means anyone can add data, no annotations are ever
+  truly overwritten); or "GROUP", which means that a user must share a
+  common group with you to be able to write. We need to make a way to
+  discover what groups you are in and who you share groups with.
+
+If you change your mind about what you want for metadata, some but not
+all fields can be updated with
+[update_metadata()]({{ client_api_paths.annotation }}.update_metadata). This includes the
+description, the notice_text, and the permissions, but not the name, schema or voxel
+resolution.
+
+```python
+# to update the description
+client.annotation.update_metadata(table_name='test_table',
+                  description="a new description for my table")
+
+# to add a notice text warning users about the state of this table
+client.annotation.update_metadata(table_name='test_table',
+                  notice_text="This table isn't done yet, don't trust it. Contact me")
+
+# to make your table readable by anybody who can read this dataset
+client.annotation.update_metadata(table_name='test_table',
+                  read_permission="PUBLIC")
+```
+
+New data can be generated as a dict or list of dicts following the
+schema and uploaded with `post_annotation`. For example, a
+`microns_func_coreg` point needs to have:
+
+- `type` set to `microns_func_coreg`
+- `pt` set to a dict with `position` as a key and the xyz location as
+  a value
+- `func_id` set to an integer
+
+The following code would create a new annotation and then upload it to
+the service. Note that you get back the annotation id(s) of what you
+uploaded.
+
+```python
+new_data = {'type': 'microns_func_coreg',
+            'pt': {'position': [1,2,3]},
+            'func_id': 0}
+client.annotation.post_annotation(table_name='test_table', data=[new_data])
+```
+
+There are methods to simplify annotation uploads if you have a pandas
+dataframe whose structure mirrors that of the annotation schema
+you want to upload:
+
+```python
+import pandas as pd
+
+df = pd.DataFrame([{'id': 0,
+                    'type': 'microns_func_coreg',
+                    'pt_position': [1,2,3],
+                    'func_id': 0},
+                   {'id': 1,
+                    'type': 'microns_func_coreg',
+                    'pt_position': [3,2,1],
+                    'func_id': 2}])
+client.annotation.post_annotation_df('test_table', df)
+```
+
+Note that here I specified the IDs of my annotations, which you can do,
+but then it's up to you to ensure that the IDs don't collide with other
+IDs. If you leave them blank then the service will assign the IDs for
+you.
+
+There is a similar method for updating existing annotations,
+[update_annotation_df()]({{ client_api_paths.annotation }}.update_annotation_df).
+
+## Staged Annotations
+
+Staged annotations help ensure that the annotations you post follow the
+appropriate schema, both by providing guides to the field names and
+locally validating against a schema before uploading. The most common
+use case for staged annotations is to create a StagedAnnotation object
+for a given table, then add annotations to it individually or as a
+group, and finally upload to the annotation table.
+
+To get a StagedAnnotation object, you can start with either a table name
+or a schema name. Here, we'll assume that there's already a table
+called "my_table" that uses a "cell_type_local" schema. If we
+want to add new annotations to the table, we simply use the table name
+with [stage_annotations()]({{ client_api_paths.annotation }}.stage_annotations).
+
+```python
+stage = client.annotation.stage_annotations("my_table")
+```
+
+This `stage` object retrieves the schema for the table and hosts a local
+collection of annotations. Every time you add an annotation, it is
+immediately validated against the schema. To add an annotation, use the
+`add` method:
+
+```python
+stage.add(
+    cell_type = "pyramidal_cell",
+    classification_system="excitatory",
+    pt_position=[100,100,10],
+)
+```
+
+The argument names derive from fields in the schema and you must provide
+all required fields. Any number of annotations can be added to the
+stage. A dataframe of annotations can also be added with
+`stage.add_dataframe`, and requires an exact match between column names
+and schema fields. The key difference between this and posting a
+dataframe directly is that annotations added to a StagedAnnotation
+object are validated locally, allowing any issues to be caught before
+uploading.
+
+You can see the annotations as a list of dictionary records with
+`stage.annotation_list` or as a Pandas dataframe with
+`stage.annotation_dataframe`. Finally, if you initialized the stage with
+a table name, this information is stored in the `stage` and you can
+simply upload it from the client.
+ +```python +client.annotation.upload_staged_annotations(stage) +``` + +Updating annotations requires knowing the annotation id of the +annotation you are updating, which is not required in the schema +otherwise. In order to stage updated annotations, set the `update` +parameter to `True` when creating the stage. + +```python +update_stage = client.annotation.stage_annotations("my_table", update=True) +update_stage.add( + id=1, + cell_type = "stellate_cell", + classification_system="excitatory", + pt_position=[100,100,10], +) +``` + +The `update` also informs the framework client to treat the annotations +as an update and it will use the appropriate methods automatically when +uploading `client.annotation.upload_staged_annotations`. + +If you want to specify ids when posting new annotations, `id_field` can +be set to True when creating the StagedAnnotation object. This will +enforce an `id` column but still post the data as new annotations. + +If you might be adding spatial data in coordinates that might be +different than the resolution for the table, you can also set the +`annotation_resolution` when creating the stage. The stage will convert +between the resolution you specify for your own annotations and the +resolution that the table expects. + +```python +stage = client.annotation.stage_annotations("my_table", annotation_resolution=[8,8,40]) +stage.add( + cell_type='pyramidal_cell', + classification_system="excitatory", + pt_position=[50,50,10], +) +``` diff --git a/docs/tutorials/authentication.md b/docs/tutorials/authentication.md new file mode 100644 index 00000000..68decb7b --- /dev/null +++ b/docs/tutorials/authentication.md @@ -0,0 +1,67 @@ +# Authentication + +Authentication tokens are generally needed for programmatic access to +our services. The AuthClient handles storing and loading your token or +tokens and inserting it into requests in other clients. + +We can access the auth client from `client.auth`. Once you have saved a +token, you probably won't interact with this client very often, however +it has some convenient features for saving new tokens the first time. +Let's see if you have a token already. Probably not. + +``` python +client = CAVEclient() +auth = client.auth +print(f"My current token is: {auth.token}") +``` + +## Getting a new token + +To get a new token, you will need to manually acquire it. For +convenience, the function [client.auth.get_new_token()]({{ client_api_paths.auth }}.get_new_token) provides instructions for +how to get and save the token. + +By default, the token is saved to +`~/.cloudvolume/secrets/cave-secret.json` as a string under the key +`token`. This makes it compatible by default with +[Cloudvolume](https://github.com/seung-lab/cloud-volume) projects, which +can come in handy. The following steps will save a token to the default +location. + +``` python +auth.get_new_token() +``` + +``` python +new_token = 'abcdef1234567890' #This is the text you see after you visit the website. +auth.save_token(token=new_token) +print(f"My token is now: {auth.token}") +``` + +Note that requesting a new token will invalidate your previous token on +the same project. If you want to use the same token across different +computers, you will need to share the same token information. + +## Loading saved tokens + +Try opening `~/.cloudvolume/secrets/cave-secret.json` to see what we +just created. + +If we had wanted to use a different file or a different json key, we +could have specified that in auth.save_token. 
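+
+As a sketch of that idea, saving the token to a non-default location
+might look like the following (the `token_file` and `token_key`
+parameter names here are assumptions; check `auth.save_token?` for the
+exact signature):
+
+``` python
+# Hypothetical example: store the token in a custom secrets file,
+# under a custom key, instead of the defaults described above.
+auth.save_token(token=new_token,
+                token_file='~/.cloudvolume/secrets/my-other-secret.json',
+                token_key='my_token')
+```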
+ +Because we used the default values, this token is used automatically +when we initialize a new CAVEclient. If we wanted to use a different +token file, token key, or even directly specify a token we could do so +here. + +``` python +client = CAVEclient(datastack_name) +print(f"Now my basic token is: {client.auth.token}") + +client_direct = CAVEclient(datastack_name, auth_token='another_fake_token_678') +print(f"A directly specified token is: {client_direct.auth.token}") +``` + +If you use a CAVEclient, the AuthClient and its token will be +automatically applied to any other services without further use. diff --git a/docs/tutorials/chunkedgraph.md b/docs/tutorials/chunkedgraph.md new file mode 100644 index 00000000..14eada55 --- /dev/null +++ b/docs/tutorials/chunkedgraph.md @@ -0,0 +1,97 @@ +# ChunkedGraph + +The chunkedgraph is a dynamic oct-tree connected components supervoxel +graph. + +![](images/2880px-Octree2.svg.png){width="600px"} + +A visual representation of an oct-tree ([Wikipedia +(WhiteTimberwolf)](https://en.wikipedia.org/wiki/Octree) [CC BY-SA +3.0](http://creativecommons.org/licenses/by-sa/3.0/)) + +As with any oct-tree, it is organized in hierarchical levels, with the +bottom level 1 corresponding to the supervoxels of the segmentations, +and the top level being the unique connected components of the +supervoxel graph. + +![](images/PCG_oct_tree.png){width="700px"} + +A figure illustrating the spatial chunking and editing of a the +chunkedgraph. From [Dorkenwald et. al +2021](https://doi.org/10.1038/s41592-021-01330-0) + +The ChunkedGraph client allows one to interact with the ChunkedGraph +service, which stores and updates the supervoxel agglomeration graph. +This is most often useful for looking up an object root id of a +supervoxel or looking up supervoxels belonging to a root id. The +ChunkedGraph client is at `client.chunkedgraph`. + +## Look up a supervoxel + +Usually in Neuroglancer, one never notices supervoxel ids, but they are +important for programmatic work. In order to look up the root id for a +location in space, one needs to use the supervoxel segmentation to get +the associated supervoxel id. The ChunkedGraph client makes this easy +using the [client.chunkedgraph.get_root_id()]({{ client_api_paths.chunkedgraph }}.get_root_id) +method. + +```python +sv_id = 104200755619042523 +client.chunkedgraph.get_root_id(supervoxel_id=sv_id) +``` + +However, as proofreading occurs, the root id that a supervoxel belongs +to can change. By default, this function returns the current state, +however one can also provide a UTC timestamp to get the root id at a +particular moment in history. This can be useful for reproducible +analysis. Note below that the root id for the same supervoxel is +different than it is now. + +```python +import datetime + +# I looked up the UTC POSIX timestamp from a day in early 2019. 
+timestamp = datetime.datetime.utcfromtimestamp(1546595253) + +sv_id = 104200755619042523 +client.chunkedgraph.get_root_id(supervoxel_id=sv_id, timestamp=timestamp) +``` + +If you are doing this across lots of supervoxels (or any nodes) then you +can do it more efficiently in one request with +[client.chunkedgraph.get_roots()]({{ client_api_paths.chunkedgraph }}.get_roots) + +```python +node_ids = [104200755619042523, 104200755619042524,104200755619042525] +root_ids = client.chunkedgraph.get_roots(node_ids) +``` + +## Getting supervoxels for a root id + +A root id is associated with a particular agglomeration of supervoxels, +which can be found with the +[client.chunkedgraph.get_leaves()]({{ client_api_paths.chunkedgraph }}.get_leaves) method. +A new root id is generated for every new change in the chunkedgraph, so time stamps do not apply. + +```python +root_id = 648518346349541252 +client.chunkedgraph.get_leaves(root_id) +``` + +You can also query the chunkedgraph not all the way to the bottom, using +the stop_layer option + +```python +root_id = 648518346349541252 +client.chunkedgraph.get_leaves(root_id,stop_layer=2) +``` + +This will get all the level 2 IDs for this root, which correspond to the +lowest chunk of the hierarchy. An analogous option exists for +[client.chunkedgraph.get_roots()]({{ client_api_paths.chunkedgraph }}.get_roots). +This is useful to help find nodes to query within the `l2cache`, amongst other things. + +## Other functions + +There are a variety of other interesting functions to explore in +[client.chunkedgraph]({{ client_api_paths.chunkedgraph }}). diff --git a/docs/tutorials/framework.md b/docs/tutorials/framework.md new file mode 100644 index 00000000..d19f9743 --- /dev/null +++ b/docs/tutorials/framework.md @@ -0,0 +1,93 @@ +# Introduction + +## CAVEclient: one client for all services + +The CAVE Framework consists of a number of different services, each with +a specific set of tasks that it can perform through REST endpoints. The +CAVEclient is designed to ease programmatic interaction with all of the +various endpoints. In addition, most programmatic access requires the +use of authentication tokens. In order to collect a given server, +datastack name, and user token together into a coherent package that can +be used on multiple endpoints, the CAVEclient builds appropriately +configured clients for each of the specific services. Each of the +individual services has their own specific documentation as well. + +## Global and Local Services + +There are two categories of data in CAVE: Global and local. Local +services are associated with a single so-called **datastack**, which +refers to a precise collection of imagery and segmentation data that +function together. For example, EM imagery and a specific pychunkedgraph +segmentation would be one datastack, while the same EM imagery but an +initial static segmentation would be another. Datastacks are referred to +by a short name, for instance `pinky100_public_flat_v185`. + +Global services are those that are potentially shared across multiple +different specific datastacks. These include the info service, which can +describe the properties of all available datastacks, the authentication +service, and the state service that hosts neuroglancer states. Global +services are associated with a particular URL (by default +`http://globalv1.daf-apis.com`), but not a single datastack. 
+ +### Initializing a CAVEclient + +Assuming that the services are on `http://globalv1.daf-apis.com` and +authentication tokens are either not being used or set up with default +values (see [Authentication](./authentication.md)), a simple +CAVEclient that can only access global services can be initialized: + +```python +from caveclient import CAVEclient + +client = CAVEclient() +``` + +Just to confirm that this works, let's see if we can get the EM image +source from the InfoService. If you get a list of names of datastacks, +all is good. If you have not yet set up an authentication token or you +get an authentication error, look at [Getting a new token][getting-a-new-token] for +information about how to set up your auth token. + +```python +client.info.get_datastacks() +``` + +If you have a specific datastack you want to use, you can initialize +your CAVEclient with it. This gives you access to the full range of +client functions. + +```python +client = CAVEclient(datastack_name='my_datastack') +``` + +### Using Other Server Addresses + +If your data is hosted by a different global server, you specify its +address when initializing the client. + +```python +client = CAVEclient(datastack_name='my_datastack', server_address='http://global.myserver.com') +``` + +By default, if you pass both a server address and a datastack, the +client will store the mapping from datastack to server address in the +same location as the default for authentication tokens. Once stored, the +client will automatically use the correct server address for the +datastack if none is provided. You can override storing the server +address by passing `write_server_address=False`. Datastacks can be +removed from the cache using + +[caveclient.datastack_lookup.reset_server_address_cache(datastack_name)][caveclient.datastack_lookup.reset_server_address_cache]. + +## Accessing specific clients + +Each client can be accessed as a property of the main client. See the +documentation at left for the capabilities of each. 
Assuming your client is named `client`, the subclients for each service are:

- Authentication Service : `client.auth`
- AnnotationEngine : `client.annotation`
- PyChunkedGraph : `client.chunkedgraph`
- InfoService : `client.info`
- EM Annotation Schemas : `client.schemas`
- JSON Neuroglancer State Service : `client.state`

diff --git a/docs/guide/images/2880px-Octree2.svg.png b/docs/tutorials/images/2880px-Octree2.svg.png similarity index 100% rename from docs/guide/images/2880px-Octree2.svg.png rename to docs/tutorials/images/2880px-Octree2.svg.png
diff --git a/docs/guide/images/PCG_oct_tree.png b/docs/tutorials/images/PCG_oct_tree.png similarity index 100% rename from docs/guide/images/PCG_oct_tree.png rename to docs/tutorials/images/PCG_oct_tree.png
diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md new file mode 100644 index 00000000..ddb4ca75 --- /dev/null +++ b/docs/tutorials/index.md @@ -0,0 +1,33 @@

# Getting Started

CAVEclient is a package for simplifying interactions with the web
services associated with CAVE (Connectome Annotation Versioning Engine),
which include:

- [pychunkedgraph](https://www.github.com/seung-lab/pychunkedgraph)
  (For tracking dynamic segmentations)
- [NeuroglancerJsonServer](https://www.github.com/seung-lab/NeuroglancerJsonServer)
  (For posting/getting neuroglancer json states)
- [AnnotationFrameworkInfoService](https://www.github.com/seung-lab/AnnotationFrameworkInfoService)
  (For storing datastack metadata information)
- [EmAnnotationSchemas](https://www.github.com/seung-lab/EmAnnotationSchemas)
  (For storing an extensible set of schemas for annotating EM data)
- [AnnotationEngine](https://www.github.com/seung-lab/AnnotationEngine)
  (For storing annotations on EM data)

## Installation

The CAVEclient can be installed with pip:

```bash
$ pip install caveclient
```

## Assumptions

The code is set up to work flexibly with any deployment of these
services, but if an address other than the default is used you need to
specify the server_address for each client when initializing it.
Similarly, the clients can query the info service for metadata to
simplify the interaction with a datastack, but you have to specify a
datastack name.

diff --git a/docs/tutorials/info.md b/docs/tutorials/info.md new file mode 100644 index 00000000..f7e34350 --- /dev/null +++ b/docs/tutorials/info.md @@ -0,0 +1,45 @@

# Info Service

A datastack is defined by a number of paths to the various data sources
that together comprise it. Rather than hardcode these paths, the
InfoService allows one to query the location of each data source. This
is also convenient in case data sources change.

An InfoClient is accessed at `client.info`.

``` python
client = CAVEclient(datastack_name)
print(f"This is an info client for {client.info.datastack_name} on {client.info.server_address}")
```

## Accessing datastack information

All of the information accessible for the datastack can be seen as a
dict using `get_datastack_info()`.

``` python
client.info.get_datastack_info()
```

Individual entries can be found as well. Use tab autocomplete to see the
various possibilities.

``` python
client.info.graphene_source()
```

## Adjusting formatting

Because of the way neuroglancer looks up data versus cloudvolume,
sometimes one needs to convert between `gs://`-style paths and
`https://storage.googleapis.com/`-style paths.
All of the path sources in the info client accept a `format_for`
argument that can handle this, and correctly adapts to graphene vs
precomputed data sources.

``` python
neuroglancer_style_source = client.info.image_source(format_for='neuroglancer')
print(f"With gs-style: { neuroglancer_style_source }")

cloudvolume_style_source = client.info.image_source(format_for='cloudvolume')
print(f"With https-style: { cloudvolume_style_source }")
```

diff --git a/docs/tutorials/l2cache.md b/docs/tutorials/l2cache.md new file mode 100644 index 00000000..a183579d --- /dev/null +++ b/docs/tutorials/l2cache.md @@ -0,0 +1,142 @@

# Level 2 Cache

To understand the level 2 cache, you must first understand the structure
of the chunkedgraph; see [the chunkedgraph tutorial](chunkedgraph.md).

Nodes on the second level or layer of the graph correspond to all the
supervoxels that are locally connected to one another within a single
level 2 spatial "chunk" of the data. The Level 2 Cache is a service
whose job is to track and update relevant statistics about every
level 2 node within a chunkedgraph. The source code of this service
can be found [here](https://github.com/seung-lab/pcgl2cache).

## Finding Level 2 Nodes

The chunkedgraph can be used to find the level 2 nodes of a root ID using
the `stop_layer=2` keyword argument of
[client.chunkedgraph.get_leaves()]({{ client_api_paths.chunkedgraph }}.get_leaves).
Conversely, the level 2 node of a supervoxel can be found using the same keyword argument of
[client.chunkedgraph.get_roots()]({{ client_api_paths.chunkedgraph }}.get_roots).
Note that if you don't specify a timestamp, you will get the level 2 node that is
presently associated with the object.

## Statistics

The statistics that are available are:

- **area_nm2:** The surface area of the object in square nanometers.
  Does not include border-touching voxels.
- **size_nm3:** The volume of the object in cubic nanometers,
  based on counting voxels in the object.
- **max_dt_nm:** The maximum edge distance transform of that
  object in nanometers. Meant to capture the
  maximum "thickness" of the voxels in the node.
- **mean_dt_nm:** The average edge distance transform of that
  object in nanometers. Meant to capture the
  average "thickness" of voxels in that node.
- **rep_coord_nm:** A list of x,y,z coordinates in nanometers that
  represents a point within the object that is
  designed to be close to the "center" of the
  object. This is the location of the max_dt_nm value.
- **chunk_intersect_count:** A 2 x 3 matrix representing the 6 sides of the
  chunk, whose values count how many voxels border each side of the
  chunk. Meant to help understand how significant the borders with
  other chunks are. Ordering is
  `[[x_bottom, y_bottom, z_bottom], [x_top, y_top, z_top]]`, where
  {xyz}_bottom refers to the face which has the smallest values for
  that dimension, and {xyz}_top refers to the face which has the largest.
- **pca:** A 3x3 matrix representing the principal
  components of the xyz point cloud of voxels for
  this object. Ordering is NxD, where N is the
  components and D are the xyz dimensions. Meant
  to help describe the orientation of the level 2
  chunk. Note that this is not calculated for
  very small objects and so might not be present
  for all level 2 nodes. You will see that its
  availability correlates strongly with size_nm3.
- **pca_val:** The 3 principal component values corresponding to the
  PCA components.
## Retrieving Level 2 Statistics

Level 2 stats about nodes can be retrieved using the
[client.l2cache.get_l2data()]({{ client_api_paths.l2cache }}.get_l2data) method. It simply takes a list of level 2 nodes you want to
retrieve. Optionally, you can specify only the attributes that you are
interested in retrieving, which will speed up the request.

## Missing Data

The service is constantly watching for changes made to objects and
recalculating stats on new level 2 nodes that are created, in order to
keep its database of statistics current. This however takes some time,
and is subject to sporadic rare failures. If you request stats for a
level 2 node that is not in the database, you will receive an empty
dictionary for that node. This will immediately trigger the system to
recalculate the statistics of that missing data, and so it should be
available shortly (on the order of seconds) if systems are operational.
Please note that PCA is not calculated for very small objects because it
is not meaningful. So if you are interested in differentiating whether
PCA is not available because it hasn't been calculated, versus not
available because it cannot be calculated, you should ask for at
least one other non-PCA statistic as well. You will see that its
availability correlates strongly with `size_nm3`.

## Use Cases

### Calculate Total Area and Volume of Cells

Say you want to calculate the total surface area and volume of an object
in the dataset. The area and volume of each component can simply be
added together to do this.

```python
import pandas as pd

root_id = 648518346349541252
lvl2nodes = client.chunkedgraph.get_leaves(root_id, stop_layer=2)
l2stats = client.l2cache.get_l2data(lvl2nodes, attributes=['size_nm3', 'area_nm2'])
l2df = pd.DataFrame(l2stats).T
total_area_um2 = l2df.area_nm2.sum() / (1000 * 1000)
total_volume_um3 = l2df.size_nm3.sum() / (1000 * 1000 * 1000)
```

By utilizing the bounds argument of get_leaves, you can also do simple
spatially restricted analysis of objects. In fact, because you have data
on each level 2 node individually, you can segregate the neuron using any
labelling of its topology.

### Skeletonization

Level 2 nodes have "cross chunk" edges within the chunkedgraph which
record which level 2 nodes that object is locally connected to. This
forms a graph between the level 2 nodes of the object that can be
retrieved using the chunkedgraph function
[client.chunkedgraph.level2_chunk_graph()]({{ client_api_paths.chunkedgraph }}.level2_chunk_graph). This graph is a topological representation of the
neuron at the resolution of individual chunks, and is guaranteed to be
fully connected, unlike a voxel or mesh representation of the neuron,
which can have gaps where there are defects in the segmentation volume
or incorrectly inferred edges at self-contact locations.

The level 2 graph can be turned into a skeleton representation of a
neuron using a graph-based, TEASAR-like algorithm as described for
skeletonizing meshes in the [MeshParty
Documentation](https://meshparty.readthedocs.io/en/latest/guide/skeletons.html).
An implementation of this approach that utilizes the
chunkedgraph and the l2cache (if available) can be found
[here](https://github.com/AllenInstitute/pcg_skel) and on PyPI as
`pcg-skel`. In this implementation the l2cache is used to more
accurately place the level 2 nodes in space using the `rep_coord_nm`
value.
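As a rough sketch of that idea (not the `pcg-skel` implementation itself, and assuming `networkx` is installed and that `level2_chunk_graph` returns a list of level 2 node id pairs), one could combine the level 2 graph with the cached `rep_coord_nm` values to build a spatially embedded graph of level 2 nodes:

```python
import networkx as nx

root_id = 648518346349541252

# Edges between level 2 nodes that are locally connected across chunks
l2_edges = client.chunkedgraph.level2_chunk_graph(root_id)

# Representative coordinates (in nm) for each level 2 node from the cache
l2_ids = list({int(node) for edge in l2_edges for node in edge})
l2_stats = client.l2cache.get_l2data(l2_ids, attributes=['rep_coord_nm'])

g = nx.Graph()
g.add_edges_from((int(a), int(b)) for a, b in l2_edges)
for l2_id, stats in l2_stats.items():
    if stats:  # stats can be empty if the cache has not caught up yet
        g.nodes[int(l2_id)]['pos_nm'] = stats['rep_coord_nm']
```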
+ +### Trajectory Distributions + +If one is interested in the bulk direction of processes in a region of +the brain, one can start with supervoxels in a region, find level 2 +nodes that correspond to them, filter out components based on size, (or +other criteria such as whether they are part of objects that have +components in some other brain area) and look at the distribution of PCA +components to understand the directions that those processes are moving +within that region of space. diff --git a/docs/tutorials/materialization.md b/docs/tutorials/materialization.md new file mode 100644 index 00000000..380aa811 --- /dev/null +++ b/docs/tutorials/materialization.md @@ -0,0 +1,386 @@ +# Materialization + +The Materialization client allows one to interact with the materialized +annotation tables, that were posted to the annotation service +([the annotations tutorial](./annotation.md)). + +To see the entire class visit the [API doc]({{ client_api_paths.materialize }}). + +The service regularly looks up all annotations and the segids underneath +all the boundspatialpoints. You can then query these tables to find out +the IDs that underlie the annotations, or the annotations that now +intersect with certain IDs. + +For example, one common pattern is that you have identified a cell based +on the location of its cell body, and you have an annotation there. + +You want to know what are the inputs onto the cell, so you first query +the annotation table with your soma annotation, asking for the current +ID underneath that soma. Then you query a synapse table for all synapse +annotations that have a post-synaptic ID equal to the ID from your soma +annotation. + +In this way your code stays the same, as the proofreading changes and +you can track the connectivity of your cell over time. + +## Initializing the client + +By default when you initialize the overall client, it will choose the +most recent materialization version available. This may or may not be +desirable depending on your use case. If your code involves using +specific IDs then you should be using a specific version that is tied to +a timepoint where those IDs are valid. + +To see what versions are available, use the +[client.materialize.get_versions()]({{ client_api_paths.materialize }}.get_versions) function. + +```python +client.materialize.get_versions() +``` + +Each version has a timestamp it was run on as well as a date when it +will expire. You can query all this metadata for a specific version +using +[client.materialize.get_version_metadata()]({{ client_api_paths.materialize }}.get_version_metadata) or +all versions using +[client.materialize.get_versions_metadata()]({{ client_api_paths.materialize }}.get_versions_metadata). + +To change the default version, alter the .version property of the +materialization client. + +```python +client.materialize.version = 9 +``` + +or specify the version when making a particular call. + +## Browsing versions + +To see what tables are available in a version you can use +[client.materialize.get_tables()]({{ client_api_paths.materialize }}.get_tables). + +If you want to read about the description of what that table is, use the +annotationengine client +[client.materialize.get_table_metadata()]({{ client_api_paths.materialize }}.get_table_metadata). + +If you want to read more about the schema for the annotation table use +the schema service +[caveclient.emannotationschemas.SchemaClientLegacy.schema_definition()][caveclient.emannotationschemas.SchemaClientLegacy.schema_definition]. 
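For example, a short sketch of browsing what is available in a version; the metadata keys shown here, such as `description` and `voxel_resolution`, are assumptions, so inspect the returned dict for your own table.

```python
# List the tables available in the currently selected materialization version
tables = client.materialize.get_tables()
print(tables)

# Look at the metadata for one of them
meta = client.materialize.get_table_metadata(tables[0])
print(meta.get('description'))
print(meta.get('voxel_resolution'))
```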
Note that the materialization service has a human-readable webpage that
links to the other services and might be more convenient for you to
browse; to get a link there, display `client.materialize.homepage` in
IPython or a Jupyter notebook.

For some important tables, the info service has a pointer to which table
you should use in the metadata for the datastack:
`client.info.get_datastack_info()['synapse_table']` and
`client.info.get_datastack_info()['soma_table']`.

To see how many annotations are in a particular table use:

```python
nannotations = client.materialize.get_annotation_count('my_table')
```

## Querying tables

To query a small table, you can just download the whole thing using
[client.materialize.query_table()]({{ client_api_paths.materialize }}.query_table), which will return a
dataframe of the table.

Note, however, that some tables, such as the synapse table, can be very
large (200-300 million rows), and the service will only return the first
200,000 results, and not in a deterministic manner. **NOTE! This API is
not designed for downloading the entire synapse table en masse; there
are more efficient ways of doing this. Contact your dataset
administrator for more information if this is what you are looking to
do.**

To just get a preview, use the limit argument (but note again that this
won't be a reproducible set):

```python
df = client.materialize.query_table('my_table', limit=10)
```

For most applications, you will want to filter the query in some way.

We offer three kinds of filters you can apply: `filter_equal_dict`, `filter_in_dict`
and `filter_out_dict`. For `query_table`, each is specified as a dictionary
where the keys are column names and the values are a list of values (or a
single value in the case of filter_equal).

So, for example, to query a synapse table for all synapses onto a neuron
in FlyWire you would use:

```python
synapse_table = client.info.get_datastack_info()['synapse_table']
df = client.materialize.query_table(synapse_table,
                                    filter_equal_dict={'post_pt_root_id': MYID})
```

The speed of querying is affected by a number of factors, including the
size of the data. To improve the performance of results, you can reduce
the number of columns returned using `select_columns`.

So, for example, if you are only interested in the root ids and locations
of pre-synaptic terminals, you might limit the query with `select_columns`.
For many purposes it is also convenient to have positions returned as a
column of `np.array([x,y,z])` coordinates. However, sometimes you might
prefer to have them split out as separate x, y, z columns. To enable this
option use `split_columns=True`; it is also faster, as combining the
columns is an extra step. You can recombine split-out position columns
using
[caveclient.materializationengine.concatenate_position_columns()][caveclient.materializationengine.concatenate_position_columns].

```python
synapse_table = client.info.get_datastack_info()['synapse_table']
df = client.materialize.query_table(synapse_table,
                                    filter_equal_dict={'post_pt_root_id': MYID},
                                    select_columns=['id', 'pre_pt_root_id', 'pre_pt_position'],
                                    split_columns=True)
```

## Desired Resolution

Often you want to have position information in different units. For
example, to consider synapse locations or soma locations, you might want
to have positions in nanometers or microns.
To create neuroglancer views, you might want positions in integer voxels
of a size that matches the resolution you are used to using Neuroglancer
at.

Annotation tables can be created and uploaded in varying resolutions
according to whatever the user of the table felt was natural. This
information is available in the metadata for that table. In addition,
you may pass _desired_resolution_ as a keyword argument, which will
automatically convert all spatial positions into voxels of that size in
nanometers.

So if you want positions in nanometers, you would pass
`desired_resolution=[1,1,1]`. If you want positions in microns you would
pass `desired_resolution=[1000,1000,1000]`. If you want positions in
4,4,40nm voxel coordinates to use with cloud-volume or neuroglancer you
would pass `desired_resolution=[4,4,40]`.

## Spatial Filters

You can also filter columns that are associated with spatial locations
based upon being within a 3d bounding box.

This is done by adding a filter_spatial_dict argument to query_table.
The units of the bounding box should be in the units of the
voxel_resolution of the table (which can be obtained from
[client.materialize.get_table_metadata()]({{ client_api_paths.materialize }}.get_table_metadata)).

```python
bounding_box = [[min_x, min_y, min_z], [max_x, max_y, max_z]]
synapse_table = client.info.get_datastack_info()['synapse_table']
df = client.materialize.query_table(synapse_table,
                                    filter_equal_dict={'post_pt_root_id': MYID},
                                    filter_spatial_dict={'post_pt_position': bounding_box})
```

## Synapse Query

For synapses in particular, we have a simplified method for querying
them with a reduced syntax.
[client.materialize.synapse_query()]({{ client_api_paths.materialize }}.synapse_query) lets you specify pre-
and post-synaptic partners as keyword arguments, as well as bounding
boxes. The defaults make reasonable assumptions about what you want to
query, namely that the synapse table is the table that the info service
advertises, and that if you specify a bounding box, it applies to the
post_pt_position. These can of course be overridden, but with the
defaults the bounding box query above simplifies to:

```python
bounding_box = [[min_x, min_y, min_z], [max_x, max_y, max_z]]
df = client.materialize.synapse_query(post_ids=MYID,
                                      bounding_box=bounding_box)
```

**NOTE! This API is not designed for downloading the entire synapse
table en masse; there are more efficient ways of doing this. Contact
your dataset administrator for more information if this is what you are
looking to do.**

## Live Query

In order to query the materialized tables above, you can only use IDs
that were present at the timestamp of the materialization. If you query
the tables with an ID that is not valid at the time of the
materialization, you will get empty results.

To check if root ids are valid at your materialization's timestamp, you
can use
[client.chunkedgraph.is_latest_roots()]({{ client_api_paths.chunkedgraph }}.is_latest_roots).

```python
import numpy as np

mat_time = client.materialize.get_timestamp()
is_latest = client.chunkedgraph.is_latest_roots([MYID], timestamp=mat_time)
assert np.all(is_latest)
```

If you need to look up what happened to that ID, you can use the
chunkedgraph lineage graph to look into the future or the past,
depending on your application, with
[client.chunkedgraph.get_lineage_graph()]({{ client_api_paths.chunkedgraph }}.get_lineage_graph).
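As a minimal sketch of that check, building on the validity test above (the structure of the returned lineage graph is described in the API docs):

```python
# If MYID is no longer current at the materialization timestamp,
# inspect its lineage to see what it became or came from
mat_time = client.materialize.get_timestamp()
is_latest = client.chunkedgraph.is_latest_roots([MYID], timestamp=mat_time)
if not is_latest[0]:
    lineage = client.chunkedgraph.get_lineage_graph(MYID)
    print(lineage)
```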
Again, the ideal situation is that you have an annotation in the
database which refers to your objects of interest, and querying that
table by the id column will return the object in the most recent
materialization.

However, sometimes you might be browsing and proofreading the data and
get an ID that is more recent than the most recent version available.
For convenience, you can use
[client.materialize.live_query()]({{ client_api_paths.materialize }}.live_query)
to automatically update the results of your query to a time in the
future, such as now. For example, to pass now, use
`datetime.datetime.now(datetime.timezone.utc)`. Note that all
timestamps are in UTC throughout the codebase.

```python
import datetime

synapse_table = client.info.get_datastack_info()['synapse_table']
df = client.materialize.live_query(synapse_table,
                                   datetime.datetime.now(datetime.timezone.utc),
                                   filter_equal_dict={'post_pt_root_id': MYID})
```

This will raise a ValueError if the IDs passed in your filters are not
valid at the given timestamp.

You can also pass a timestamp directly to query_table and it will call
live_query automatically.

```python
import datetime

synapse_table = client.info.get_datastack_info()['synapse_table']
df = client.materialize.query_table(synapse_table,
                                    timestamp=datetime.datetime.now(datetime.timezone.utc),
                                    filter_equal_dict={'post_pt_root_id': MYID})
```

Also, keep in mind that if you run multiple queries and each time pass
`datetime.datetime.now(datetime.timezone.utc)`, there is no guarantee
that the IDs will be consistent from query to query, as proofreading
might be happening at any time. For larger-scale analysis, constraining
yourself to a materialized version will ensure consistent results.

Versions have varying expiration times in order to support the tradeoff
between recency and consistency, so before undertaking an analysis
project, consider what version you want to query and what your plan will
be to update your analysis to future versions.

## Content-aware Interface (Experimental)

As of version 5.8.0, we have introduced a new interface to query tables
and views. This interface might have small but breaking changes in the
near future.

In order to make the querying interface more consistent across tables,
we have introduced an alternative interface to filtering and querying
data via the `client.materialize.tables` object. When you instantiate
this object, it finds all of the existing tables and the list of their
columns, and lets you filter each table with arguments to a per-table
function, with suggestions. Moreover, the filtering arguments and the
querying arguments are separated into two steps.

Let's see how this works with the simplest example: downloading a
table called `nucleus_detection_v0`. First, we reference the table as a
function and then we run the query; this is exactly the same as
`client.materialize.query_table('nucleus_detection_v0')`.

```python
client = CAVEclient('minnie65_public')
nuc_df = client.materialize.tables.nucleus_detection_v0().query()
```

Where things differ is when we add filters. If we want to query based on
a set of values for the field "id", for example, we add that as an
argument:

```python
my_ids = [373879, 111162]
nuc_df = client.materialize.tables.nucleus_detection_v0(id=my_ids).query()
```

In this example, the `id=` argument filters the column `id` based on the
schema. These values can be either individual elements (i.e.
an integer +or a string) or a list/array of elements, and any field can be used. The +tooling will automatically sort out how to format the filtering +appropriately when running the query. Importantly, the filtering is +identical between querying all types of tables and queries. To see the +complete list of fields that can be queried, you can tab-autocomplete or +in Jupyter or IPython glance at the docstring with +`client.materialize.tables.nucleus_detection_v0?`. + +If you need to specify the table programmatically, you can also use a +dictionary-style approach to getting the table filtering function. For +example, an equivalent version of the above line would be: + +```python +my_ids = [373879, 111162] +my_table = 'nucleus_detection_v0' +nuc_df = client.materialize.tables[my_table](id=my_ids).query() +``` + +The `query` function can also take arguments relating to timestamps or +formatting where they act just like in the other query method. In +particular, the arguments that apply to `query` are: `select_columns`, +`offset`, `limit`, `split_positions`, `materialization_version`, +`timestamp`, `metadata`, `desired_resolution`, and `get_counts`. For +example, to add a desired resolution and split positions in the above +query, it would look like: + +```python +my_ids = [373879, 111162] +nuc_df = client.materialize.tables.nucleus_detection_v0( + id=my_ids +).query( + split_positions=True, + desired_resolution=[1,1,1], +) +``` + +If you want to do a live query instead of a materialized query, the +filtering remains identical but we use the `live_query` function +instead. The one required argument for `live_query` is the timestamp. + +```python +my_ids = [373879, 111162] +nuc_df = client.materialize.tables.nucleus_detection_v0( + id=my_ids +).live_query( + timestamp=datetime.datetime.now(datetime.timezone.utc), +) +``` + +The live query functions have similar but slightly different arguments: +`timestamp` (required), `offset`, `limit`, `split_positions`, +`metadata`, `desired_resolution`, and `allow_missing_lookups`. + +Note that way that IPython handles docstrings means that while you can +use `?` to get the docstring of the filtering part of the function, you +can't simply do something like +`client.materialize.tables.nucleus_detection_v0().query?`. It will tell +you the function can't be found, because technically the `query` +function does not yet exist until the table filtering function is +called. + +Instead, if you want to glimpse the docstring of the query or live_query +functions, you need to split it into two lines: + +```python +qry_func = client.materialize.tables.nucleus_detection_v0().query +qry_func? +``` + +Finally, if the project you are working with has views, a similar +interface is available to them via `client.materialize.views`. Currently +views are not compatible with live query, and so only the `.query` +function is available. diff --git a/docs/guide/schemas.rst b/docs/tutorials/schemas.md similarity index 52% rename from docs/guide/schemas.rst rename to docs/tutorials/schemas.md index a0b24582..65a703bb 100644 --- a/docs/guide/schemas.rst +++ b/docs/tutorials/schemas.md @@ -1,37 +1,34 @@ -EMAnnotationSchemas -=================== +# EMAnnotationSchemas The EMAnnotationSchemas client lets one look up the available schemas and how they are defined. This is mostly used for programmatic interactions between services, but can be useful when looking up schema definitions for new tables. 
-Get the list of schema
-^^^^^^^^^^^^^^^^^^^^^^
+## Get the list of schema

-One can get the list of all available schema with the ``schema`` method.
+One can get the list of all available schema with the `schema` method.
 Currently, new schema have to be generated on the server side, although
 we aim to have a generic set available to use.

-.. code:: python
+``` python
+client.schema.schema()
+```

- client.schema.schema()
+## View a specific schema

-View a specific schema
-^^^^^^^^^^^^^^^^^^^^^^
-
-The details of each schema can be viewed with the ``schema_definition``
+The details of each schema can be viewed with the `schema_definition`
 method, formatted as per JSONSchema.

-.. code:: python
-
- example_schema = client.schema.schema_definition('microns_func_coreg')
- example_schema
+``` python
+example_schema = client.schema.schema_definition('microns_func_coreg')
+example_schema
+```

 This is mostly useful for programmatic interaction between services at
 the moment, but can also be used to inspect the expected form of an
 annotation by digging into the format.

-.. code:: python
-
- example_schema['definitions']['FunctionalCoregistration']
+``` python
+example_schema['definitions']['FunctionalCoregistration']
+```
diff --git a/docs/tutorials/state.md b/docs/tutorials/state.md new file mode 100644 index 00000000..c329cb1d --- /dev/null +++ b/docs/tutorials/state.md @@ -0,0 +1,56 @@

# JSON Neuroglancer State Service

We store the JSON description of a Neuroglancer state in a simple
database at the JSON Service. This is a convenient way to build states
to distribute to people, or to pull states to parse work by individuals.
The JSON Client is at `client.state`.

``` python
client.state
```

## Retrieving a state

JSON states are found simply by their ID, which you get when uploading a
state. You can download a state with `get_state_json`.

``` python
example_id = 4845531975188480
example_state = client.state.get_state_json(example_id)
example_state['layers'][0]
```

## Uploading a state

You can also upload states with `upload_state_json`. If you do this, the
state id is returned by the function. Note that there is no easy way to
query what you uploaded later, so be VERY CAREFUL with this state id if
you wish to see it again.

*Note: If you are working with a Neuroglancer Viewer object or similar,
in order to upload, use viewer.state.to_json() to generate this
representation.*

``` python
example_state['layers'][0]['name'] = 'example_name'
new_id = client.state.upload_state_json(example_state)
```

``` python
test_state = client.state.get_state_json(new_id)
test_state['layers'][0]['name']
```

## Generating a Neuroglancer URL

Once you have a state ID, you want to turn it into a well-formatted
link. So that you don't have to remember all the endpoints, we can do
this from the state client.

``` python
ngl_base = 'neuromancer-seung-import.appspot.com'
client.state.build_neuroglancer_url(new_id, ngl_base)
```

Note that the neuroglancer base can be found in the info service under
`client.info.viewer_site()`.
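For instance, a small sketch that pulls the viewer site from the info service rather than hard-coding it (assuming the datastack's info entry has a viewer site configured):

``` python
# Build a shareable link using the viewer site registered for this datastack
ngl_base = client.info.viewer_site()
url = client.state.build_neuroglancer_url(new_id, ngl_base)
print(url)
```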
diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..de424df2 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,171 @@ +site_name: CAVEclient +site_url: https://bdpedigo.github.io/CAVEclient/ +repo_url: https://github.com/bdpedigo/CAVEclient/ +repo_name: bdpedigo/CAVEclient +edit_uri: edit/master/docs/ +nav: + - Home: index.md + - Installation: installation.md + - Tutorial: + - tutorials/index.md + - tutorials/framework.md + - tutorials/authentication.md + - tutorials/annotation.md + - tutorials/chunkedgraph.md + - tutorials/l2cache.md + - tutorials/info.md + - tutorials/schemas.md + - tutorials/state.md + - tutorials/materialization.md + - Client API: + - client_api/index.md + - client_api/annotation.md + - client_api/auth.md + - client_api/chunkedgraph.md + - client_api/l2cache.md + - client_api/materialize.md + - client_api/state.md + - Extended API: + - extended_api/index.md + - extended_api/annotationengine.md + - extended_api/auth.md + - extended_api/base.md + - extended_api/chunkedgraph.md + - extended_api/datastack_lookup.md + - extended_api/emannotationschemas.md + - extended_api/framework.md + - extended_api/infoservice.md + - extended_api/jsonservice.md + - extended_api/l2cache.md + - extended_api/materialization.md + - extended_api/session_config.md + - Glossary: glossary.md + - Contributing: contributing.md + - Changelog: changelog.md +theme: + name: material + language: en + logo: images/logo-inverted.png + favicon: images/logo-inverted.png + palette: + scheme: preference + primary: black + accent: indigo + icon: + repo: fontawesome/brands/github + features: + - navigation.indexes + - navigation.instant + # - navigation.path + - navigation.prune + # - navigation.sections # toggle to have sections in side nav + # - navigation.tabs # toggle to mainly use top nav + - navigation.tabs.sticky + - content.action.edit + - content.action.view + - toc.follow + # - toc.integrate # whether to include the toc in the main nav bar to the left + - navigation.top + - search.suggest + - search.highlight + - search.share +markdown_extensions: + - pymdownx.critic + - pymdownx.caret + - pymdownx.mark + - pymdownx.tilde + - pymdownx.tabbed + - attr_list + - pymdownx.arithmatex: + generic: true + - pymdownx.highlight: + linenums: false + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.details + - admonition + - toc: + baselevel: 2 + permalink: true + - meta +plugins: + - include-markdown + - search: + lang: en + - mkdocstrings: + default_handler: python + handlers: + python: + import: + - https://docs.python.org/3/objects.inv + - https://pandas.pydata.org/pandas-docs/stable/objects.inv + - https://networkx.org/documentation/stable/objects.inv + - https://docs.scipy.org/doc/scipy/objects.inv + options: + show_source: false + docstring_style: numpy + docstring_section_style: table # list, table, spacy + docstring_options: + ignore_init_summary: false + merge_init_into_class: true + allow_section_blank_line: false + show_root_heading: false + show_root_toc_entry: false + show_object_full_path: false + show_symbol_type_heading: false # whether to show "meth/func/etc" in the page + show_symbol_type_toc: false # whether to show "meth/func/etc" in the toc + signature_crossrefs: true # https://mkdocstrings.github.io/python/usage/configuration/signatures/#signature_crossrefs + members_order: alphabetical # can be source + group_by_category: true # order is attributes, classes, functions, modules + summary: true + show_if_no_docstring: false + show_docstring_attributes: false + 
annotations_path: brief # https://mkdocstrings.github.io/python/usage/configuration/signatures/#annotations_path + show_signature: true + separate_signature: false + show_signature_annotations: false + unwrap_annotated: false # https://mkdocstrings.github.io/python/usage/configuration/signatures/#unwrap_annotated + heading_level: 2 + inherited_members: true + - macros + - autorefs + - section-index + # - git-authors: # this one is more email-based + # show_contribution: true + # authorship_threshold_percent: 5 + # - git-committers: + # repository: bdpedigo/CAVEclient + # branch: main + # - git-revision-date-localized: + # enable_creation_date: true + # - social: + # cards_layout: default + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/bdpedigo/CAVEclient + name: Github + - icon: material/email + link: "mailto:ben.pedigo@alleninstitute.org" + version: + provider: mike + names: + repo_title: CAVEclient + package: caveclient + main_branch: master + client_api_paths: + annotation: ../client_api/annotation.md#caveclient.annotationengine.AnnotationClientV2 + auth: ../client_api/auth.md#caveclient.auth.AuthClient + chunkedgraph: ../client_api/chunkedgraph.md#caveclient.chunkedgraph.ChunkedGraphClientV1 + materialize: ../client_api/materialize.md#caveclient.materializationengine.MaterializatonClientV3 + l2cache: ../client_api/l2cache.md#caveclient.l2cache.L2CacheClientLegacy + state: ../client_api/state.md#caveclient.state.JSONServiceV1 + + + # to enable disqus, uncomment the following and put your disqus id below + # disqus: disqus_id +# uncomment the following and put your google tracking id below to enable GA +#google_analytics: + #- UA-xxx + #- auto diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..882e07d3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,58 @@ +[tool] +[tool.poetry] +authors = ["Ben Pedigo "] +classifiers = [ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', +] +description = "Lightweight representations of networks using Pandas DataFrames." 
+homepage = "https://github.com/bdpedigo/networkframe" +license = "MIT" +name = "networkframe" +packages = [ + { include = "networkframe" }, + { include = "tests", format = "sdist" }, +] +readme = "README.md" +version = "0.4.2" + +[tool.poetry.dependencies] +beartype = "^0.16.4" +networkx = "^3.2.1" +pandas = "^2.1.4" +python = ">=3.11.0,<4.0" +scipy = "^1.11.4" + +[tool.poetry.group.dev.dependencies] +commitizen = "^3.13.0" +mike = "^2.0.0" +mkdocs = "^1.5.3" +mkdocs-autorefs = "^0.5.0" +mkdocs-git-authors-plugin = "^0.7.2" +mkdocs-git-committers-plugin-2 = "^2.2.2" +mkdocs-git-revision-date-localized-plugin = "^1.2.2" +mkdocs-include-markdown-plugin = "^6.0.4" +mkdocs-jupyter = "^0.24.6" +mkdocs-material = "^9.5.2" +mkdocs-material-extensions = "^1.3.1" +mkdocs-section-index = "^0.3.8" +mkdocstrings = { extras = ["crystal", "python"], version = "^0.24.0" } +mknotebooks = "^0.8.0" +mypy = "^1.7.1" +pymdown-extensions = "^10.5" +pytest = "^7.4.3" +ruff = "^0.1.7" +tox = "^4.11.4" +tox-current-env = "^0.0.11" +twine = "^4.0.2" + +[build-system] +build-backend = "poetry.masonry.api" +requires = ["poetry"] + +[tool.ruff] +ignore-init-module-imports = true diff --git a/setup.py b/setup.py index f232151e..f7ac297b 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,9 @@ -from setuptools import setup, find_packages -import re -import os import codecs +import os +import re +from pathlib import Path + +from setuptools import find_packages, setup here = os.path.abspath(os.path.dirname(__file__)) @@ -22,10 +24,16 @@ def find_version(*file_paths): with open("requirements.txt", "r") as f: required = f.read().splitlines() +# read the contents of README file +this_directory = Path(__file__).parent +long_description = (this_directory / "README.rst").read_text() + setup( version=find_version("caveclient", "__init__.py"), name="caveclient", description="a service for interacting with the Connectome Annotation Versioning Engine", + long_description=long_description, + long_description_content_type="text/x-rst", author="Forrest Collman, Casey Schneider-Mizell, Sven Dorkenwald", author_email="forrestc@alleninstute.org,caseys@alleninstitute.org,svenmd@princeton.edu,", url="https://github.com/seung-lab/CAVEclient", @@ -33,4 +41,5 @@ def find_version(*file_paths): include_package_data=True, install_requires=required, setup_requires=["pytest-runner"], + python_requires=">=3.7,<3.12", ) diff --git a/synapse_example.py b/synapse_example.py index 93f1ab34..dc62ad07 100644 --- a/synapse_example.py +++ b/synapse_example.py @@ -1,8 +1,9 @@ -import pandas as pd import os -import numpy as np import time +import numpy as np +import pandas as pd + from caveclient import annotationengine as ae HOME = os.path.expanduser("~") @@ -44,7 +45,6 @@ def insert_synapses(syn_df, datastack_name="pinky100", annotation_type="synapse" if __name__ == "__main__": - print("LOADING synapses") time_start = time.time() diff --git a/test_requirements.txt b/test_requirements.txt index d0e3b209..fefdccd9 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -2,4 +2,4 @@ pytest pytest-cov pytest-env responses -pytest-mock \ No newline at end of file +pytest-mock diff --git a/tests/test_chunkedgraph.py b/tests/test_chunkedgraph.py index c7e2ca86..d607f998 100644 --- a/tests/test_chunkedgraph.py +++ b/tests/test_chunkedgraph.py @@ -2,6 +2,7 @@ from .conftest import test_info, TEST_LOCAL_SERVER, TEST_DATASTACK import pytest import responses +from responses.matchers import json_params_matcher import pytz import numpy as np from 
caveclient.endpoints import ( @@ -35,7 +36,6 @@ def package_timestamp(timestamp, name="timestamp"): class TestChunkedgraph: - _default_endpoint_map = { "cg_server_address": TEST_LOCAL_SERVER, "table_id": test_info["segmentation_source"].split("/")[-1], @@ -47,7 +47,7 @@ def test_get_roots(self, myclient): url = chunkedgraph_endpoints_v1["get_roots"].format_map(endpoint_mapping) svids = np.array([97557743795364048, 75089979126506763], dtype=np.uint64) root_ids = np.array([864691135217871271, 864691135566275148], dtype=np.uint64) - now = datetime.datetime.utcnow() + now = datetime.datetime.now(datetime.timezone.utc) query_d = package_timestamp(now) qurl = url + "?" + urlencode(query_d) responses.add( @@ -231,7 +231,7 @@ def test_delta_roots(self, myclient): endpoint_mapping = self._default_endpoint_map url = chunkedgraph_endpoints_v1["delta_roots"].format_map(endpoint_mapping) - now = datetime.datetime.utcnow() + now = datetime.datetime.now(datetime.timezone.utc) timestamp_past = now - datetime.timedelta(days=1) query_d = package_timestamp(timestamp_past, name="timestamp_past") query_d.update(package_timestamp(now, name="timestamp_future")) @@ -382,7 +382,7 @@ def test_get_remeshing(self, myclient): responses.POST, status=200, url=url, - match=[responses.json_params_matcher({"new_lvl2_ids": chunkid_list})], + match=[json_params_matcher({"new_lvl2_ids": chunkid_list})], ) myclient.chunkedgraph.remesh_level2_chunks(chunk_ids) @@ -404,7 +404,7 @@ def test_is_latest_roots(self, myclient): status=200, url=url, json={"is_latest": is_latest_list}, - match=[responses.json_params_matcher({"node_ids": root_id_list})], + match=[json_params_matcher({"node_ids": root_id_list})], ) qis_latest = myclient.chunkedgraph.is_latest_roots(root_ids) @@ -434,7 +434,7 @@ def test_past_ids(self, myclient): "864691136577570580": [864691136721486702, 864691133958789149], }, } - now = datetime.datetime.utcnow() + now = datetime.datetime.now(datetime.timezone.utc) timestamp_past = now - datetime.timedelta(days=7) query_d = package_timestamp(timestamp_past, name="timestamp_past") @@ -446,7 +446,7 @@ def test_past_ids(self, myclient): status=200, url=qurl, json=id_map_str, - match=[responses.json_params_matcher({"root_ids": root_id_list})], + match=[json_params_matcher({"root_ids": root_id_list})], ) qid_map = myclient.chunkedgraph.get_past_ids( @@ -470,7 +470,7 @@ def test_lineage_graph(self, myclient): url = chunkedgraph_endpoints_v1["handle_lineage_graph"].format_map( endpoint_mapping ) - now = datetime.datetime.utcnow() + now = datetime.datetime.now(datetime.timezone.utc) timestamp_past = now - datetime.timedelta(days=7) query_d = package_timestamp(timestamp_past, name="timestamp_past") @@ -621,7 +621,7 @@ def test_preview_split(self, myclient): url=url, body=json.dumps(response_data), match=[ - responses.json_params_matcher( + json_params_matcher( {"sources": qdata_svid["sources"], "sinks": qdata_svid["sinks"]} ) ], @@ -798,3 +798,35 @@ def test_get_info(self, myclient): base_resolution = myclient.chunkedgraph.base_resolution assert np.all(base_resolution == [8, 8, 40]) + + @responses.activate + def test_is_valid_nodes(self, myclient): + + endpoint_mapping = self._default_endpoint_map + url = chunkedgraph_endpoints_v1["valid_nodes"].format_map(endpoint_mapping) + query_nodes = [91070075234304972, 91070075234296549] + data = {"node_ids": query_nodes} + return_data = {"valid_roots": query_nodes} + responses.add( + responses.GET, + status=200, + url=url, + json=return_data, + match=[json_params_matcher(data)], + ) + + 
out = myclient.chunkedgraph.is_valid_nodes(query_nodes) + assert np.all(out) + + query_nodes = [0, -1] + data = {"node_ids": [0, 18446744073709551615]} + return_data = {"valid_roots": []} + responses.add( + responses.GET, + status=200, + url=url, + json=return_data, + match=[json_params_matcher(data)], + ) + out = myclient.chunkedgraph.is_valid_nodes(query_nodes) + assert not np.any(out) diff --git a/tests/test_materialization.py b/tests/test_materialization.py index 2186b481..9ea76e06 100644 --- a/tests/test_materialization.py +++ b/tests/test_materialization.py @@ -6,6 +6,7 @@ ) import pandas as pd import responses +from responses.matchers import json_params_matcher import pyarrow as pa from urllib.parse import urlencode from .conftest import test_info, TEST_LOCAL_SERVER, TEST_DATASTACK @@ -26,7 +27,7 @@ def match(request_body): return match -class TestChunkedgraphException(Exception): +class ChunkedgraphTestException(Exception): """Error to raise is bad values make it to chunkedgraph""" @@ -140,7 +141,7 @@ def test_matclient(self, myclient, mocker): url=url, body=serialize_dataframe(df), content_type="data.arrow", - match=[responses.json_params_matcher(correct_query_data)], + match=[json_params_matcher(correct_query_data)], ) responses.add( @@ -151,11 +152,7 @@ def test_matclient(self, myclient, mocker): headers={ "dataframe_resolution": "1, 1, 1", }, - match=[ - responses.json_params_matcher( - correct_query_data_with_desired_resolution - ) - ], + match=[json_params_matcher(correct_query_data_with_desired_resolution)], ) meta_url = self.endpoints["metadata"].format_map(endpoint_mapping) @@ -221,7 +218,7 @@ def test_matclient(self, myclient, mocker): ### live query test def my_get_roots(self, supervoxel_ids, timestamp=None, stop_layer=None): if 0 in supervoxel_ids: - raise TestChunkedgraphException( + raise ChunkedgraphTestException( ("should not call get roots on svid =0") ) if timestamp == good_time: @@ -269,7 +266,7 @@ def mocked_get_past_ids( self, root_ids, timestamp_past=None, timestamp_future=None ): if 0 in root_ids: - raise TestChunkedgraphException(("should not past_ids on svid =0")) + raise ChunkedgraphTestException(("should not past_ids on svid =0")) id_map = {201: [100], 103: [103], 203: [101, 102]} return { "future_id_map": {}, @@ -278,7 +275,7 @@ def mocked_get_past_ids( def mock_is_latest_roots(self, root_ids, timestamp=None): if 0 in root_ids: - raise TestChunkedgraphException( + raise ChunkedgraphTestException( ("should not call is_latest on svid =0") ) if timestamp == good_time: @@ -352,7 +349,7 @@ def mock_get_root_timestamps(self, root_ids): url=url, body=serialize_dataframe(df), content_type="data.arrow", - match=[responses.json_params_matcher(correct_query_data)], + match=[json_params_matcher(correct_query_data)], ) correct_query_data = { "filter_in_dict": { @@ -364,7 +361,7 @@ def mock_get_root_timestamps(self, root_ids): url=url, content_type="data.arrow", body=serialize_dataframe(df), - match=[responses.json_params_matcher(correct_query_data)], + match=[json_params_matcher(correct_query_data)], ) correct_query_data = { "filter_in_dict": { @@ -376,7 +373,7 @@ def mock_get_root_timestamps(self, root_ids): url=url, body=serialize_dataframe(df), content_type="data.arrow", - match=[responses.json_params_matcher(correct_query_data)], + match=[json_params_matcher(correct_query_data)], ) dfq = myclient.materialize.live_query( @@ -429,7 +426,7 @@ def mock_get_root_timestamps(self, root_ids): url=url, body=serialize_dataframe(df_ct), content_type="data.arrow", - 
match=[responses.json_params_matcher(correct_query_data)], + match=[json_params_matcher(correct_query_data)], ) dfq = myclient.materialize.live_query( "cell_types", good_time, split_positions=True @@ -444,7 +441,7 @@ def mock_get_root_timestamps(self, root_ids): url=url, body=serialize_dataframe(df_ct), content_type="data.arrow", - match=[responses.json_params_matcher(correct_query_data)], + match=[json_params_matcher(correct_query_data)], ) dfq = myclient.materialize.live_query( "cell_types",