From 777575dba19c14c018bbf2ba042507501962e810 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Sat, 17 Sep 2022 11:08:40 -0500 Subject: [PATCH 1/4] wip: l2cache instructions --- 1.chunkedgraph.md | 4 +- 3.l2cache.md | 15 ++++++ helm/l2cache/example_values.yaml | 92 ++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 helm/l2cache/example_values.yaml diff --git a/1.chunkedgraph.md b/1.chunkedgraph.md index 625a94e..6746682 100644 --- a/1.chunkedgraph.md +++ b/1.chunkedgraph.md @@ -29,7 +29,7 @@ Provided scripts create a VPC network, subnet, redis instance, cluster with separately managed pools to run master and workers. Customize variables in the file `terraform/terraform.tfvars` to create infrastructure in your Google Cloud project. Create a service account with at least the following roles: -* Service Account User +* Service Account Admin * Cloud Memorystore Redis Admin * Kubernetes Engine Cluster Admin * Compute Network Admin @@ -41,7 +41,7 @@ Run the `terraform apply` command to create resources. ```shell $ cd terraform/ -$ terraform init // only needed first time +$ terraform init $ terraform apply ``` This will output some variables useful for next steps: diff --git a/3.l2cache.md b/3.l2cache.md index 8b13789..2fe4420 100644 --- a/3.l2cache.md +++ b/3.l2cache.md @@ -1 +1,16 @@ +# L2Cache +The L2Cache is a storage for parameters of individual L2 IDs in the PyChunkedGraph. L2 IDs represent the connected component of the supervoxel graph within a chunk. The most anticipated use of the L2 Cache is to query the information stored for all L2 IDs making up a single neuron. For instance, the volume of a neuron can be computed from the volume of all its L2 IDs. + +The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. 
Hence, information only needs to be recomputed for a few chunks instead of all of them. + + +```shell +export PROJECT_ID=$(gcloud config get-value project) +git clone https://github.com/seung-lab/PCGL2Cache.git -b pcgv2-ingest +cd PCGL2Cache/ +gcloud builds submit --config=cloudbuild.v2.ingest.yaml . + +ingest v2 l2cache_aibs_v1dd aibs_v1dd \ +graphene://https://api.em.brain.allentech.org/segmentation/table/aibs_v1dd "2022-09-16 12:00:00" --create +``` diff --git a/helm/l2cache/example_values.yaml b/helm/l2cache/example_values.yaml new file mode 100644 index 0000000..8a3a200 --- /dev/null +++ b/helm/l2cache/example_values.yaml @@ -0,0 +1,92 @@ +env: +- name: &commonEnvVars "pychunkedgraph" + vars: + REDIS_HOST: "" # refer to output of terraform apply + REDIS_PORT: 6379 + REDIS_PASSWORD: "" + BIGTABLE_PROJECT: &bt_project "" + BIGTABLE_INSTANCE: &bt_instance "" + GOOGLE_APPLICATION_CREDENTIALS: /root/.cloudvolume/secrets/google-secret.json + SHELL: /bin/bash + FLASK_APP: run_dev.py + APP_SETTINGS: pychunkedgraph.app.config.DeploymentWithRedisConfig + + +configfiles: +- name: &bashrc "bashrc" + files: + ".bashrc": |- + alias watch='watch ' + alias ingest='flask ingest' + alias rqx='flask rq' + +configyamls: [] + +secrets: +- name: &cloudVolumeSecrets cloud-volume-secrets + files: + # these are used by python bigtable client and cloud-files + # must have the following permissions: + # * read gcs objects if edges/component files are stored in google cloud buckets + # if they're stored elsewhere use the secrets with appropriate permissions accordingly + # * bigtable - create and read tables + google-secret.json: |- + { + + } + cave-secret.json: |- + { + "token": "" + } + +deployments: + - enabled: true + name: &name master + nodeSelector: + cloud.google.com/gke-nodepool: master + hpa: + enabled: false + volumes: &commonVolumes + - name: *cloudVolumeSecrets + secret: + secretName: *cloudVolumeSecrets + - name: &bashrcVolume bashrc-volume + configMap: + name: *bashrc + 
containers: + - name: *name + image: &image + repository: &imageRep + tag: &tag "" + volumeMounts: &commonVolumeMounts + - name: *cloudVolumeSecrets + mountPath: /root/.cloudvolume/secrets + readOnly: true + - name: *bashrcVolume + mountPath: /root/ + env: + - name: *commonEnvVars + resources: + requests: + memory: 500M + + +workerDeployments: + - enabled: true + name: &name l2 + nodeSelector: + cloud.google.com/gke-nodepool: low + hpa: + enabled: true + minReplicas: 10 + volumes: *commonVolumes + containers: + - name: *name + command: [rq, worker, *name] + image: *image + volumeMounts: *commonVolumeMounts + env: + - name: *commonEnvVars + resources: + requests: + memory: 1G \ No newline at end of file From fe78d0f85b6a9bc07b1cb076a07e3784fce85b55 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Mon, 19 Sep 2022 11:49:51 -0500 Subject: [PATCH 2/4] resolve conflicts --- 3.l2cache.md | 16 ---------------- l2cache.cmds | 9 +++++++++ 2 files changed, 9 insertions(+), 16 deletions(-) create mode 100644 l2cache.cmds diff --git a/3.l2cache.md b/3.l2cache.md index 2fe4420..e69de29 100644 --- a/3.l2cache.md +++ b/3.l2cache.md @@ -1,16 +0,0 @@ -# L2Cache - -The L2Cache is a storage for parameters of individual L2 IDs in the PyChunkedGraph. L2 IDs represent the connected component of the supervoxel graph within a chunk. The most anticipated use of the L2 Cache is to query the information stored for all L2 IDs making up a single neuron. For instance, the volume of a neuron can be computed from the volume of all its L2 IDs. - -The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. Hence, information only needs to be recomputed for a few chunks instead of all of them. 
- - -```shell -export PROJECT_ID=$(gcloud config get-value project) -git clone https://github.com/seung-lab/PCGL2Cache.git -b pcgv2-ingest -cd PCGL2Cache/ -gcloud builds submit --config=cloudbuild.v2.ingest.yaml . - -ingest v2 l2cache_aibs_v1dd aibs_v1dd \ -graphene://https://api.em.brain.allentech.org/segmentation/table/aibs_v1dd "2022-09-16 12:00:00" --create -``` diff --git a/l2cache.cmds b/l2cache.cmds new file mode 100644 index 0000000..f8ee5d6 --- /dev/null +++ b/l2cache.cmds @@ -0,0 +1,9 @@ +```shell +export PROJECT_ID=$(gcloud config get-value project) +git clone https://github.com/seung-lab/PCGL2Cache.git -b pcgv2-ingest +cd PCGL2Cache/ +gcloud builds submit --config=cloudbuild.v2.ingest.yaml . + +ingest v2 l2cache_aibs_v1dd aibs_v1dd \ +graphene://https://api.em.brain.allentech.org/segmentation/table/aibs_v1dd "2022-09-16 12:00:00" --create +``` From c64056afcfd6241b380439f82d04963e0a241451 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Mon, 19 Sep 2022 18:19:55 -0500 Subject: [PATCH 3/4] merge conflict --- 3.l2cache.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/3.l2cache.md b/3.l2cache.md index e69de29..c774c48 100644 --- a/3.l2cache.md +++ b/3.l2cache.md @@ -0,0 +1,6 @@ + +# L2Cache + +The L2Cache is a storage for parameters of individual L2 IDs in the PyChunkedGraph. L2 IDs represent the connected component of the supervoxel graph within a chunk. The most anticipated use of the L2 Cache is to query the information stored for all L2 IDs making up a single neuron. For instance, the volume of a neuron can be computed from the volume of all its L2 IDs. + +The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. Hence, information only needs to be recomputed for a few chunks instead of all of them. 
From 319b4817408601bd45adfeb9b2d3757bba4cf8c6 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri
Date: Wed, 21 Sep 2022 16:31:17 -0500
Subject: [PATCH 4/4] wip: add more docs

---
 3.l2cache.md | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/3.l2cache.md b/3.l2cache.md
index 927fe89..723aa6e 100644
--- a/3.l2cache.md
+++ b/3.l2cache.md
@@ -3,4 +3,15 @@
 
 The L2Cache is a storage for parameters of individual L2 IDs in the PyChunkedGraph. L2 IDs represent the connected component of the supervoxel graph within a chunk. The most anticipated use of the L2 Cache is to query the information stored for all L2 IDs making up a single neuron. For instance, the volume of a neuron can be computed from the volume of all its L2 IDs.
 
-The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. Hence, information only needs to be recomputed for a few chunks instead of all of them.
\ No newline at end of file
+The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. Hence, information only needs to be recomputed for a few chunks instead of all of them.
+
+## Infrastructure
+The [same infrastructure](1.chunkedgraph.md#infrastructure) used for creating a PyChunkedGraph can be used to create an L2Cache. But it is simpler, because you will need workers for only one layer, i.e. L2.
+
+One other requirement for creating an L2Cache is a PyChunkedGraph server. A graphene protocol path that can be used to create a [CloudVolume](https://github.com/seung-lab/cloud-volume/) instance. The path looks something like this - `graphene://https://<server_address>/segmentation/table/<table_id>`. 
This assumes [CAVEdeployment](https://github.com/seung-lab/CAVEdeployment) has already been set up.
+
+You will also need a [cave-secret](https://github.com/seung-lab/cloud-volume/#cave-secretjson) for authentication. Refer to `cave-secret.json` in [example_values.yaml](helm/l2cache/example_values.yaml).
+
+## Ingest
+
+Once the necessary infrastructure is set up, a `helm` [chart](helm/l2cache/) can be used to create master and worker nodes. The master is used to create jobs using Redis as a queue.
\ No newline at end of file