From 777575dba19c14c018bbf2ba042507501962e810 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Sat, 17 Sep 2022 11:08:40 -0500 Subject: [PATCH 1/4] wip: l2cache instructions --- 1.chunkedgraph.md | 4 +- 3.l2cache.md | 15 ++++++ helm/l2cache/example_values.yaml | 92 ++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 helm/l2cache/example_values.yaml diff --git a/1.chunkedgraph.md b/1.chunkedgraph.md index 625a94e..6746682 100644 --- a/1.chunkedgraph.md +++ b/1.chunkedgraph.md @@ -29,7 +29,7 @@ Provided scripts create a VPC network, subnet, redis instance, cluster with separately managed pools to run master and workers. Customize variables in the file `terraform/terraform.tfvars` to create infrastructure in your Google Cloud project. Create a service account with at least the following roles: -* Service Account User +* Service Account Admin * Cloud Memorystore Redis Admin * Kubernetes Engine Cluster Admin * Compute Network Admin @@ -41,7 +41,7 @@ Run the `terraform apply` command to create resources. ```shell $ cd terraform/ -$ terraform init // only needed first time +$ terraform init $ terraform apply ``` This will output some variables useful for next steps: diff --git a/3.l2cache.md b/3.l2cache.md index 8b13789..2fe4420 100644 --- a/3.l2cache.md +++ b/3.l2cache.md @@ -1 +1,16 @@ +# L2Cache +The L2Cache is a storage for parameters of individual L2 IDs in the PyChunkedGraph. L2 IDs represent the connected component of the supervoxel graph within a chunk. The most anticipated use of the L2 Cache is to query the information stored for all L2 IDs making up a single neuron. For instance, the volume of a neuron can be computed from the volume of all its L2 IDs. + +The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. 
Hence, information only needs to be recomputed for a few chunks instead of all of them. + + +```shell +export PROJECT_ID=$(gcloud config get-value project) +git clone https://github.com/seung-lab/PCGL2Cache.git -b pcgv2-ingest +cd PCGL2Cache/ +gcloud builds submit --config=cloudbuild.v2.ingest.yaml . + +ingest v2 l2cache_aibs_v1dd aibs_v1dd \ +graphene://https://api.em.brain.allentech.org/segmentation/table/aibs_v1dd "2022-09-16 12:00:00" --create +``` diff --git a/helm/l2cache/example_values.yaml b/helm/l2cache/example_values.yaml new file mode 100644 index 0000000..8a3a200 --- /dev/null +++ b/helm/l2cache/example_values.yaml @@ -0,0 +1,92 @@ +env: +- name: &commonEnvVars "pychunkedgraph" + vars: + REDIS_HOST: "" # refer to output of terraform apply + REDIS_PORT: 6379 + REDIS_PASSWORD: "" + BIGTABLE_PROJECT: &bt_project "" + BIGTABLE_INSTANCE: &bt_instance "" + GOOGLE_APPLICATION_CREDENTIALS: /root/.cloudvolume/secrets/google-secret.json + SHELL: /bin/bash + FLASK_APP: run_dev.py + APP_SETTINGS: pychunkedgraph.app.config.DeploymentWithRedisConfig + + +configfiles: +- name: &bashrc "bashrc" + files: + ".bashrc": |- + alias watch='watch ' + alias ingest='flask ingest' + alias rqx='flask rq' + +configyamls: [] + +secrets: +- name: &cloudVolumeSecrets cloud-volume-secrets + files: + # these are used by python bigtable client and cloud-files + # must have the following permissions: + # * read gcs objects if edges/component files are stored in google cloud buckets + # if they're stored elsewhere use the secrets with appropriate permissions accordingly + # * bigtable - create and read tables + google-secret.json: |- + { + + } + cave-secret.json: |- + { + "token": "" + } + +deployments: + - enabled: true + name: &name master + nodeSelector: + cloud.google.com/gke-nodepool: master + hpa: + enabled: false + volumes: &commonVolumes + - name: *cloudVolumeSecrets + secret: + secretName: *cloudVolumeSecrets + - name: &bashrcVolume bashrc-volume + configMap: + name: *bashrc + 
containers: + - name: *name + image: &image + repository: &imageRep + tag: &tag "" + volumeMounts: &commonVolumeMounts + - name: *cloudVolumeSecrets + mountPath: /root/.cloudvolume/secrets + readOnly: true + - name: *bashrcVolume + mountPath: /root/ + env: + - name: *commonEnvVars + resources: + requests: + memory: 500M + + +workerDeployments: + - enabled: true + name: &name l2 + nodeSelector: + cloud.google.com/gke-nodepool: low + hpa: + enabled: true + minReplicas: 10 + volumes: *commonVolumes + containers: + - name: *name + command: [rq, worker, *name] + image: *image + volumeMounts: *commonVolumeMounts + env: + - name: *commonEnvVars + resources: + requests: + memory: 1G \ No newline at end of file From fe78d0f85b6a9bc07b1cb076a07e3784fce85b55 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Mon, 19 Sep 2022 11:49:51 -0500 Subject: [PATCH 2/4] resolve conflicts --- 3.l2cache.md | 16 ---------------- l2cache.cmds | 9 +++++++++ 2 files changed, 9 insertions(+), 16 deletions(-) create mode 100644 l2cache.cmds diff --git a/3.l2cache.md b/3.l2cache.md index 2fe4420..e69de29 100644 --- a/3.l2cache.md +++ b/3.l2cache.md @@ -1,16 +0,0 @@ -# L2Cache - -The L2Cache is a storage for parameters of individual L2 IDs in the PyChunkedGraph. L2 IDs represent the connected component of the supervoxel graph within a chunk. The most anticipated use of the L2 Cache is to query the information stored for all L2 IDs making up a single neuron. For instance, the volume of a neuron can be computed from the volume of all its L2 IDs. - -The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. Hence, information only needs to be recomputed for a few chunks instead of all of them. 
- - -```shell -export PROJECT_ID=$(gcloud config get-value project) -git clone https://github.com/seung-lab/PCGL2Cache.git -b pcgv2-ingest -cd PCGL2Cache/ -gcloud builds submit --config=cloudbuild.v2.ingest.yaml . - -ingest v2 l2cache_aibs_v1dd aibs_v1dd \ -graphene://https://api.em.brain.allentech.org/segmentation/table/aibs_v1dd "2022-09-16 12:00:00" --create -``` diff --git a/l2cache.cmds b/l2cache.cmds new file mode 100644 index 0000000..f8ee5d6 --- /dev/null +++ b/l2cache.cmds @@ -0,0 +1,9 @@ +```shell +export PROJECT_ID=$(gcloud config get-value project) +git clone https://github.com/seung-lab/PCGL2Cache.git -b pcgv2-ingest +cd PCGL2Cache/ +gcloud builds submit --config=cloudbuild.v2.ingest.yaml . + +ingest v2 l2cache_aibs_v1dd aibs_v1dd \ +graphene://https://api.em.brain.allentech.org/segmentation/table/aibs_v1dd "2022-09-16 12:00:00" --create +``` From c64056afcfd6241b380439f82d04963e0a241451 Mon Sep 17 00:00:00 2001 From: Akhilesh Halageri Date: Mon, 19 Sep 2022 18:19:55 -0500 Subject: [PATCH 3/4] merge conflict --- 3.l2cache.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/3.l2cache.md b/3.l2cache.md index e69de29..c774c48 100644 --- a/3.l2cache.md +++ b/3.l2cache.md @@ -0,0 +1,6 @@ + +# L2Cache + +The L2Cache is a storage for parameters of individual L2 IDs in the PyChunkedGraph. L2 IDs represent the connected component of the supervoxel graph within a chunk. The most anticipated use of the L2 Cache is to query the information stored for all L2 IDs making up a single neuron. For instance, the volume of a neuron can be computed from the volume of all its L2 IDs. + +The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. Hence, information only needs to be recomputed for a few chunks instead of all of them. 
From 319b4817408601bd45adfeb9b2d3757bba4cf8c6 Mon Sep 17 00:00:00 2001
From: Akhilesh Halageri
Date: Wed, 21 Sep 2022 16:31:17 -0500
Subject: [PATCH 4/4] wip: add more docs

---
 3.l2cache.md | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/3.l2cache.md b/3.l2cache.md
index 927fe89..723aa6e 100644
--- a/3.l2cache.md
+++ b/3.l2cache.md
@@ -3,4 +3,15 @@
 
 The L2Cache is a storage for parameters of individual L2 IDs in the PyChunkedGraph. L2 IDs represent the connected component of the supervoxel graph within a chunk. The most anticipated use of the L2 Cache is to query the information stored for all L2 IDs making up a single neuron. For instance, the volume of a neuron can be computed from the volume of all its L2 IDs.
 
-The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. Hence, information only needs to be recomputed for a few chunks instead of all of them.
\ No newline at end of file
+The main reason to store information for the L2 level is to make computation and retrieval of neuron-level information fast and easy following an edit. Edits typically only affect a few chunks among hundreds or thousands spanned by a neuron. Hence, information only needs to be recomputed for a few chunks instead of all of them.
+
+## Infrastructure
+The [same infrastructure](1.chunkedgraph.md#infrastructure) used for creating a PyChunkedGraph can be used to create an L2Cache. But it is simpler, because you will need workers for only one layer, i.e. L2.
+
+One other requirement for creating an L2Cache is a PyChunkedGraph server. A graphene protocol path that can be used to create a [CloudVolume](https://github.com/seung-lab/cloud-volume/) instance. The path looks something like this - `graphene://https://<server_address>/segmentation/table/<table_id>`. 
This assumes [CAVEdeployment](https://github.com/seung-lab/CAVEdeployment) has already been set up.
+
+You will also need a [cave-secret](https://github.com/seung-lab/cloud-volume/#cave-secretjson) for authentication. Refer to `cave-secret.json` in [example_values.yaml](helm/l2cache/example_values.yaml).
+
+## Ingest
+
+Once the necessary infrastructure is set up, a `helm` [chart](helm/l2cache/) can be used to create master and worker nodes. The master is used to create jobs using Redis as a queue.
\ No newline at end of file