From 21b5e8c90186e05bda840ee1bf51acc34583c153 Mon Sep 17 00:00:00 2001
From: Sean Kelly <kelly@seankelly.biz>
Date: Wed, 9 Jun 2021 16:50:17 -0500
Subject: [PATCH] Resolve #13

---
 README.md                                     | 33 +++++--------------
 docker-compose.yaml                           | 18 +++++-----
 .../edrn/rdf/dmccprotocolrdfgenerator.py      | 18 ++++++++--
 3 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 9882ba3..bbabdb1 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@ Here are the environment variables you'll need to set (substituting values betwe
 -   `EDRN_CANCERDATAEXPO_DATA` — set to a path to contain blobstorage, filestorage, and logs.
 -   `EDRN_CANCERDATAEXPO_PORT` — set to a free port number
 -   `EDRN_CANCERDATAEXPO_VERSION` — set to a version number of `latest`
+-   `EDRN_IMAGE_OWNER` — leave unset to use the `nutjob4life/` images, or set it to an empty string to use your local Docker images; any other value must end with a slash (for example, `someowner/`)
 
 
 ### 🧱 Building the Image
@@ -89,19 +90,16 @@ To run the CancerDataExpo for the **first time**, create empty directories to ho
             --project-name cancerdataexpo \
             up --detach
 
+The `docker-compose.yaml` assumes that `EDRN_CANCERDATAEXPO_DATA` is `/usr/local/labcas/cancerdataexpo/docker-data`, which is appropriate for `edrn-docker.jpl.nasa.gov` where this normally runs, that `EDRN_CANCERDATAEXPO_PORT` is 2131, and that `EDRN_CANCERDATAEXPO_VERSION` is `latest`, so you can simply say:
+
+    docker-compose --project-name cancerdataexpo up --detach
+
 You can check the logs with:
 
-    env \
-        EDRN_CANCERDATAEXPO_DATA=/usr/local/labcas/cancerdataexpo/docker-data \
-        EDRN_CANCERDATAEXPO_VERSION=latest \
-        EDRN_CANCERDATAEXPO_PORT=2131 \
-        docker-compose \
-            --project-name cancerdataexpo \
-            logs --follow
+    docker-compose --project-name cancerdataexpo logs --follow
 
 **📝 Note:** With no existing database, the initiall startup might fail (see the logs, message "Resource Busy"). If this happens, stop it and start it again.
 
-
 Once this is up and running, head to http://localhost:${EDRN_CANCERDATAEXPO_PORT}/manage_main and log in (with username `admin` and password `admin`) and change the default password in the `acl_users` object. Next, create an instance of the CancerDataExpo by visiting http://localhost:${EDRN_CANCERDATAEXPO_PORT}/@@plone-addsite?site_id=Plone&advanced=1 and entering the following:
 
 -   Path identifier: `cancerdataexpo`
@@ -123,25 +121,14 @@ Then click "Save". Lastly, head to the RDF Generators and give the LabCAS genera
 
 Need to bring it all down?
 
-    env \
-        EDRN_CANCERDATAEXPO_DATA=/usr/local/labcas/cancerdataexpo/docker-data \
-        EDRN_CANCERDATAEXPO_VERSION=latest \
-        EDRN_CANCERDATAEXPO_PORT=2131 \
-        docker-compose \
-            --project-name cancerdataexpo \
-            down
+    docker-compose --project-name cancerdataexpo down
+
 
 ### 🎽 Subsequent Runs
 
 Start it up again?
 
-    env \
-        EDRN_CANCERDATAEXPO_DATA=/usr/local/labcas/cancerdataexpo/docker-data \
-        EDRN_CANCERDATAEXPO_VERSION=latest \
-        EDRN_CANCERDATAEXPO_PORT=2131 \
-        docker-compose \
-            --project-name cancerdataexpo \
-            up --detach
+    docker-compose --project-name cancerdataexpo up --detach
 
 
 #### 🐛 Advanced Debugging
@@ -160,5 +147,3 @@ Add a Manager user (not through the web, like above):
 Get a shell:
 
     docker container run --volume ${EDRN_CANCERDATAEXPO_DATA}/blobstorage:/data/blobstorage --tty --rm --interactive --network cancerdataexpo_frontsidebus --entrypoint /bin/bash cancerdataexpo:latest
-
-
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 8e727cb..11b094d 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -4,17 +4,16 @@
 ---
 services:
     appserver:
-        # image: cancerdataexpo:${EDRN_CANCERDATAEXPO_VERSION:-latest}
-        image: nutjob4life/cancerdataexpo:${EDRN_CANCERDATAEXPO_VERSION:-latest}
+        image: ${EDRN_IMAGE_OWNER-nutjob4life/}cancerdataexpo:${EDRN_CANCERDATAEXPO_VERSION:-latest}
         volumes:
             -
                 type: bind
-                source: ${EDRN_CANCERDATAEXPO_DATA}/blobstorage
+                source: ${EDRN_CANCERDATAEXPO_DATA:-/usr/local/labcas/cancerdataexpo/docker-data}/blobstorage
                 target: /data/blobstorage
                 consistency: consistent
             -
                 type: bind
-                source: ${EDRN_CANCERDATAEXPO_DATA}/log
+                source: ${EDRN_CANCERDATAEXPO_DATA:-/usr/local/labcas/cancerdataexpo/docker-data}/log
                 target: /data/log
                 consistency: delegated
         ports:
@@ -41,23 +40,22 @@ services:
             org.label-schema.name: CancerDataExpo Application Server
             org.label-schema.description: Zope appserver running the exposition for cancer data.
     db:
-        # image: cancerdataexpo:${EDRN_CANCERDATAEXPO_VERSION:-latest}
-        image: nutjob4life/cancerdataexpo:${EDRN_CANCERDATAEXPO_VERSION:-latest}
+        image: ${EDRN_IMAGE_OWNER-nutjob4life/}cancerdataexpo:${EDRN_CANCERDATAEXPO_VERSION:-latest}
         command: zeo
         volumes:
             -
                 type: bind
-                source: ${EDRN_CANCERDATAEXPO_DATA}/filestorage
+                source: ${EDRN_CANCERDATAEXPO_DATA:-/usr/local/labcas/cancerdataexpo/docker-data}/filestorage
                 target: /data/filestorage
                 consistency: consistent
             -
                 type: bind
-                source: ${EDRN_CANCERDATAEXPO_DATA}/blobstorage
+                source: ${EDRN_CANCERDATAEXPO_DATA:-/usr/local/labcas/cancerdataexpo/docker-data}/blobstorage
                 target: /data/blobstorage
                 consistency: consistent
             -
                 type: bind
-                source: ${EDRN_CANCERDATAEXPO_DATA}/log
+                source: ${EDRN_CANCERDATAEXPO_DATA:-/usr/local/labcas/cancerdataexpo/docker-data}/log
                 target: /data/log
                 consistency: delegated
         networks:
@@ -72,7 +70,7 @@ services:
             org.label-schema.name: CancerDataExpo DB Server
             org.label-schema.description: Zope Enterprise Objects DB server for the cancer data exposition.
     memory-cache:
-        image: memcached
+        image: memcached:1.6.9-alpine
         networks:
             -   frontsidebus
         restart: on-failure
diff --git a/src/edrn.rdf/edrn/rdf/dmccprotocolrdfgenerator.py b/src/edrn.rdf/edrn/rdf/dmccprotocolrdfgenerator.py
index 38eba07..d2c7d52 100644
--- a/src/edrn.rdf/edrn/rdf/dmccprotocolrdfgenerator.py
+++ b/src/edrn.rdf/edrn/rdf/dmccprotocolrdfgenerator.py
@@ -1,5 +1,5 @@
 # encoding: utf-8
-# Copyright 2012 California Institute of Technology. ALL RIGHTS
+# Copyright 2012–2021 California Institute of Technology. ALL RIGHTS
 # RESERVED. U.S. Government Sponsorship acknowledged.
 
 '''DMCC Protocol RDF Generator. An RDF generator that describes EDRN protocols using the DMCC's bungling web services.
@@ -178,6 +178,12 @@ class IDMCCProtocolRDFGenerator(IRDFGenerator):
         description=_('Uniform Resource Identifier for the cancer type predicate.'),
         required=True,
     )
+    cancerTypeURIPrefix = schema.TextLine(
+        title=_('Disease URI Prefix'),
+        description=_('URI prefix to identify Disease objects for the cancer type studied by a protocol.'),
+        required=True,
+        default='http://edrn.nci.nih.gov/data/diseases/'  # prefix + each Protocol_Cancer_Type value forms a Disease URI
+    )
     commentsURI = schema.TextLine(
         title=_('Comments URI'),
         description=_('Uniform Resource Identifier for the comments predicate.'),
@@ -412,7 +418,6 @@ def addToGraph(self, graph, context):
     'Protocol_Aims':                        'aimsURI',
     'Protocol_Analytic_Method':             'analyticMethodURI',
     'Protocol_Blinding':                    'blindingURI',
-    'Protocol_Cancer_Type':                 'cancerTypeURI',
     'Protocol_Collaborative_Group':         'collaborativeGroupTextURI',
     'Protocol_Comments':                    'commentsURI',
     'Protocol_Data_Sharing_Plan':           'dataSharingPlanURI',
@@ -484,13 +489,22 @@ def _addMiscFields(self, graph, context):
             if not obj: continue
             predicateURI = URIRef(getattr(context, predicateFieldName))
             graph.add((subjectURI, predicateURI, Literal(obj)))
+    def _addCancerTypes(self, graph, context):  # adds one cancer-type triple per listed disease identifier
+        subjectURI, predicateURI = self.getSubjectURI(context), URIRef(context.cancerTypeURI)
+        values = self.slots.get('Protocol_Cancer_Type') or ''  # missing, None, or empty slot yields no triples
+        for value in values.split(','):  # accept "1,2" as well as "1, 2"; each item is stripped below
+            value = value.strip()
+            if value:
+                graph.add((subjectURI, predicateURI, URIRef(context.cancerTypeURIPrefix + value)))
     def addToGraph(self, graph, specifics, context):
         self._addInvolvedInvestigatorSites(graph, specifics, context)
         self._addOtherSites(graph, context)
         self._addPublications(graph, context)
         self._addFieldsOfResearch(graph, context)
+        self._addCancerTypes(graph, context)
         self._addMiscFields(graph, context)
 
+
 _specificsMap = {
     'Animal_Subject_Training_Received': 'animalTraining',
     'Human_Subject_Training_Recieved':  'humanTraining',