clulab · kwalcock · Jan 4, 2024 · Jan 2, 2024 · Jan 3, 2024 · Jan 3, 2024
diff --git a/apps/build.sbt b/apps/build.sbt
@@ -7,9 +7,17 @@ resolvers ++= Seq(
 
 libraryDependencies ++= {
   Seq(
-    "org.clulab"     % "deberta-onnx-model" % "0.2.0",
-    "org.clulab"     % "electra-onnx-model" % "0.2.0",
-    "org.clulab"     % "roberta-onnx-model" % "0.2.0",
+    // Models version 0.1.0 work when LinearLayer.USE_CONCAT == true.
+    "org.clulab"     % "deberta-onnx-model" % "0.1.0",
+    "org.clulab"     % "roberta-onnx-model" % "0.1.0",
+
+    // Models version 0.2.0 work when LinearLayer.USE_CONCAT == false.
+    // Models of different versions cannot be combined into a single project
+    // because the resource names will conflict.  The choice is forced.
+    // "org.clulab"     % "deberta-onnx-model" % "0.2.0",
+    // "org.clulab"     % "electra-onnx-model" % "0.2.0",
+    // "org.clulab"     % "roberta-onnx-model" % "0.2.0",
+
     "org.scalatest" %% "scalatest"          % "3.2.15" % "test"
   )
 }

diff --git a/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala b/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala
@@ -4,8 +4,8 @@ import org.clulab.scala_transformers.encoder.TokenClassifier
 
 object TokenClassifierExampleApp extends App {
   // Choose one of these.
-  val tokenClassifier = TokenClassifier.fromFiles("../microsoft-deberta-v3-base-mtl/avg_export")
-  // val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/microsoft_deberta_v3_base_mtl/avg_export")
+  // val tokenClassifier = TokenClassifier.fromFiles("../microsoft-deberta-v3-base-mtl/avg_export")
+  val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/microsoft_deberta_v3_base_mtl/avg_export")
   // val tokenClassifier = TokenClassifier.fromFiles("../models/google_electra_small_discriminator_mtl/avg_export")
   // val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/google_electra_small_discriminator_mtl/avg_export")
   // val tokenClassifier = TokenClassifier.fromFiles("../models/roberta_base_mtl/avg_export")

diff --git a/encoder/src/main/python/parameters.py b/encoder/src/main/python/parameters.py
@@ -50,6 +50,10 @@ class Parameters:
     # the encoding used by default for reading and writing files       
     encoding = "UTF-8"
 
+    # in dual mode, combine modifier and head hidden states by concatenation (True) or by sum (False)
+    use_concat: bool = True
+
+
     def get_model_name(transformer_name: str) -> str:
         return f"{transformer_name.replace('/', '-')}-mtl"
 

diff --git a/encoder/src/main/python/token_classifier.py b/encoder/src/main/python/token_classifier.py
@@ -223,7 +223,7 @@ def __init__(self, hidden_size: int, num_labels: int, task_id, dual_mode: bool=F
         self.dropout = nn.Dropout(dropout_p)
         self.dual_mode = dual_mode
         self.classifier = nn.Linear(
-            hidden_size, # if not self.dual_mode else hidden_size * 2, # USE SUM
+            hidden_size * 2 if (self.dual_mode and Parameters.use_concat) else hidden_size,
             num_labels
         )
         self.num_labels = num_labels
@@ -248,8 +248,7 @@ def concatenate(self, sequence_output, head_positions):
         head_states = sequence_output[torch.arange(sequence_output.shape[0]).unsqueeze(1), long_head_positions]
         #print(f"head_states.size = {head_states.size()}")
         # Concatenate the hidden states from modifier + head.
-        #modifier_head_states = torch.cat([sequence_output, head_states], dim=2)
-        modifier_head_states = torch.add(sequence_output, head_states) # USE SUM
+        modifier_head_states = torch.cat([sequence_output, head_states], dim=2) if Parameters.use_concat else torch.add(sequence_output, head_states) 
         #print(f"modifier_head_states.size = {modifier_head_states.size()}")
         #print("EXIT")
         #exit(1)

diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
@@ -1,6 +1,7 @@
 package org.clulab.scala_transformers.encoder
 
 import org.clulab.scala_transformers.encoder.math.Mathematics.{MathMatrix, MathColVector, Math}
+import LinearLayer._
 
 /** Implements one linear layer */
 class LinearLayer(
@@ -75,7 +76,15 @@ class LinearLayer(
 
     // this matrix concatenates the hidden states of modifier + corresponding head
     // rows = number of tokens in the sentence; cols = hidden state size x 2
-    val concatMatrix = Math.zeros(rows = Math.rows(sentenceHiddenStates), cols = Math.cols(sentenceHiddenStates))
+    val concatMatrix = 
+      if(USE_CONCAT) 
+        Math.zeros(
+          rows = Math.rows(sentenceHiddenStates), 
+          cols = 2 * Math.cols(sentenceHiddenStates))
+      else 
+        Math.zeros(
+          rows = Math.rows(sentenceHiddenStates), 
+          cols = Math.cols(sentenceHiddenStates))
 
     // traverse all modifiers
     for(i <- 0 until Math.rows(sentenceHiddenStates)) {
@@ -86,9 +95,15 @@ class LinearLayer(
         if(rawHeadAbsPos >= 0 && rawHeadAbsPos < Math.rows(sentenceHiddenStates)) rawHeadAbsPos
         else i // if the absolute position is invalid (e.g., root node or incorrect prediction) duplicate the mod embedding
       val headHiddenState = Math.row(sentenceHiddenStates, headAbsolutePosition)
+
       // row i in the concatenated matrix contains the embedding of modifier i and its head
-      Math.inplaceMatrixAddition(concatMatrix, i, modHiddenState)
-      Math.inplaceMatrixAddition(concatMatrix, i, headHiddenState)
+      if(USE_CONCAT) {
+        val concatState = Math.horcat(modHiddenState, headHiddenState)
+        Math.inplaceMatrixAddition(concatMatrix, i, concatState)
+      } else {
+        Math.inplaceMatrixAddition(concatMatrix, i, modHiddenState)
+        Math.inplaceMatrixAddition(concatMatrix, i, headHiddenState)
+      }
     }
 
     //println(s"concatMatrix size ${concatMatrix.rows} x ${concatMatrix.cols}")
@@ -106,7 +121,11 @@ class LinearLayer(
 
     // this matrix concatenates the hidden states of modifier + corresponding head
     // rows = 1; cols = hidden state size x 2
-    val concatMatrix = Math.zeros(rows = 1, cols = Math.cols(sentenceHiddenStates))
+    val concatMatrix = 
+      if(USE_CONCAT)
+        Math.zeros(rows = 1, cols = 2 * Math.cols(sentenceHiddenStates))
+      else
+        Math.zeros(rows = 1, cols = Math.cols(sentenceHiddenStates))
     // embedding of the modifier
     val modHiddenState = Math.row(sentenceHiddenStates, modifierAbsolutePosition)
 
@@ -117,9 +136,15 @@ class LinearLayer(
       else modifierAbsolutePosition // if the absolute position is invalid (e.g., root node or incorrect prediction) duplicate the mod embedding
 
     val headHiddenState = Math.row(sentenceHiddenStates, headAbsolutePosition)
+
     //println(s"concatMatrix size ${concatMatrix.rows} x ${concatMatrix.cols}")
-    Math.inplaceMatrixAddition(concatMatrix, 0, modHiddenState)
-    Math.inplaceMatrixAddition(concatMatrix, 0, headHiddenState)
+    if(USE_CONCAT) {
+      val concatState = Math.horcat(modHiddenState, headHiddenState)
+      Math.inplaceMatrixAddition(concatMatrix, 0, concatState)
+    } else {
+      Math.inplaceMatrixAddition(concatMatrix, 0, modHiddenState)
+      Math.inplaceMatrixAddition(concatMatrix, 0, headHiddenState)
+    }
     concatMatrix
   }
 
@@ -232,6 +257,10 @@ class LinearLayer(
 
 object LinearLayer {
 
+  // If true, dual mode concatenates the embeddings of modifier and head (doubling the width);
+  // otherwise, it sums them up.  The value must match the models in use (see apps/build.sbt).
+  val USE_CONCAT = true
+
   def fromFiles(layerDir: String): LinearLayer = {
     val linearLayerLayout = new LinearLayerLayout(layerDir)
     val linearLayerFactory = new LinearLayerFactoryFromFiles(linearLayerLayout)

diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala
@@ -83,6 +83,8 @@ object EjmlMath extends Math {
     colVector.getNumRows
   }
 
+  // This is supposed to put two column vectors on top of each other to make one
+  // longer column vector.  This is done by concatenating rows.
   def vertcat(leftColVector: MathColVector, rightColVector: MathColVector): MathColVector = {
     assert(isColVector(leftColVector))
     assert(isColVector(rightColVector))
@@ -105,6 +107,8 @@ object EjmlMath extends Math {
     result
   }
 
+  // This is supposed to put two row vectors beside each other to make one
+  // longer row vector.  This is done by concatenating columns.
   def horcat(leftRowVector: MathRowVector, rightRowVector: MathRowVector): MathRowVector = {
     assert(isRowVector(leftRowVector))
     assert(isRowVector(rightRowVector))

diff --git a/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala b/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala
@@ -168,7 +168,7 @@ class EjmlMathTest extends Test {
     }
   }
 
-  it should "cat" in {
+  it should "horcat" in {
     val leftVectorValues = Array(1f, 2f, 3f)
     val rightVectorValues = Array(2f, 4f, 6f)
     val leftVector = mkRowVector(leftVectorValues)