From 873e8418b33eb79714b02a4408e60dbb4e14831e Mon Sep 17 00:00:00 2001
From: Mihai Surdeanu <surdeanu@gmail.com>
Date: Tue, 2 Jan 2024 16:46:29 -0700
Subject: [PATCH 1/5] we can now use either concat or sum; concat is enabled by
 default

---
 apps/build.sbt                                |  5 +--
 encoder/src/main/python/parameters.py         |  4 ++
 encoder/src/main/python/token_classifier.py   |  5 +--
 .../encoder/LinearLayer.scala                 | 41 ++++++++++++++++---
 4 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/apps/build.sbt b/apps/build.sbt
index bdca081..8ac0114 100644
--- a/apps/build.sbt
+++ b/apps/build.sbt
@@ -7,9 +7,8 @@ resolvers ++= Seq(
 
 libraryDependencies ++= {
   Seq(
-    "org.clulab"     % "deberta-onnx-model" % "0.2.0",
-    "org.clulab"     % "electra-onnx-model" % "0.2.0",
-    "org.clulab"     % "roberta-onnx-model" % "0.2.0",
+    "org.clulab"     % "deberta-onnx-model" % "0.1.0",
+    "org.clulab"     % "roberta-onnx-model" % "0.1.0",
     "org.scalatest" %% "scalatest"          % "3.2.15" % "test"
   )
 }
diff --git a/encoder/src/main/python/parameters.py b/encoder/src/main/python/parameters.py
index 3bdb27a..be01a00 100644
--- a/encoder/src/main/python/parameters.py
+++ b/encoder/src/main/python/parameters.py
@@ -50,6 +50,10 @@ class Parameters:
     # the encoding used by default for reading and writing files       
     encoding = "UTF-8"
 
+    # in dual mode, combine modifier and head states by concatenation (True) or by sum (False)
+    use_concat: bool = True
+
+
     def get_model_name(transformer_name: str) -> str:
         return f"{transformer_name.replace('/', '-')}-mtl"
 
diff --git a/encoder/src/main/python/token_classifier.py b/encoder/src/main/python/token_classifier.py
index e7dae3b..7f667b6 100644
--- a/encoder/src/main/python/token_classifier.py
+++ b/encoder/src/main/python/token_classifier.py
@@ -223,7 +223,7 @@ def __init__(self, hidden_size: int, num_labels: int, task_id, dual_mode: bool=F
         self.dropout = nn.Dropout(dropout_p)
         self.dual_mode = dual_mode
         self.classifier = nn.Linear(
-            hidden_size, # if not self.dual_mode else hidden_size * 2, # USE SUM
+            hidden_size * 2 if (self.dual_mode and Parameters.use_concat) else hidden_size,
             num_labels
         )
         self.num_labels = num_labels
@@ -248,8 +248,7 @@ def concatenate(self, sequence_output, head_positions):
         head_states = sequence_output[torch.arange(sequence_output.shape[0]).unsqueeze(1), long_head_positions]
         #print(f"head_states.size = {head_states.size()}")
         # Concatenate the hidden states from modifier + head.
-        #modifier_head_states = torch.cat([sequence_output, head_states], dim=2)
-        modifier_head_states = torch.add(sequence_output, head_states) # USE SUM
+        modifier_head_states = torch.cat([sequence_output, head_states], dim=2) if Parameters.use_concat else torch.add(sequence_output, head_states) 
         #print(f"modifier_head_states.size = {modifier_head_states.size()}")
         #print("EXIT")
         #exit(1)
diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
index 665ed51..117c82f 100644
--- a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
+++ b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
@@ -1,6 +1,7 @@
 package org.clulab.scala_transformers.encoder
 
 import org.clulab.scala_transformers.encoder.math.Mathematics.{MathMatrix, MathColVector, Math}
+import LinearLayer._
 
 /** Implements one linear layer */
 class LinearLayer(
@@ -75,7 +76,15 @@ class LinearLayer(
 
     // this matrix concatenates the hidden states of modifier + corresponding head
     // rows = number of tokens in the sentence; cols = hidden state size x 2
-    val concatMatrix = Math.zeros(rows = Math.rows(sentenceHiddenStates), cols = Math.cols(sentenceHiddenStates))
+    val concatMatrix = 
+      if(USE_CONCAT) 
+        Math.zeros(
+          rows = Math.rows(sentenceHiddenStates), 
+          cols = 2 * Math.cols(sentenceHiddenStates))
+      else 
+        Math.zeros(
+          rows = Math.rows(sentenceHiddenStates), 
+          cols = Math.cols(sentenceHiddenStates))
 
     // traverse all modifiers
     for(i <- 0 until Math.rows(sentenceHiddenStates)) {
@@ -86,9 +95,15 @@ class LinearLayer(
         if(rawHeadAbsPos >= 0 && rawHeadAbsPos < Math.rows(sentenceHiddenStates)) rawHeadAbsPos
         else i // if the absolute position is invalid (e.g., root node or incorrect prediction) duplicate the mod embedding
       val headHiddenState = Math.row(sentenceHiddenStates, headAbsolutePosition)
+
       // row i in the concatenated matrix contains the embedding of modifier i and its head
-      Math.inplaceMatrixAddition(concatMatrix, i, modHiddenState)
-      Math.inplaceMatrixAddition(concatMatrix, i, headHiddenState)
+      if(USE_CONCAT) {
+        val concatState = Math.vertcat(modHiddenState, headHiddenState)
+        Math.inplaceMatrixAddition(concatMatrix, i, concatState)
+      } else {
+        Math.inplaceMatrixAddition(concatMatrix, i, modHiddenState)
+        Math.inplaceMatrixAddition(concatMatrix, i, headHiddenState)
+      }
     }
     
     //println(s"concatMatrix size ${concatMatrix.rows} x ${concatMatrix.cols}")
@@ -106,7 +121,11 @@ class LinearLayer(
 
     // this matrix concatenates the hidden states of modifier + corresponding head
     // rows = 1; cols = hidden state size x 2
-    val concatMatrix = Math.zeros(rows = 1, cols = Math.cols(sentenceHiddenStates))
+    val concatMatrix = 
+      if(USE_CONCAT)
+        Math.zeros(rows = 1, cols = 2 * Math.cols(sentenceHiddenStates))
+      else
+        Math.zeros(rows = 1, cols = Math.cols(sentenceHiddenStates))
     // embedding of the modifier
     val modHiddenState = Math.row(sentenceHiddenStates, modifierAbsolutePosition)
 
@@ -117,9 +136,15 @@ class LinearLayer(
       else modifierAbsolutePosition // if the absolute position is invalid (e.g., root node or incorrect prediction) duplicate the mod embedding
 
     val headHiddenState = Math.row(sentenceHiddenStates, headAbsolutePosition)
+
     //println(s"concatMatrix size ${concatMatrix.rows} x ${concatMatrix.cols}")
-    Math.inplaceMatrixAddition(concatMatrix, 0, modHiddenState)
-    Math.inplaceMatrixAddition(concatMatrix, 0, headHiddenState)
+    if(USE_CONCAT) {
+      val concatState = Math.vertcat(modHiddenState, headHiddenState)
+      Math.inplaceMatrixAddition(concatMatrix, 0, concatState)
+    } else {
+      Math.inplaceMatrixAddition(concatMatrix, 0, modHiddenState)
+      Math.inplaceMatrixAddition(concatMatrix, 0, headHiddenState)
+    }
     concatMatrix
   }
 
@@ -232,6 +257,10 @@ class LinearLayer(
 
 object LinearLayer {
 
+  // If true, dual mode concatenates the embeddings of the modifier and its head.
+  // Otherwise, it sums them up. This must agree with Parameters.use_concat used in training.
+  val USE_CONCAT = true
+
   def fromFiles(layerDir: String): LinearLayer = {
     val linearLayerLayout = new LinearLayerLayout(layerDir)
     val linearLayerFactory = new LinearLayerFactoryFromFiles(linearLayerLayout)

From 1f0ed3f3f241906a45ce9f75ca43cb77b09aeebf Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Tue, 2 Jan 2024 21:51:09 -0700
Subject: [PATCH 2/5] Change from vertcat to horcat

---
 .../org/clulab/scala_transformers/encoder/LinearLayer.scala   | 2 +-
 .../org/clulab/scala_transformers/encoder/math/EjmlMath.scala | 4 ++++
 .../clulab/scala_transformers/encoder/math/EjmlMathTest.scala | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
index 117c82f..7fcb909 100644
--- a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
+++ b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
@@ -98,7 +98,7 @@ class LinearLayer(
 
       // row i in the concatenated matrix contains the embedding of modifier i and its head
       if(USE_CONCAT) {
-        val concatState = Math.vertcat(modHiddenState, headHiddenState)
+        val concatState = Math.horcat(modHiddenState, headHiddenState)
         Math.inplaceMatrixAddition(concatMatrix, i, concatState)
       } else {
         Math.inplaceMatrixAddition(concatMatrix, i, modHiddenState)
diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala
index 4348b0a..655d446 100644
--- a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala
+++ b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala
@@ -83,6 +83,8 @@ object EjmlMath extends Math {
     colVector.getNumRows
   }
 
+  // This is supposed to put two column vectors on top of each other to make one
+  // longer column vector.  This is done by concatenating rows.
   def vertcat(leftColVector: MathColVector, rightColVector: MathColVector): MathColVector = {
     assert(isColVector(leftColVector))
     assert(isColVector(rightColVector))
@@ -105,6 +107,8 @@ object EjmlMath extends Math {
     result
   }
 
+  // This is supposed to put two row vectors beside each other to make one
+  // longer row vector.  This is done by concatenating columns.
   def horcat(leftRowVector: MathRowVector, rightRowVector: MathRowVector): MathRowVector = {
     assert(isRowVector(leftRowVector))
     assert(isRowVector(rightRowVector))
diff --git a/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala b/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala
index b390138..f8b6a37 100644
--- a/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala
+++ b/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala
@@ -168,7 +168,7 @@ class EjmlMathTest extends Test {
     }
   }
 
-  it should "cat" in {
+  it should "horcat" in {
     val leftVectorValues = Array(1f, 2f, 3f)
     val rightVectorValues = Array(2f, 4f, 6f)
     val leftVector = mkRowVector(leftVectorValues)

From 36c76ae4f8491d69fe843f61504e59099e279968 Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Tue, 2 Jan 2024 21:56:38 -0700
Subject: [PATCH 3/5] Change the remaining vertcat call to horcat

---
 .../org/clulab/scala_transformers/encoder/LinearLayer.scala     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
index 7fcb909..7b393a4 100644
--- a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
+++ b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala
@@ -139,7 +139,7 @@ class LinearLayer(
 
     //println(s"concatMatrix size ${concatMatrix.rows} x ${concatMatrix.cols}")
     if(USE_CONCAT) {
-      val concatState = Math.vertcat(modHiddenState, headHiddenState)
+      val concatState = Math.horcat(modHiddenState, headHiddenState)
       Math.inplaceMatrixAddition(concatMatrix, 0, concatState)
     } else {
       Math.inplaceMatrixAddition(concatMatrix, 0, modHiddenState)

From ac647ebc5df7dba2b9fd4b6f81ce9ee7cb144b70 Mon Sep 17 00:00:00 2001
From: Mihai Surdeanu <surdeanu@gmail.com>
Date: Wed, 3 Jan 2024 11:14:20 -0700
Subject: [PATCH 4/5] use correct model

---
 .../scala_transformers/apps/TokenClassifierExampleApp.scala   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala b/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala
index f67fc73..6141648 100644
--- a/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala
+++ b/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala
@@ -4,8 +4,8 @@ import org.clulab.scala_transformers.encoder.TokenClassifier
 
 object TokenClassifierExampleApp extends App {
   // Choose one of these.
-  val tokenClassifier = TokenClassifier.fromFiles("../microsoft-deberta-v3-base-mtl/avg_export")
-  // val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/microsoft_deberta_v3_base_mtl/avg_export")
+  // val tokenClassifier = TokenClassifier.fromFiles("../microsoft-deberta-v3-base-mtl/avg_export")
+  val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/microsoft_deberta_v3_base_mtl/avg_export")
   // val tokenClassifier = TokenClassifier.fromFiles("../models/google_electra_small_discriminator_mtl/avg_export")
   // val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/google_electra_small_discriminator_mtl/avg_export")
   // val tokenClassifier = TokenClassifier.fromFiles("../models/roberta_base_mtl/avg_export")

From 83b8e95474f1ed742ef0d79a27c3d38fb1b29684 Mon Sep 17 00:00:00 2001
From: Keith Alcock <github@keithalcock.com>
Date: Wed, 3 Jan 2024 13:38:07 -0700
Subject: [PATCH 5/5] Add comments about models

---
 apps/build.sbt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/apps/build.sbt b/apps/build.sbt
index 8ac0114..980158b 100644
--- a/apps/build.sbt
+++ b/apps/build.sbt
@@ -7,8 +7,17 @@ resolvers ++= Seq(
 
 libraryDependencies ++= {
   Seq(
+    // Models version 0.1.0 work when LinearLayer.USE_CONCAT == true.
     "org.clulab"     % "deberta-onnx-model" % "0.1.0",
     "org.clulab"     % "roberta-onnx-model" % "0.1.0",
+
+    // Models version 0.2.0 work when LinearLayer.USE_CONCAT == false.
+    // Models of different versions cannot be combined into a single project
+    // because the resource names will conflict, so only one version set can be enabled.
+    // "org.clulab"     % "deberta-onnx-model" % "0.2.0",
+    // "org.clulab"     % "electra-onnx-model" % "0.2.0",
+    // "org.clulab"     % "roberta-onnx-model" % "0.2.0",
+
     "org.scalatest" %% "scalatest"          % "3.2.15" % "test"
   )
 }