From 873e8418b33eb79714b02a4408e60dbb4e14831e Mon Sep 17 00:00:00 2001 From: Mihai Surdeanu Date: Tue, 2 Jan 2024 16:46:29 -0700 Subject: [PATCH 1/5] we can now use either concat or sum; concat is enabled by default --- apps/build.sbt | 5 +-- encoder/src/main/python/parameters.py | 4 ++ encoder/src/main/python/token_classifier.py | 5 +-- .../encoder/LinearLayer.scala | 41 ++++++++++++++++--- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/apps/build.sbt b/apps/build.sbt index bdca081..8ac0114 100644 --- a/apps/build.sbt +++ b/apps/build.sbt @@ -7,9 +7,8 @@ resolvers ++= Seq( libraryDependencies ++= { Seq( - "org.clulab" % "deberta-onnx-model" % "0.2.0", - "org.clulab" % "electra-onnx-model" % "0.2.0", - "org.clulab" % "roberta-onnx-model" % "0.2.0", + "org.clulab" % "deberta-onnx-model" % "0.1.0", + "org.clulab" % "roberta-onnx-model" % "0.1.0", "org.scalatest" %% "scalatest" % "3.2.15" % "test" ) } diff --git a/encoder/src/main/python/parameters.py b/encoder/src/main/python/parameters.py index 3bdb27a..be01a00 100644 --- a/encoder/src/main/python/parameters.py +++ b/encoder/src/main/python/parameters.py @@ -50,6 +50,10 @@ class Parameters: # the encoding used by default for reading and writing files encoding = "UTF-8" + # use concatenation or sum in dual mode + use_concat: bool = True + + def get_model_name(transformer_name: str) -> str: return f"{transformer_name.replace('/', '-')}-mtl" diff --git a/encoder/src/main/python/token_classifier.py b/encoder/src/main/python/token_classifier.py index e7dae3b..7f667b6 100644 --- a/encoder/src/main/python/token_classifier.py +++ b/encoder/src/main/python/token_classifier.py @@ -223,7 +223,7 @@ def __init__(self, hidden_size: int, num_labels: int, task_id, dual_mode: bool=F self.dropout = nn.Dropout(dropout_p) self.dual_mode = dual_mode self.classifier = nn.Linear( - hidden_size, # if not self.dual_mode else hidden_size * 2, # USE SUM + hidden_size * 2 if (self.dual_mode and Parameters.use_concat) else hidden_size, num_labels ) self.num_labels = num_labels @@ -248,8 +248,7 @@ def concatenate(self, sequence_output, head_positions): head_states = sequence_output[torch.arange(sequence_output.shape[0]).unsqueeze(1), long_head_positions] #print(f"head_states.size = {head_states.size()}") # Concatenate the hidden states from modifier + head. - #modifier_head_states = torch.cat([sequence_output, head_states], dim=2) - modifier_head_states = torch.add(sequence_output, head_states) # USE SUM + modifier_head_states = torch.cat([sequence_output, head_states], dim=2) if Parameters.use_concat else torch.add(sequence_output, head_states) #print(f"modifier_head_states.size = {modifier_head_states.size()}") #print("EXIT") #exit(1) diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala index 665ed51..117c82f 100644 --- a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala +++ b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala @@ -1,6 +1,7 @@ package org.clulab.scala_transformers.encoder import org.clulab.scala_transformers.encoder.math.Mathematics.{MathMatrix, MathColVector, Math} +import LinearLayer._ /** Implements one linear layer */ class LinearLayer( @@ -75,7 +76,15 @@ class LinearLayer( // this matrix concatenates the hidden states of modifier + corresponding head // rows = number of tokens in the sentence; cols = hidden state size x 2 - val concatMatrix = Math.zeros(rows = Math.rows(sentenceHiddenStates), cols = Math.cols(sentenceHiddenStates)) + val concatMatrix = + if(USE_CONCAT) + Math.zeros( + rows = Math.rows(sentenceHiddenStates), + cols = 2 * Math.cols(sentenceHiddenStates)) + else + Math.zeros( + rows = Math.rows(sentenceHiddenStates), + cols = Math.cols(sentenceHiddenStates)) // traverse all modifiers for(i <- 0 until Math.rows(sentenceHiddenStates)) { @@ -86,9 +95,15 @@ class LinearLayer( if(rawHeadAbsPos >= 0 && rawHeadAbsPos < Math.rows(sentenceHiddenStates)) rawHeadAbsPos else i // if the absolute position is invalid (e.g., root node or incorrect prediction) duplicate the mod embedding val headHiddenState = Math.row(sentenceHiddenStates, headAbsolutePosition) + // row i in the concatenated matrix contains the embedding of modifier i and its head - Math.inplaceMatrixAddition(concatMatrix, i, modHiddenState) - Math.inplaceMatrixAddition(concatMatrix, i, headHiddenState) + if(USE_CONCAT) { + val concatState = Math.vertcat(modHiddenState, headHiddenState) + Math.inplaceMatrixAddition(concatMatrix, i, concatState) + } else { + Math.inplaceMatrixAddition(concatMatrix, i, modHiddenState) + Math.inplaceMatrixAddition(concatMatrix, i, headHiddenState) + } } //println(s"concatMatrix size ${concatMatrix.rows} x ${concatMatrix.cols}") @@ -106,7 +121,11 @@ class LinearLayer( // this matrix concatenates the hidden states of modifier + corresponding head // rows = 1; cols = hidden state size x 2 - val concatMatrix = Math.zeros(rows = 1, cols = Math.cols(sentenceHiddenStates)) + val concatMatrix = + if(USE_CONCAT) + Math.zeros(rows = 1, cols = 2 * Math.cols(sentenceHiddenStates)) + else + Math.zeros(rows = 1, cols = Math.cols(sentenceHiddenStates)) // embedding of the modifier val modHiddenState = Math.row(sentenceHiddenStates, modifierAbsolutePosition) @@ -117,9 +136,15 @@ class LinearLayer( else modifierAbsolutePosition // if the absolute position is invalid (e.g., root node or incorrect prediction) duplicate the mod embedding val headHiddenState = Math.row(sentenceHiddenStates, headAbsolutePosition) + //println(s"concatMatrix size ${concatMatrix.rows} x ${concatMatrix.cols}") - Math.inplaceMatrixAddition(concatMatrix, 0, modHiddenState) - Math.inplaceMatrixAddition(concatMatrix, 0, headHiddenState) + if(USE_CONCAT) { + val concatState = Math.vertcat(modHiddenState, headHiddenState) + Math.inplaceMatrixAddition(concatMatrix, 0, concatState) + } else { + Math.inplaceMatrixAddition(concatMatrix, 0, modHiddenState) + Math.inplaceMatrixAddition(concatMatrix, 0, headHiddenState) + } concatMatrix } @@ -232,6 +257,10 @@ class LinearLayer( object LinearLayer { + // If true, it concatenates the embeddings of head and modifier in dual mode + // Otherwise, it sums them up + val USE_CONCAT = true + def fromFiles(layerDir: String): LinearLayer = { val linearLayerLayout = new LinearLayerLayout(layerDir) val linearLayerFactory = new LinearLayerFactoryFromFiles(linearLayerLayout) From 1f0ed3f3f241906a45ce9f75ca43cb77b09aeebf Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Tue, 2 Jan 2024 21:51:09 -0700 Subject: [PATCH 2/5] Change from vertcat to horcat --- .../org/clulab/scala_transformers/encoder/LinearLayer.scala | 2 +- .../org/clulab/scala_transformers/encoder/math/EjmlMath.scala | 4 ++++ .../clulab/scala_transformers/encoder/math/EjmlMathTest.scala | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala index 117c82f..7fcb909 100644 --- a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala +++ b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala @@ -98,7 +98,7 @@ class LinearLayer( // row i in the concatenated matrix contains the embedding of modifier i and its head if(USE_CONCAT) { - val concatState = Math.vertcat(modHiddenState, headHiddenState) + val concatState = Math.horcat(modHiddenState, headHiddenState) Math.inplaceMatrixAddition(concatMatrix, i, concatState) } else { Math.inplaceMatrixAddition(concatMatrix, i, modHiddenState) diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala index 4348b0a..655d446 100644 --- a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala +++ b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/math/EjmlMath.scala @@ -83,6 +83,8 @@ object EjmlMath extends Math { colVector.getNumRows } + // This is supposed to put two column vectors on top of each other to make one + // longer column vector. This is done by concatenating rows. def vertcat(leftColVector: MathColVector, rightColVector: MathColVector): MathColVector = { assert(isColVector(leftColVector)) assert(isColVector(rightColVector)) @@ -105,6 +107,8 @@ object EjmlMath extends Math { result } + // This is supposed to put two row vectors beside each other to make one + // longer row vector. This is done by concatenating columns. def horcat(leftRowVector: MathRowVector, rightRowVector: MathRowVector): MathRowVector = { assert(isRowVector(leftRowVector)) assert(isRowVector(rightRowVector)) diff --git a/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala b/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala index b390138..f8b6a37 100644 --- a/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala +++ b/encoder/src/test/scala/org/clulab/scala_transformers/encoder/math/EjmlMathTest.scala @@ -168,7 +168,7 @@ class EjmlMathTest extends Test { } } - it should "cat" in { + it should "horcat" in { val leftVectorValues = Array(1f, 2f, 3f) val rightVectorValues = Array(2f, 4f, 6f) val leftVector = mkRowVector(leftVectorValues) From 36c76ae4f8491d69fe843f61504e59099e279968 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Tue, 2 Jan 2024 21:56:38 -0700 Subject: [PATCH 3/5] Get the other one. --- .../org/clulab/scala_transformers/encoder/LinearLayer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala index 7fcb909..7b393a4 100644 --- a/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala +++ b/encoder/src/main/scala/org/clulab/scala_transformers/encoder/LinearLayer.scala @@ -139,7 +139,7 @@ class LinearLayer( //println(s"concatMatrix size ${concatMatrix.rows} x ${concatMatrix.cols}") if(USE_CONCAT) { - val concatState = Math.vertcat(modHiddenState, headHiddenState) + val concatState = Math.horcat(modHiddenState, headHiddenState) Math.inplaceMatrixAddition(concatMatrix, 0, concatState) } else { Math.inplaceMatrixAddition(concatMatrix, 0, modHiddenState) From ac647ebc5df7dba2b9fd4b6f81ce9ee7cb144b70 Mon Sep 17 00:00:00 2001 From: Mihai Surdeanu Date: Wed, 3 Jan 2024 11:14:20 -0700 Subject: [PATCH 4/5] use correct model --- .../scala_transformers/apps/TokenClassifierExampleApp.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala b/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala index f67fc73..6141648 100644 --- a/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala +++ b/apps/src/main/scala/org/clulab/scala_transformers/apps/TokenClassifierExampleApp.scala @@ -4,8 +4,8 @@ import org.clulab.scala_transformers.encoder.TokenClassifier object TokenClassifierExampleApp extends App { // Choose one of these. - val tokenClassifier = TokenClassifier.fromFiles("../microsoft-deberta-v3-base-mtl/avg_export") - // val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/microsoft_deberta_v3_base_mtl/avg_export") + // val tokenClassifier = TokenClassifier.fromFiles("../microsoft-deberta-v3-base-mtl/avg_export") + val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/microsoft_deberta_v3_base_mtl/avg_export") // val tokenClassifier = TokenClassifier.fromFiles("../models/google_electra_small_discriminator_mtl/avg_export") // val tokenClassifier = TokenClassifier.fromResources("/org/clulab/scala_transformers/models/google_electra_small_discriminator_mtl/avg_export") // val tokenClassifier = TokenClassifier.fromFiles("../models/roberta_base_mtl/avg_export") From 83b8e95474f1ed742ef0d79a27c3d38fb1b29684 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Wed, 3 Jan 2024 13:38:07 -0700 Subject: [PATCH 5/5] Add comments about models --- apps/build.sbt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/apps/build.sbt b/apps/build.sbt index 8ac0114..980158b 100644 --- a/apps/build.sbt +++ b/apps/build.sbt @@ -7,8 +7,17 @@ resolvers ++= Seq( libraryDependencies ++= { Seq( + // Models version 0.1.0 work when LinearLayer.USE_CONCAT == true. "org.clulab" % "deberta-onnx-model" % "0.1.0", "org.clulab" % "roberta-onnx-model" % "0.1.0", + + // Models version 0.2.0 work when LinearLayer.USE_CONCAT == false. + // Models of different versions cannot be combined into a single project + // because the resource names will conflict. The choice is forced. + // "org.clulab" % "deberta-onnx-model" % "0.2.0", + // "org.clulab" % "electra-onnx-model" % "0.2.0", + // "org.clulab" % "roberta-onnx-model" % "0.2.0", + "org.scalatest" %% "scalatest" % "3.2.15" % "test" ) }