From 498d25249008da78eebb41249fa8cbae772c1d73 Mon Sep 17 00:00:00 2001 From: Holger Roth Date: Tue, 19 Mar 2024 20:29:24 -0400 Subject: [PATCH] Add FedBPT research example initial fedbpt files add roberta model and run FL move send to end upgrade to 2.4.1rc and run experiment with 10 clients move init to top debug using pickle record successful setting use custom decomposer clean code add summary writer add result figure formatting fix broken links remove debug messages update readme with system resources use decomposer widget on server --- integration/nemo/examples/peft/peft.ipynb | 2 +- .../prompt_learning/prompt_learning.ipynb | 2 +- research/auto-fed-rl/README.md | 2 +- research/fed-bpt/LICENSE.txt | 201 ++ research/fed-bpt/README.md | 82 + research/fed-bpt/figs/global_test_acc.png | Bin 0 -> 40592 bytes research/fed-bpt/index.html | 16 + .../fedbpt/config_fed_client.conf | 127 ++ .../fedbpt/config_fed_server.conf | 55 + .../fed-bpt/job_templates/fedbpt/info.conf | 5 + research/fed-bpt/job_templates/fedbpt/info.md | 11 + .../fed-bpt/job_templates/fedbpt/meta.conf | 10 + research/fed-bpt/src/LMForwardAPI.py | 910 +++++++++ research/fed-bpt/src/cma_decomposer.py | 116 ++ research/fed-bpt/src/data_process.py | 391 ++++ .../fed-bpt/src/dataloaders/dataloader.py | 482 +++++ research/fed-bpt/src/decomposer_widget.py | 30 + research/fed-bpt/src/fedbpt_train.py | 364 ++++ research/fed-bpt/src/global_es.py | 147 ++ research/fed-bpt/src/metrics/metrics.py | 486 +++++ .../fed-bpt/src/models/modeling_roberta.py | 1703 +++++++++++++++++ research/fed-bpt/src/utils.py | 71 + research/one-shot-vfl/README.md | 22 +- 23 files changed, 5230 insertions(+), 5 deletions(-) create mode 100644 research/fed-bpt/LICENSE.txt create mode 100644 research/fed-bpt/README.md create mode 100644 research/fed-bpt/figs/global_test_acc.png create mode 100644 research/fed-bpt/index.html create mode 100644 research/fed-bpt/job_templates/fedbpt/config_fed_client.conf create mode 100644 research/fed-bpt/job_templates/fedbpt/config_fed_server.conf create mode 100644 research/fed-bpt/job_templates/fedbpt/info.conf create mode 100644 research/fed-bpt/job_templates/fedbpt/info.md create mode 100644 research/fed-bpt/job_templates/fedbpt/meta.conf create mode 100644 research/fed-bpt/src/LMForwardAPI.py create mode 100644 research/fed-bpt/src/cma_decomposer.py create mode 100644 research/fed-bpt/src/data_process.py create mode 100644 research/fed-bpt/src/dataloaders/dataloader.py create mode 100644 research/fed-bpt/src/decomposer_widget.py create mode 100644 research/fed-bpt/src/fedbpt_train.py create mode 100644 research/fed-bpt/src/global_es.py create mode 100644 research/fed-bpt/src/metrics/metrics.py create mode 100644 research/fed-bpt/src/models/modeling_roberta.py create mode 100644 research/fed-bpt/src/utils.py diff --git a/integration/nemo/examples/peft/peft.ipynb b/integration/nemo/examples/peft/peft.ipynb index ba31f585b9..1b42a57a45 100644 --- a/integration/nemo/examples/peft/peft.ipynb +++ b/integration/nemo/examples/peft/peft.ipynb @@ -177,7 +177,7 @@ "metadata": {}, "source": [ "## Federated learning simulations\n", - "Next, we are using NVFlare's [simulator](https://nvflare.readthedocs.io/en/latest/user_guide/fl_simulator.html) to simulate each client training on their own dataset locally and all three clients training together using the [FedAvg](https://arxiv.org/abs/1602.05629) algorithm implemented in NVFlare.\n", + "Next, we are using NVFlare's 
[simulator](https://nvflare.readthedocs.io/en/latest/user_guide/nvflare_cli/fl_simulator.html) to simulate each client training on their own dataset locally and all three clients training together using the [FedAvg](https://arxiv.org/abs/1602.05629) algorithm implemented in NVFlare.\n", "\n", "With this setting, we require a GPU with at least 24GB of memory to run all clients in parallel on the same GPU. \n", "If you have multiple GPUs in your system, you can use the `gpu` argument to assign one GPU for each client, e.g., `gpu=\"0,1\"`.\n", diff --git a/integration/nemo/examples/prompt_learning/prompt_learning.ipynb b/integration/nemo/examples/prompt_learning/prompt_learning.ipynb index 83873404cf..26e39581d1 100644 --- a/integration/nemo/examples/prompt_learning/prompt_learning.ipynb +++ b/integration/nemo/examples/prompt_learning/prompt_learning.ipynb @@ -171,7 +171,7 @@ "metadata": {}, "source": [ "## Federated learning simulations\n", - "Next, we are using NVFlare's [simulator](https://nvflare.readthedocs.io/en/latest/user_guide/fl_simulator.html) to simulate each client training on their own dataset locally and all three clients training together using the [FedAvg](https://arxiv.org/abs/1602.05629) algorithm implemented in NVFlare.\n", + "Next, we are using NVFlare's [simulator](https://nvflare.readthedocs.io/en/latest/user_guide/nvflare_cli/fl_simulator.html) to simulate each client training on their own dataset locally and all three clients training together using the [FedAvg](https://arxiv.org/abs/1602.05629) algorithm implemented in NVFlare.\n", "\n", "With this setting, we require a GPU with at least 16GB memory to run all clients in parallel on the same GPU. \n", "If you have multiple GPUs in your system, you can use the `gpu` argument to assign one GPU for each client, e.g., `gpu=\"0,1\"`." diff --git a/research/auto-fed-rl/README.md b/research/auto-fed-rl/README.md index 542e8a38fa..de0b05dc8e 100644 --- a/research/auto-fed-rl/README.md +++ b/research/auto-fed-rl/README.md @@ -34,7 +34,7 @@ python3 -m pt.utils.cifar10_data_utils ## 3. Run simulated FL experiments -We are using NVFlare's [FL simulator](https://nvflare.readthedocs.io/en/latest/user_guide/fl_simulator.html) to run the following experiments. +We are using NVFlare's [FL simulator](https://nvflare.readthedocs.io/en/latest/user_guide/nvflare_cli/fl_simulator.html) to run the following experiments. The output root of where to save the results is set in [./run_simulator.sh](./run_simulator.sh) as `RESULT_ROOT=/tmp/nvflare/sim_cifar10`. diff --git a/research/fed-bpt/LICENSE.txt b/research/fed-bpt/LICENSE.txt new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/research/fed-bpt/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/research/fed-bpt/README.md b/research/fed-bpt/README.md new file mode 100644 index 0000000000..b9138900b4 --- /dev/null +++ b/research/fed-bpt/README.md @@ -0,0 +1,82 @@ +# FedBPT: Efficient Federated Black-box Prompt Tuning for Large Language Models + +This example shows how to run [FedBPT](https://arxiv.org/abs/2310.01467) on an example task and the [FL simulator](https://nvflare.readthedocs.io/en/latest/user_guide/nvflare_cli/fl_simulator.html). + +###### Abstract: +> Pre-trained language models (PLM) have revolutionized the NLP landscape, achieving stellar performances across diverse tasks. These models, while benefiting from vast training data, often require fine-tuning on specific data to cater to distinct downstream tasks. However, this data adaptation process has inherent security and privacy concerns, primarily when leveraging user-generated, device-residing data. Federated learning (FL) provides a solution, allowing collaborative model fine-tuning without centralized data collection. However, applying FL to finetune PLMs is hampered by challenges, including restricted model parameter access, high computational requirements, and communication overheads. This paper introduces Federated Black-box Prompt Tuning (FedBPT), a framework designed to address these challenges. FedBPT does not require the clients to access the model parameters. By focusing on training optimal prompts and utilizing gradient-free optimization methods, FedBPT reduces the number of exchanged variables, boosts communication efficiency, and minimizes computational and storage costs. Experiments highlight the framework's ability to drastically cut communication and memory costs while maintaining competitive performance. Ultimately, FedBPT presents a promising solution for efficient, privacy-preserving fine-tuning of PLM in the age of large language models. + +## License +The code in this directory is released under Apache v2 License. +The code is extended from [Black-Box-Tuning (BBT)](https://github.com/txsun1997/Black-Box-Tuning) which is released under MIT License. +The models code is copied from the [transformers](https://github.com/huggingface/transformers) library. + +## 1. Setup +We recommend creating a [conda environment](https://www.anaconda.com) following [BBT](https://github.com/txsun1997/Black-Box-Tuning#prepare-your-environment) +with the addition of installing NVFlare for running federated learning and some other updates: +```commandline +conda create --name fedbpt python=3.8 +conda activate fedbpt +pip install transformers==4.38.2 +pip install fastNLP==0.6.0 +pip install datasets +pip install cma +pip install scikit-learn +pip install tensorboard +pip install cvxopt +pip install nvflare==2.4.1rc +``` + +## 2. Run a federated learning experiment +First, we set the location of NVFlare job templates directory. +```commandline +nvflare config -jt ./job_templates +``` +Next, we generate a job configuration from the template to run FL on `N_CLIENTS` clients. +We utilize the [SST-2 dataset](https://huggingface.co/datasets/stanfordnlp/sst2) and the RoBerTa-large model for training. 
+```commandline
+N_CLIENTS=10
+SEED=1234
+nvflare job create -force -j "./jobs/fedbpt" -w "fedbpt" -sd "./src" \
+-f app/config/config_fed_client.conf app_script="fedbpt_train.py" app_config="--task_name sst2 \
+--n_prompt_tokens 50 \
+--intrinsic_dim 500 \
+--k_shot 200 \
+--device cuda:0 \
+--seed ${SEED} \
+--loss_type ce \
+--cat_or_add add \
+--local_iter 8 \
+--num_users ${N_CLIENTS} \
+--iid 1 \
+--local_popsize 5 \
+--perturb 1 \
+--model_name roberta-large \
+--llama_causal 1" \
+-f app/config/config_fed_server.conf min_clients=${N_CLIENTS} num_rounds=200 seed=${SEED}
+```
+Start the FL simulator with `N_CLIENTS` clients running in parallel.
+The following setting requires a GPU with at least 24 GB of memory and enough system memory to run the clients in parallel (we recommend at least 40 GB).
+For a system with fewer resources, you can set `-t` to a lower number to simulate the clients running sequentially.
+```commandline
+OUT_DIR="/tmp/nvflare/fedbpt"
+nvflare simulator ./jobs/fedbpt -n ${N_CLIENTS} -t ${N_CLIENTS} -w ${OUT_DIR}
+```
+
+## 3. Example results
+The global testing accuracy over 200 rounds of training is shown below.
+The global prompt learned with FedBPT achieves an accuracy of 0.8761 on the SST-2 test set.
+![FedBPT results](./figs/global_test_acc.png)
+
+## Citation
+
+> Sun, Jingwei, et al. "FedBPT: Efficient Federated Black-box Prompt Tuning for Large Language Models." arXiv preprint arXiv:2310.01467 (2023).
+
+BibTeX
+```
+@article{sun2023fedbpt,
+  title={FedBPT: Efficient Federated Black-box Prompt Tuning for Large Language Models},
+  author={Sun, Jingwei and Xu, Ziyue and Yin, Hongxu and Yang, Dong and Xu, Daguang and Chen, Yiran and Roth, Holger R},
+  journal={arXiv preprint arXiv:2310.01467},
+  year={2023}
+}
+```
diff --git a/research/fed-bpt/figs/global_test_acc.png b/research/fed-bpt/figs/global_test_acc.png
new file mode 100644
index 0000000000000000000000000000000000000000..2066ce38fa6c0e2fa74e129991dd90324d2e7151
GIT binary patch
literal 40592
[binary image data omitted]
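As background for the job configured above: FedBPT's gradient-free optimization builds on CMA-ES, which is why the setup step installs the `cma` package and the server config seeds a CMA-ES instance. The sketch below is a minimal, standalone ask/tell loop of the kind that `GlobalES` and `fedbpt_train.py` build on. It is illustrative only; `prompt_loss` is a hypothetical stand-in for projecting a candidate vector to prompt embeddings and scoring it with the frozen RoBERTa model, not the example's actual evaluation code.

```python
# Minimal CMA-ES ask/tell loop (illustrative sketch, not the FedBPT implementation).
# Requires: pip install cma numpy
import cma
import numpy as np

intrinsic_dim = 500   # dimension of the low-dimensional prompt vector (cf. --intrinsic_dim)
popsize = 5           # candidate prompts per generation (cf. --local_popsize)
seed = 1234

def prompt_loss(z: np.ndarray) -> float:
    """Hypothetical stand-in: in FedBPT this would project z to prompt embeddings,
    run the frozen PLM on a labeled batch, and return the resulting loss."""
    return float(np.sum(z ** 2))  # placeholder objective

es = cma.CMAEvolutionStrategy(
    x0=intrinsic_dim * [0.0],  # start from an all-zero prompt vector
    sigma0=1.0,                # initial step size
    inopts={"popsize": popsize, "seed": seed, "maxiter": 20},
)

while not es.stop():
    candidates = es.ask()                           # sample a population of prompt vectors
    fitness = [prompt_loss(z) for z in candidates]  # black-box evaluation, no gradients
    es.tell(candidates, fitness)                    # update the search distribution

print("best loss:", es.result.fbest)
```

In the federated workflow, it is the state of such a CMA-ES search that is exchanged between server and clients each round, which is why the job also registers a custom decomposer (see `src/cma_decomposer.py` and `src/decomposer_widget.py`) to serialize it.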
z`MKk2TsQ9%JnIME{ScPQkUfgN!udgji7D*I4>o~$Ju!uR3^T&5YGTKh+dfdX7pqiSRRX#WF88$8Zl>;+1HiY0HJn|lzI%YWHd2;d8v?HHnZX8!&+_2frIq5c9Ce_@W-}FWO2`WwNEhjpwSZnlJx^ zJtZLE_;BgyzCY6ecC6uQGsQ;$8>vLEDIUC(({_ff^?H{{LpxdKKPu6Cw^x<|KxhF+yv}02-R0tS0&mtp%o^*jW*u5X>B#h zxzrNDBUKj4_ow-yPV2uO2LuLQU9>$6_QlX!Sz2c1uBG0=e`8pA_t$@$5OW- z4nEhR`55)xyLZ3S1PL=0Z{W6%cU4od`o=nhv ze+7i67v@z-4sCbwz(Rs_99rULX5K^Slu|#BTAH404uWv=1(?+4U`p@p)yd+Zsp#!r zq@&gwRwj)S!-xAj_`Z#esg||fdr^G&``SY^50kate`Mrst^Hy@IAAHTG>~kWL7ta< z^M>8&k0gZ0cBUO<|AvFw`GSIi%%Y-4#gn@i^vujyF@2Jro_v(Pm()!jN1MT<_U)z| z?BGPk#>U|1zBgTNF`O=}>)iK3=w`p_aF0~fS4b-R8{CJnNqOGBecN7bHST#jDvg1G zfk4H~{Cn$mJYPA#wzjsLgCQ3k7uRBcrh-hs^>d`i^=AN791m)yVO1G-UqM=s3MY9} zXTOM!iD}Xw%doz=2?JoG(rkcWY48V9k#3k<1c2mnziL=};T{Bq)bs7;sv4VV*0PrKIf6O^5ZZ39hW&61zW{SZN3g)7 z5%dlzo!iR z?%cxWz0db|(wAl7r~J`H8$nf35j(-ammll}@O5J@60JIWa_@^nv)M{Bc!na^(tro* z&;EP|BsqWn7^nMQTk$w7DVUkPASNaTCX>LRmglNY42NOVh(&tWR2cC4Q4yuuIeDAK0 z&*vRHB(;X*K|)FO{t|!w+z$!7XO>CY3HN_+Nu-fRYmCi*xV*e*uV@bZC`hfIps8 z1Xa0a`kHwg7x61HuD~cc?~;xDZhUIUe5|+H9r2+Juy9fv+n{>QJ!*!60qka zHY3Co9y@1;s`&$5a-B(m-vVIE)zHuY&>`%|?|~E& z*4=sJdm+eLzk*mi8-YJM)o;$IX_x9&~qJqv)gj9qy~&p1y|^=KYYnxPr$1wd`?x| z0BkHS)u}65SkOJ|6}?x%CgGAv<8u-R;H=SY51>4`;_{?Hb2WfL->vn8tGT(k_3yYc zHZ7KwHN)iG=I!bV;U9c3Uso&}Lsy7`N$?V^l7T@5TvdnTC;_k20ysFE1UO<6Y^L@t zX{wyfG;GeMyzuvLg|i+~hk~vU+~TbaKmD`4X=Q*6^A+cTjekB-NkO7bI6ZZ(v0oIp zI013L*yrZ_YM;}r|LNJ5=z|tmW9KK#$!D6n-$_VF7Una2&b2ZiJL+_b$vN;amr-GTsd>wFYsNQTr7Dj!#4?Jw)*iUzXu=qiSN5N%~hC$z)>%7 z=(Aa1K^aXMym$W|l>;Emi{<`}bnio_Hjz?0Lg2nb(!o`wALSGj)=!2-w*Z=;<>e&| z>H0Wn?Z*4v2OKAW(BYi&V*$x?1N2#v^8< zu?GMOgqoWA@BjcxQ*(1z7p^E|q{+h}-R~Z{5C&o@EGFX?IeYZiM4{T^d|*HT`Xhm=))j#?oHV5n z{c?vBb^sj)VQSin#5^gb-D|PaL4QCEWDJ5Rvq5VrIP2ZrT_O(CUpoe3m|kbQ-Nogt zbIdK$krdl!UK%CuJAhsH=4bf!1J~-xxId-Whl3km9Xq!o8+IbB{M(!AT0IbN`3%y| z=$GL)7N}$siTXA-nS=Z?)gsKo67JA?)w7jYJN0whRRGi;AY~;eH!j*c94CH6LaQ|C z#mVru0DE+3#|muyuvaNRrp5arIyxG$9VE&gkh->)+eE!uD2gZN@?f>D*h`?ud&$f! zSw0Q280CfXhYvykZh$EC_fOa>4#RecFTj;2<$iZz-)N*^01s!V+~+qEWRJ=KU;;Ae zIluWve_Zp!{Y|O;Khx06C#UM}>8W(v(}*DBM-=n&5&&4-WIRuCf2k$q=CCOeB*l6g z8+e5XZs-KXWoN6;%}l)$O|eEPjDHK@)hhsA?lv#N3~mG9u(lasaxx*$D1b;)omGFq z0ef?^5U8?$f&u}gA53_$2oC)8V1Bw~xoTKsAWt!6rp|#9SM-+c#fujUo@?R$1QVqI zMgUJg0cn#Bum%2VQ(`r=pwj5<6mPxSBxNSR1SJfdQ2>P_d?!Jj*aKgkBJU!yzkc~v zAAQDpoTfgtsAvXw8y8U2fGTK9#A3Xyqp;eY#bdz<4LrdgP%+Z+@+M3cYo7eXSH`2H zT+aJvY|tH7L6%ajDk(ic2kiDUm8;8H(bV{Yg18twMlridPXWj@;_%>SeSdp(Xu@N^ zPyoh*-%T1-^0E26>MV{z(oYZYWc-(ph6e`J>Xy8U z`+xsd2lawFs65`OrZO+7t;vfwu;CULy+xm*nq z*|N0zm!n!o88#*fCSL;-ubjVqbJUiREgcC1R#E3wqWUPLYt_D0P0nf7EWzn)QuCx~ z`L3+1e?h}#^Y^i!bTl|BU?K;9Zw-8}%-|eK+q_1({@~}wGy=_7|J;7FM9`{k6F}4j z_jx;^+rM3>d()A2^R*QqR8#of9`letMEczZhLiGE8g?LNWo5muGROY(>C-=)qYJBr z0{G{1t&1}Em6%gIm8}wZI09=pJh}R!%V3WI3x@UN&F}O0ZClc2+{xlUohI< zM|6Dw)@~QS=_
diff --git a/research/fed-bpt/index.html b/research/fed-bpt/index.html
new file mode 100644
index 0000000000..f7252b1a3d
--- /dev/null
+++ b/research/fed-bpt/index.html
@@ -0,0 +1,16 @@
+  FedBPT
+  Redirecting to the project codebase:
+  https://github.com/NVIDIA/NVFlare/tree/main/research/fed-bpt
+ + \ No newline at end of file diff --git a/research/fed-bpt/job_templates/fedbpt/config_fed_client.conf b/research/fed-bpt/job_templates/fedbpt/config_fed_client.conf new file mode 100644 index 0000000000..7e107be61d --- /dev/null +++ b/research/fed-bpt/job_templates/fedbpt/config_fed_client.conf @@ -0,0 +1,127 @@ +{ + # version of the configuration + format_version = 2 + + # This is the application script which will be invoked. Client can replace this script with user's own training script. + app_script = "fedbpt_train.py" + + # Additional arguments needed by the training code. For example, in lightning, these can be --trainer.batch_size=xxx. + app_config = "" + + + # Client Computing Executors. + executors = [ + { + # tasks the executors are defined to handle + tasks = ["train"] + + # This particular executor + executor { + + # Executor name : ClientAPILauncherExecutor + # This is an executor for Client API. The underline data exchange is using Pipe. + path = "nvflare.app_common.executors.client_api_launcher_executor.ClientAPILauncherExecutor" + + args { + + # This executor take an component named "launcher" + launcher_id = "launcher" + + # This executor needs Pipe component + pipe_id = "pipe" + + # Timeout in seconds for waiting for a heartbeat from the training script. Defaults to 30 seconds. + # Please refer to the class docstring for all available arguments + heartbeat_timeout = 120 + + # format of the exchange parameters + params_exchange_format = "numpy" + + # if the transfer_type is FULL, then it will be sent directly + # if the transfer_type is DIFF, then we will calculate the + # difference VS received parameters and send the difference + params_transfer_type = "FULL" + + # if train_with_evaluation is true, the executor will expect + # the custom code need to send back both the trained parameters and the evaluation metric + # otherwise only trained parameters are expected + train_with_evaluation = true + + } + } + } + ], + + # this defined an array of task data filters. If provided, it will control the data from server controller to client executor + task_data_filters = [] + + # this defined an array of task result filters. 
If provided, it will control the result from client executor to server controller + task_result_filters = [] + + components = [ + { + # component id is "launcher" + id = "launcher" + + # the class path of this component + path = "nvflare.app_common.launchers.subprocess_launcher.SubprocessLauncher" + + args { + # the launcher will invoke the script + script = "python3 custom/{app_script} {app_config} " + # if launch_once is true, the SubprocessLauncher will launch once for the whole job + # if launch_once is false, the SubprocessLauncher will launch a process for each task it receives from server + launch_once = true + } + }, + { + id = "pipe" + path = "nvflare.fuel.utils.pipe.cell_pipe.CellPipe" + args { + mode = "PASSIVE" + site_name = "{SITE_NAME}" + token = "{JOB_ID}" + root_url = "{ROOT_URL}" + secure_mode = "{SECURE_MODE}" + workspace_dir = "{WORKSPACE}" + } + }, + { + id = "metrics_pipe" + path = "nvflare.fuel.utils.pipe.cell_pipe.CellPipe" + args { + mode = "PASSIVE" + site_name = "{SITE_NAME}" + token = "{JOB_ID}" + root_url = "{ROOT_URL}" + secure_mode = "{SECURE_MODE}" + workspace_dir = "{WORKSPACE}" + } + }, + { + id = "metric_relay" + path = "nvflare.app_common.widgets.metric_relay.MetricRelay" + args { + pipe_id = "metrics_pipe" + event_type = "fed.analytix_log_stats" + # how fast should it read from the peer + read_interval = 0.1 + } + }, + { + # we use this component so the client api `flare.init()` can get required information + id = "client_api_config_preparer" + path = "nvflare.app_common.widgets.external_configurator.ExternalConfigurator" + args { + component_ids = ["metric_relay"] + } + }, + { + # we use this component so the client api `flare.init()` can get required information + id = "register_decomposer" + path = "decomposer_widget.RegisterDecomposer" + args {} + } + ] +} + diff --git a/research/fed-bpt/job_templates/fedbpt/config_fed_server.conf b/research/fed-bpt/job_templates/fedbpt/config_fed_server.conf new file mode 100644 index 0000000000..66422969c4 --- /dev/null +++ b/research/fed-bpt/job_templates/fedbpt/config_fed_server.conf @@ -0,0 +1,55 @@ +{ + # version of the configuration + format_version = 2 + + # task data filter: if filters are provided, the filter will filter the data flow out of server to client. + task_data_filters =[] + + # task result filter: if filters are provided, the filter will filter the result flow out of client to server. + task_result_filters = [] + + # This assumes that there will be a "net.py" file with class name "Net". + # If your model code is not in "net.py" and class name is not "Net", please modify here + # model_class_path = "net.Net" + + # workflows: Array of workflows the control the Federated Learning workflow lifecycle. + # One can specify multiple workflows. The NVFLARE will run them in the order specified. + workflows = [ + { + # 1st workflow" + id = "global_es" + + # name = GlobalES, path is the class path of the GlobalES controller. + path = "global_es.GlobalES" + args { + # argument of the GlobalES class. + # min number of clients required for GlobalES controller to move to the next round + # during the workflow cycle. The controller will wait until the min_clients returned from clients + # before move to the next step. + min_clients = 10 + + # number of global round of the training. + num_rounds = 200 + + # seed for CMA-ES algorithm + seed = 42 + } + } + ] + + # List of components used in the server side workflow. 
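+  # The "receiver" component (TBAnalyticsReceiver) writes the training metrics that clients
+  # stream through their MetricRelay (event type "fed.analytix_log_stats") to TensorBoard logs
+  # on the server side.
+  # The "register_decomposer" widget registers the custom decomposer for CMA-ES objects
+  # (see src/decomposer_widget.py and src/cma_decomposer.py) so that the evolution-strategy
+  # state exchanged between the GlobalES controller and the clients can be serialized.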
+ components = [ + { + id = "receiver" + path = "nvflare.app_opt.tracking.tb.tb_receiver.TBAnalyticsReceiver" + args.events = ["fed.analytix_log_stats"] + }, + { + # we use this component so the client api `flare.init()` can get required information + id = "register_decomposer" + path = "decomposer_widget.RegisterDecomposer" + args {} + } + ] + +} diff --git a/research/fed-bpt/job_templates/fedbpt/info.conf b/research/fed-bpt/job_templates/fedbpt/info.conf new file mode 100644 index 0000000000..cc9b2a8687 --- /dev/null +++ b/research/fed-bpt/job_templates/fedbpt/info.conf @@ -0,0 +1,5 @@ +{ + description = "FedBPT based on FedAvg workflow" + execution_api_type = "client_api" + controller_type = "server" +} \ No newline at end of file diff --git a/research/fed-bpt/job_templates/fedbpt/info.md b/research/fed-bpt/job_templates/fedbpt/info.md new file mode 100644 index 0000000000..52ba5d72ec --- /dev/null +++ b/research/fed-bpt/job_templates/fedbpt/info.md @@ -0,0 +1,11 @@ +# Job Template Information Card + +## fedbpt + name = "fedbpt" + description = "FedBPT based on FedAvg workflow" + class_name = "GlobalES" + controller_type = "server" + executor_type = "launcher_executor" + contributor = "NVIDIA" + init_publish_date = "2024-03-19" + last_updated_date = "2024-03-19" # yyyy-mm-dd diff --git a/research/fed-bpt/job_templates/fedbpt/meta.conf b/research/fed-bpt/job_templates/fedbpt/meta.conf new file mode 100644 index 0000000000..639edac162 --- /dev/null +++ b/research/fed-bpt/job_templates/fedbpt/meta.conf @@ -0,0 +1,10 @@ +{ + name = "fedbpt" + resource_spec = {} + deploy_map { + # change deploy map as needed. + app = ["@ALL"] + } + min_clients = 2 + mandatory_clients = [] +} diff --git a/research/fed-bpt/src/LMForwardAPI.py b/research/fed-bpt/src/LMForwardAPI.py new file mode 100644 index 0000000000..9445bdf050 --- /dev/null +++ b/research/fed-bpt/src/LMForwardAPI.py @@ -0,0 +1,910 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Part of this code is adopted from BBT (https://github.com/txsun1997/Black-Box-Tuning) + +# MIT License +# +# Copyright (c) 2022 Tianxiang Sun +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +import copy +import os + +import numpy as np +import torch +from fastNLP import DataSet, DataSetIter, SequentialSampler, Tester +from models.modeling_roberta import RobertaForMaskedLM +from sklearn.metrics import f1_score +from transformers import RobertaConfig, RobertaTokenizer +from utils import hinge_loss + + +class LMForwardAPI: + def __init__(self, args, train_data=None, dev_data=None, init_prompt_path=None, baseAPI=True): + model_name = args.model_name + from metrics.metrics import ( + AGNewsMetric, + DBPediaMetric, + MRPCMetric, + RTEMetric, + SNLIMetric, + SST2Metric, + YelpPMetric, + ) + + task_name = args.task_name + if task_name in ["sst2", "yelpp", "rte", "mrpc", "chnsent", "lcqmc", "bq"]: + self.num_labels = 2 + elif task_name in ["snli", "cmnli", "ocnli"]: + self.num_labels = 3 + elif task_name in ["agnews", "ccpm", "c3"]: + self.num_labels = 4 + elif task_name in ["amazon"]: + self.num_labels = 5 + elif task_name in ["thucnews"]: + self.num_labels = 10 + elif task_name in ["dbpedia", "tnews"]: + self.num_labels = 14 + else: + raise ValueError + n_prompt_tokens = args.n_prompt_tokens + intrinsic_dim = args.intrinsic_dim + + sigma = args.sigma + alpha = args.alpha + + self.args = args + + device = args.device + random_proj = args.random_proj + loss_type = args.loss_type + print_every = args.print_every + eval_every = args.eval_every + cat_or_add = args.cat_or_add + + inference_framework = args.inference_framework + onnx_model_path = args.onnx_model_path + + self.model_name = args.model_name + self.parallel = args.parallel + self.n_prompt_tokens = args.n_prompt_tokens + self.batch_size = args.batch_size + self.device = args.device + + if inference_framework not in ["pt", "ort"]: + raise ValueError(f'inference_framework only supports "pt", "ort", got `{inference_framework}` instead.') + if inference_framework == "ort": + assert onnx_model_path is not None, "Path to onnx model is required, got None instead." + assert os.path.exists(onnx_model_path), f"In valid onnx model path `{onnx_model_path}`" + + self.train_data = train_data + self.dev_data = dev_data + self.train_data_aux = None + self.config = RobertaConfig.from_pretrained(model_name) + self.tokenizer = RobertaTokenizer.from_pretrained(model_name) + self.model = RobertaForMaskedLM.from_pretrained( + model_name, + config=self.config, + n_prompt_tokens=n_prompt_tokens, + inference_framework=inference_framework, + onnx_model_path=onnx_model_path, + ) + self.model.lm_head.bias = torch.nn.parameter.Parameter(torch.zeros(self.config.vocab_size)) + + if inference_framework == "ort": + self.model.roberta = None + if cat_or_add == "cat": + self.model.set_concat_prompt(True) + if init_prompt_path is not None: + print("Initialize prompt embedding from {}".format(init_prompt_path)) + self.init_prompt = torch.load(init_prompt_path).weight.cpu().reshape(-1) + else: + print("Initial prompt embedding not found. 
Initialize to zero embedding.") + self.init_prompt = torch.zeros(n_prompt_tokens * self.config.hidden_size) + print("Shape of initial prompt embedding: {}".format(self.init_prompt.shape)) + else: + # self.model.set_concat_prompt(False) + self.init_prompt = None + + if args.init_score_path is not None: + if args.llama_causal: + raise ValueError("You cannot initilize a score layer for a causal model") + score_state = self.model.score.state_dict() + score_state["weight"] = torch.load(args.init_score_path) + self.model.score.load_state_dict(score_state) + self.model.to(device) + self.model.eval() + self.linear = torch.nn.Linear(intrinsic_dim, n_prompt_tokens * self.config.hidden_size, bias=False) + if random_proj == "normal": + # calculate std for normal distribution + embedding = self.model.roberta.get_input_embeddings().weight.clone().cpu() + + # embedding = embedding[1000: 2000] + mu_hat = np.mean(embedding.reshape(-1).detach().cpu().numpy()) + std_hat = np.std(embedding.reshape(-1).detach().cpu().numpy()) + mu = 0.0 + std = alpha * std_hat / (np.sqrt(intrinsic_dim) * sigma) + # temp = intrinsic_dim - std_hat * std_hat + # mu = mu_hat / temp + # std = std_hat / np.sqrt(temp) + print("[Embedding] mu: {} | std: {} [RandProj] mu: {} | std: {}".format(mu_hat, std_hat, mu, std)) + for p in self.linear.parameters(): + torch.nn.init.normal_(p, mu, std) + self.best_train_perf = 0.0 + self.best_dev_perf = 0.0 + self.best_prompt = None + self.num_call = 0 + # self.save_path = save_path + self.print_every = print_every + self.eval_every = eval_every + self.loss_type = loss_type + # if save_path is not None: + # os.makedirs(save_path, exist_ok=True) + if task_name == "sst2": + self.metric = SST2Metric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "SST2Metric" + elif task_name == "agnews": + self.metric = AGNewsMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "AGNewsMetric" + elif task_name == "yelpp": + self.metric = YelpPMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "YelpPMetric" + elif task_name == "dbpedia": + self.metric = DBPediaMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "DBPediaMetric" + elif task_name == "rte": + self.metric = RTEMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "RTEMetric" + elif task_name == "mrpc": + self.metric = MRPCMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "f1" + self.metric_name = "MRPCMetric" + elif task_name == "snli": + self.metric = SNLIMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "SNLIMetric" + elif task_name == "chnsent": + self.metric = ChnSentMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "ChnSentMetric" + elif task_name == "thucnews": + self.metric = THUCNewsMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "THUCNewsMetric" + elif task_name == "lcqmc": + self.metric = LCQMCMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "LCQMCMetric" + elif task_name == "cmnli": + self.metric = CMNLIMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + 
self.metric_key = "acc" + self.metric_name = "CMNLIMetric" + elif task_name == "ocnli": + self.metric = OCNLIMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "OCNLIMetric" + elif task_name == "amazon": + self.metric = AmazonMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "AmazonMetric" + elif task_name == "bq": + self.metric = BQMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "BQMetric" + elif task_name == "ccpm": + self.metric = CCPMMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "CCPMMetric" + elif task_name == "tnews": + self.metric = TNewsMetric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "TNewsMetric" + elif task_name == "c3": + self.metric = C3Metric(target="labels", pred="logits", tokenizer=self.tokenizer) + self.metric_key = "acc" + self.metric_name = "C3Metric" + else: + raise NotImplementedError + self.margin = self.metric.margin + self.ce_loss = torch.nn.CrossEntropyLoss(reduction="mean") + + def convert_pred(self, logits, target): + label_map = self.metric.label_map + + converted_target = target.clone() + for key, val in label_map.items(): + converted_target[target == key] = val + if self.args.model_name not in ["llama2"] or self.args.llama_causal: + interest_index = list(label_map.keys()) + logits = logits[:, interest_index] + pred = logits.argmax(dim=-1) + return pred, converted_target + + def calc_metric(self, logits, target): + label_map = self.metric.label_map + + converted_target = target.clone() + for key, val in label_map.items(): + converted_target[target == key] = val + if self.args.model_name not in ["llama2"] or self.args.llama_causal: + interest_index = list(label_map.keys()) + logits = logits[:, interest_index] + pred = logits.argmax(dim=-1) + + if self.metric_key == "acc": + perf = (pred == converted_target).sum() / len(target) + elif self.metric_key == "f1": + perf = f1_score(converted_target.detach().cpu().numpy().tolist(), pred.detach().cpu().numpy().tolist()) + else: + raise KeyError(f"[Metric] Only support [acc, f1], got {self.metric_key} instead.") + + if self.loss_type == "hinge": + loss = hinge_loss(logits, converted_target, margin=self.margin, reduction="sum").item() / len(target) + elif self.loss_type == "ce": + loss = self.ce_loss(logits, converted_target).item() + elif self.loss_type == "perf": + loss = -1 * perf + else: + raise KeyError(f"[Loss] Only support [hinge, ce, perf], got {self.loss_type} instead.") + + return loss, perf + + def set_dataset(self, train_data, dev_data, train_data_aux=None): + self.train_data, self.dev_data = train_data, dev_data + if train_data_aux is not None: + self.train_data_aux = train_data_aux + + def load_client_record(self, record): + self.best_train_perf = record["best_train_perf"] + self.best_dev_perf = record["best_dev_perf"] + self.best_prompt = record["best_prompt"] + self.num_call = record["num_call"] + + def client_record(self): + record = {} + record["best_train_perf"] = copy.deepcopy(self.best_train_perf) + record["best_dev_perf"] = copy.deepcopy(self.best_dev_perf) + record["best_prompt"] = copy.deepcopy(self.best_prompt) + record["num_call"] = copy.deepcopy(self.num_call) + return record + + # def inference(self, model, data): + # for k, v in data.items(): + # data[k] = v.to(self.device) + # with 
torch.no_grad(): + # if self.model_name in ['t5-small', 't5-base', 't5-large', 't5-3b']: + # logits = self.model( + # input_ids=data['input_ids'], + # attention_mask=data['attention_mask'], + # decoder_input_ids=data['decoder_input_ids'], + # decoder_attention_mask=data['decoder_attention_mask'], + # )['logits'] + # elif self.model_name in ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl']: + # logits = self.model( + # input_ids=data['input_ids'], + # attention_mask=data['attention_mask'], + # )['logits'] + # else: + # logits = self.model( + # input_ids=data['input_ids'], + # attention_mask=data['attention_mask'], + # mask_pos=data['mask_pos'], + # )['logits'] + + # target = data['labels'] + # label_map = self.metric.label_map + + # converted_target = target.clone() + # for key, val in label_map.items(): + # converted_target[target == key] = val + # interest_index = list(label_map.keys()) + # logits = logits[:, interest_index] + # pred = logits.argmax(dim=-1) + # return pred, converted_target + + def eval(self, prompt_embedding=None, test_data=None, return_pred=False): + self.num_call += 1 + if prompt_embedding is None: + prompt_embedding = self.best_prompt + if test_data is None: + bsz_dev = len(self.dev_data["input_ids"]) + bsz_train = len(self.train_data["input_ids"]) + bsz = bsz_train if bsz_train > bsz_dev else bsz_dev + else: + bsz = self.batch_size # for test data + tmp_prompt = copy.deepcopy(prompt_embedding) # list or numpy.ndarray + if isinstance(prompt_embedding, list): # multiple queries + if self.args.norm_prompt: + for i in range(len(prompt_embedding)): + if np.linalg.norm(prompt_embedding[i]) > self.args.prompt_norm_threshold: + prompt_embedding[i] = ( + prompt_embedding[i] / np.linalg.norm(prompt_embedding[i]) * self.args.prompt_norm_threshold + ) + pe_list = [] + for pe in prompt_embedding: + z = torch.tensor(pe).type(torch.float32) # z + z = self.linear(z) # Az + if self.init_prompt is not None: + z = z + self.init_prompt # Az + p_0 + pe_list.append(z.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1)) + prompt_embedding = torch.cat(pe_list) # num_workers*bsz x prompt_len x dim + assert len(prompt_embedding) == len(self.train_data["input_ids"]) + elif isinstance(prompt_embedding, np.ndarray): # single query or None + if self.args.norm_prompt: + if np.linalg.norm(prompt_embedding) > self.args.prompt_norm_threshold: + prompt_embedding = ( + prompt_embedding / np.linalg.norm(prompt_embedding) * self.args.prompt_norm_threshold + ) + prompt_embedding = torch.tensor(prompt_embedding).type(torch.float32) # z + prompt_embedding = self.linear(prompt_embedding) # Az + if self.init_prompt is not None: + prompt_embedding = prompt_embedding + self.init_prompt # Az + p_0 + prompt_embedding = prompt_embedding.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1) + else: + raise ValueError( + f"[Prompt Embedding] Only support [list, numpy.ndarray], got `{type(prompt_embedding)}` instead." 
+ ) + + self.model.set_prompt_embedding(prompt_embedding) + + if return_pred is True: + if self.parallel: # if we have multiple queries, use the one that achieves minimal loss + self.model.set_prompt_embedding(prompt_embedding) + for k, v in self.dev_data.items(): + self.dev_data[k] = v.to(self.device) + with torch.no_grad(): + if self.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + logits = self.model( + input_ids=self.dev_data["input_ids"], + attention_mask=self.dev_data["attention_mask"], + decoder_input_ids=self.dev_data["decoder_input_ids"], + decoder_attention_mask=self.dev_data["decoder_attention_mask"], + )["logits"] + elif self.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", "llama2"]: + logits = self.model( + input_ids=self.dev_data["input_ids"], + attention_mask=self.dev_data["attention_mask"], + )["logits"] + else: + logits = self.model( + input_ids=self.dev_data["input_ids"], + attention_mask=self.dev_data["attention_mask"], + mask_pos=self.dev_data["mask_pos"], + )["logits"] + pred, labels = self.convert_pred(logits, self.dev_data["labels"]) + return pred, labels + + if isinstance(test_data, DataSet): + if prompt_embedding.shape[0] > bsz: + raise ValueError("Provide a single prompt embedding for testing.") + + test_tester = Tester( + data=test_data, + model=self.model, + metrics=self.metric, + batch_size=self.batch_size, + num_workers=1, + device=self.device, + use_tqdm=False, + ) + results = test_tester.test() + test_acc = results[self.metric_name][self.metric_key] + # fitlog.add_best_metric(test_acc, name='test_acc') + return test_acc + else: + for k, v in self.train_data.items(): + self.train_data[k] = v.to(self.device) + with torch.no_grad(): + if self.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + logits = self.model( + input_ids=self.train_data["input_ids"], + attention_mask=self.train_data["attention_mask"], + decoder_input_ids=self.train_data["decoder_input_ids"], + decoder_attention_mask=self.train_data["decoder_attention_mask"], + )["logits"] + elif self.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", "llama2"]: + logits = self.model( + input_ids=self.train_data["input_ids"], + attention_mask=self.train_data["attention_mask"], + )["logits"] + else: + logits = self.model( + input_ids=self.train_data["input_ids"], + attention_mask=self.train_data["attention_mask"], + mask_pos=self.train_data["mask_pos"], + )["logits"] + + if self.parallel: # we have multiple queries + all_losses, all_perfs = [], [] + for i in range(len(logits) // bsz): + tmp_logits = logits[i * bsz : i * bsz + bsz] + tmp_target = self.train_data["labels"][i * bsz : i * bsz + bsz] + tmp_loss, tmp_perf = self.calc_metric(tmp_logits, tmp_target) + all_losses.append(tmp_loss) + all_perfs.append(tmp_perf) + loss = min(all_losses) + best_sol = all_losses.index(loss) # argmin + perf = all_perfs[best_sol] # corresponding performance + tmp_prompt = tmp_prompt[best_sol] # numpy.ndarray + prompt_embedding = pe_list[best_sol] # to be prepended to the input + else: # single query + loss, perf = self.calc_metric(logits, self.train_data["labels"]) + # fitlog.add_loss(loss, name=self.loss_type, step=self.num_call) + # fitlog.add_metric(perf, name='train_acc', step=self.num_call) + + if perf > self.best_train_perf: + self.best_train_perf = perf + # fitlog.add_best_metric(self.best_train_perf, name='train_acc') + + # if self.save_path is not None: + # with open(os.path.join(self.save_path, 'train_acc.txt'), 'a') as fout: + # 
fout.write('{}\t{}\n'.format(self.num_call, perf)) + + # if self.num_call % self.print_every == 0: + # print( + # '[# API Calls {}] loss: {}. Current perf: {}. Best perf so far: {}'.format( + # self.num_call, + # round(float(loss), 4), + # round(float(perf), 4), + # round(float(self.best_train_perf), 4))) + + # if self.num_call % self.eval_every == 0: + # print('********* Evaluated on dev set *********') + # if self.parallel: # if we have multiple queries, use the one that achieves minimal loss + # self.model.set_prompt_embedding(prompt_embedding) + # for k, v in self.dev_data.items(): + # self.dev_data[k] = v.to(self.device) + # with torch.no_grad(): + # if self.model_name in ['t5-small', 't5-base', 't5-large', 't5-3b']: + # logits = self.model( + # input_ids=self.dev_data['input_ids'], + # attention_mask=self.dev_data['attention_mask'], + # decoder_input_ids=self.dev_data['decoder_input_ids'], + # decoder_attention_mask=self.dev_data['decoder_attention_mask'], + # )['logits'] + # elif self.model_name in ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl', 'llama2']: + # logits = self.model( + # input_ids=self.dev_data['input_ids'], + # attention_mask=self.dev_data['attention_mask'], + # )['logits'] + # else: + # logits = self.model( + # input_ids=self.dev_data['input_ids'], + # attention_mask=self.dev_data['attention_mask'], + # mask_pos=self.dev_data['mask_pos'], + # )['logits'] + + # dev_loss, dev_perf = self.calc_metric(logits, self.dev_data['labels']) + # # fitlog.add_metric(dev_perf, name='dev_acc', step=self.num_call) + # if dev_perf > self.best_dev_perf: + # self.best_dev_perf = dev_perf + # # fitlog.add_best_metric(self.best_dev_perf, name='dev_acc') + # self.best_prompt = copy.deepcopy(tmp_prompt) + # # if self.save_path is not None: + # # with open(os.path.join(self.save_path, 'dev_acc.txt'), 'a') as fout: + # # fout.write('{}\t{}\n'.format(self.num_call, dev_loss)) + # print('Dev loss: {}. Dev perf: {}. 
Best dev perf: {}'.format( + # round(float(dev_loss), 4), + # round(float(dev_perf), 4), + # round(float(self.best_dev_perf), 4))) + # print('********* Done *********') + if self.parallel: + return all_losses + else: + return loss + + def eval_perturb(self, prompt_embedding=None, test_data=None, return_pred=False): + self.num_call += 1 + if prompt_embedding is None: + prompt_embedding = self.best_prompt + if test_data is None: + bsz_dev = len(self.dev_data["input_ids"]) + bsz_train = len(self.train_data_aux["input_ids"]) + bsz = bsz_train if bsz_train > bsz_dev else bsz_dev + else: + bsz = self.batch_size # for test data + tmp_prompt = copy.deepcopy(prompt_embedding) # list or numpy.ndarray + if isinstance(prompt_embedding, list): # multiple queries + if self.args.norm_prompt: + for i in range(len(prompt_embedding)): + if np.linalg.norm(prompt_embedding[i]) > self.args.prompt_norm_threshold: + prompt_embedding[i] = ( + prompt_embedding[i] / np.linalg.norm(prompt_embedding[i]) * self.args.prompt_norm_threshold + ) + pe_list = [] + for pe in prompt_embedding: + z = torch.tensor(pe).type(torch.float32) # z + z = self.linear(z) # Az + if self.init_prompt is not None: + z = z + self.init_prompt # Az + p_0 + pe_list.append(z.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1)) + prompt_embedding = torch.cat(pe_list) # num_workers*bsz x prompt_len x dim + assert len(prompt_embedding) == len(self.train_data_aux["input_ids"]) + elif isinstance(prompt_embedding, np.ndarray): # single query or None + if self.args.norm_prompt: + if np.linalg.norm(prompt_embedding) > self.args.prompt_norm_threshold: + prompt_embedding = ( + prompt_embedding / np.linalg.norm(prompt_embedding) * self.args.prompt_norm_threshold + ) + prompt_embedding = torch.tensor(prompt_embedding).type(torch.float32) # z + prompt_embedding = self.linear(prompt_embedding) # Az + if self.init_prompt is not None: + prompt_embedding = prompt_embedding + self.init_prompt # Az + p_0 + prompt_embedding = prompt_embedding.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1) + else: + raise ValueError( + f"[Prompt Embedding] Only support [list, numpy.ndarray], got `{type(prompt_embedding)}` instead." 
+ ) + + self.model.set_prompt_embedding(prompt_embedding) + + if return_pred is True: + if self.parallel: # if we have multiple queries, use the one that achieves minimal loss + self.model.set_prompt_embedding(prompt_embedding) + for k, v in self.dev_data.items(): + self.dev_data[k] = v.to(self.device) + with torch.no_grad(): + if self.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + logits = self.model( + input_ids=self.dev_data["input_ids"], + attention_mask=self.dev_data["attention_mask"], + decoder_input_ids=self.dev_data["decoder_input_ids"], + decoder_attention_mask=self.dev_data["decoder_attention_mask"], + )["logits"] + elif self.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", "llama2"]: + logits = self.model( + input_ids=self.dev_data["input_ids"], + attention_mask=self.dev_data["attention_mask"], + )["logits"] + else: + logits = self.model( + input_ids=self.dev_data["input_ids"], + attention_mask=self.dev_data["attention_mask"], + mask_pos=self.dev_data["mask_pos"], + )["logits"] + pred, labels = self.convert_pred(logits, self.dev_data["labels"]) + return pred, labels + + if isinstance(test_data, DataSet): + if prompt_embedding.shape[0] > bsz: + raise ValueError("Provide a single prompt embedding for testing.") + + test_tester = Tester( + data=test_data, + model=self.model, + metrics=self.metric, + batch_size=self.batch_size, + num_workers=1, + device=self.device, + use_tqdm=False, + ) + results = test_tester.test() + test_acc = results[self.metric_name][self.metric_key] + # fitlog.add_best_metric(test_acc, name='test_acc') + return test_acc + else: + for k, v in self.train_data_aux.items(): + self.train_data_aux[k] = v.to(self.device) + with torch.no_grad(): + if self.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + logits = self.model( + input_ids=self.train_data_aux["input_ids"], + attention_mask=self.train_data_aux["attention_mask"], + decoder_input_ids=self.train_data_aux["decoder_input_ids"], + decoder_attention_mask=self.train_data_aux["decoder_attention_mask"], + )["logits"] + elif self.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", "llama2"]: + logits = self.model( + input_ids=self.train_data_aux["input_ids"], + attention_mask=self.train_data_aux["attention_mask"], + )["logits"] + else: + logits = self.model( + input_ids=self.train_data_aux["input_ids"], + attention_mask=self.train_data_aux["attention_mask"], + mask_pos=self.train_data_aux["mask_pos"], + )["logits"] + + if self.parallel: # we have multiple queries + all_losses, all_perfs = [], [] + for i in range(len(logits) // bsz): + tmp_logits = logits[i * bsz : i * bsz + bsz] + tmp_target = self.train_data_aux["labels"][i * bsz : i * bsz + bsz] + tmp_loss, tmp_perf = self.calc_metric(tmp_logits, tmp_target) + all_losses.append(tmp_loss) + all_perfs.append(tmp_perf) + loss = min(all_losses) + best_sol = all_losses.index(loss) # argmin + perf = all_perfs[best_sol] # corresponding performance + tmp_prompt = tmp_prompt[best_sol] # numpy.ndarray + prompt_embedding = pe_list[best_sol] # to be prepended to the input + else: # single query + loss, perf = self.calc_metric(logits, self.train_data_aux["labels"]) + # fitlog.add_loss(loss, name=self.loss_type, step=self.num_call) + # fitlog.add_metric(perf, name='train_acc', step=self.num_call) + + if perf > self.best_train_perf: + self.best_train_perf = perf + # fitlog.add_best_metric(self.best_train_perf, name='train_acc') + + # if self.save_path is not None: + # with open(os.path.join(self.save_path, 'train_acc.txt'), 
'a') as fout: + # fout.write('{}\t{}\n'.format(self.num_call, perf)) + + # if self.num_call % self.print_every == 0: + # print( + # '[# API Calls {}] loss: {}. Current perf: {}. Best perf so far: {}'.format( + # self.num_call, + # round(float(loss), 4), + # round(float(perf), 4), + # round(float(self.best_train_perf), 4))) + + # if self.num_call % self.eval_every == 0: + # print('********* Evaluated on dev set *********') + # if self.parallel: # if we have multiple queries, use the one that achieves minimal loss + # self.model.set_prompt_embedding(prompt_embedding) + # for k, v in self.dev_data.items(): + # self.dev_data[k] = v.to(self.device) + # with torch.no_grad(): + # if self.model_name in ['t5-small', 't5-base', 't5-large', 't5-3b']: + # logits = self.model( + # input_ids=self.dev_data['input_ids'], + # attention_mask=self.dev_data['attention_mask'], + # decoder_input_ids=self.dev_data['decoder_input_ids'], + # decoder_attention_mask=self.dev_data['decoder_attention_mask'], + # )['logits'] + # elif self.model_name in ['gpt2', 'gpt2-medium', 'gpt2-large', 'gpt2-xl', 'llama2']: + # logits = self.model( + # input_ids=self.dev_data['input_ids'], + # attention_mask=self.dev_data['attention_mask'], + # )['logits'] + # else: + # logits = self.model( + # input_ids=self.dev_data['input_ids'], + # attention_mask=self.dev_data['attention_mask'], + # mask_pos=self.dev_data['mask_pos'], + # )['logits'] + + # dev_loss, dev_perf = self.calc_metric(logits, self.dev_data['labels']) + # # fitlog.add_metric(dev_perf, name='dev_acc', step=self.num_call) + # if dev_perf > self.best_dev_perf: + # self.best_dev_perf = dev_perf + # # fitlog.add_best_metric(self.best_dev_perf, name='dev_acc') + # self.best_prompt = copy.deepcopy(tmp_prompt) + # # if self.save_path is not None: + # # with open(os.path.join(self.save_path, 'dev_acc.txt'), 'a') as fout: + # # fout.write('{}\t{}\n'.format(self.num_call, dev_loss)) + # print('Dev loss: {}. Dev perf: {}. 
Best dev perf: {}'.format( + # round(float(dev_loss), 4), + # round(float(dev_perf), 4), + # round(float(self.best_dev_perf), 4))) + # print('********* Done *********') + if self.parallel: + return all_losses + else: + return loss + + def eval_multi_batch(self, prompt_embedding=None, test_data=None, return_pred=False): + self.num_call += 1 + if prompt_embedding is None: + prompt_embedding = self.best_prompt + bsz = self.batch_size # for test data + if isinstance(prompt_embedding, list): # multiple queries + if self.args.norm_prompt: + for i in range(len(prompt_embedding)): + if np.linalg.norm(prompt_embedding[i]) > self.args.prompt_norm_threshold: + prompt_embedding[i] = ( + prompt_embedding[i] / np.linalg.norm(prompt_embedding[i]) * self.args.prompt_norm_threshold + ) + pe_list = [] + for pe in prompt_embedding: + z = torch.tensor(pe).type(torch.float32) # z + z = self.linear(z) # Az + if self.init_prompt is not None: + z = z + self.init_prompt # Az + p_0 + pe_list.append(z.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1)) + prompt_embedding = torch.cat(pe_list) # num_workers*bsz x prompt_len x dim + assert len(prompt_embedding) == len(self.train_data["input_ids"]) + elif isinstance(prompt_embedding, np.ndarray): # single query or None + if self.args.norm_prompt: + if np.linalg.norm(prompt_embedding) > self.args.prompt_norm_threshold: + prompt_embedding = ( + prompt_embedding / np.linalg.norm(prompt_embedding) * self.args.prompt_norm_threshold + ) + prompt_embedding = torch.tensor(prompt_embedding).type(torch.float32) # z + prompt_embedding = self.linear(prompt_embedding) # Az + if self.init_prompt is not None: + prompt_embedding = prompt_embedding + self.init_prompt # Az + p_0 + prompt_embedding = prompt_embedding.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1) + else: + raise ValueError( + f"[Prompt Embedding] Only support [list, numpy.ndarray], got `{type(prompt_embedding)}` instead." 
+ ) + + self.model.set_prompt_embedding(prompt_embedding) + + if isinstance(test_data, DataSet): + if prompt_embedding.shape[0] > bsz: + raise ValueError("Provide a single prompt embedding for testing.") + + test_tester = Tester( + data=test_data, + model=self.model, + metrics=self.metric, + batch_size=self.batch_size, + num_workers=1, + device=self.device, + use_tqdm=False, + ) + results = test_tester.test() + test_acc = results[self.metric_name][self.metric_key] + # fitlog.add_best_metric(test_acc, name='test_acc') + return test_acc + else: + dataloader_train = DataSetIter(self.train_data, batch_size=self.batch_size, sampler=SequentialSampler()) + loss_list = [] + for train_data, train_label in dataloader_train: + for k, v in train_data.items(): + train_data[k] = v.to(self.device) + for k, v in train_label.items(): + train_label[k] = v.to(self.device) + if self.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + logits = self.model( + input_ids=train_data["input_ids"], + attention_mask=train_data["attention_mask"], + decoder_input_ids=train_data["decoder_input_ids"], + decoder_attention_mask=train_data["decoder_attention_mask"], + )["logits"] + elif self.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", "llama2"]: + logits = self.model( + input_ids=train_data["input_ids"], + attention_mask=train_data["attention_mask"], + )["logits"] + else: + logits = self.model( + input_ids=train_data["input_ids"], + attention_mask=train_data["attention_mask"], + mask_pos=train_data["mask_pos"], + )["logits"] + + loss, perf = self.calc_metric(logits, train_label["labels"]) + loss_list.append(loss) + + return np.average(loss_list) + + def eval_perturb_multi_batch(self, prompt_embedding=None, test_data=None, return_pred=False): + self.num_call += 1 + if prompt_embedding is None: + prompt_embedding = self.best_prompt + bsz = self.batch_size # for test data + if isinstance(prompt_embedding, list): # multiple queries + if self.args.norm_prompt: + for i in range(len(prompt_embedding)): + if np.linalg.norm(prompt_embedding[i]) > self.args.prompt_norm_threshold: + prompt_embedding[i] = ( + prompt_embedding[i] / np.linalg.norm(prompt_embedding[i]) * self.args.prompt_norm_threshold + ) + pe_list = [] + for pe in prompt_embedding: + z = torch.tensor(pe).type(torch.float32) # z + z = self.linear(z) # Az + if self.init_prompt is not None: + z = z + self.init_prompt # Az + p_0 + pe_list.append(z.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1)) + prompt_embedding = torch.cat(pe_list) # num_workers*bsz x prompt_len x dim + assert len(prompt_embedding) == len(self.train_data_aux["input_ids"]) + elif isinstance(prompt_embedding, np.ndarray): # single query or None + if self.args.norm_prompt: + if np.linalg.norm(prompt_embedding) > self.args.prompt_norm_threshold: + prompt_embedding = ( + prompt_embedding / np.linalg.norm(prompt_embedding) * self.args.prompt_norm_threshold + ) + prompt_embedding = torch.tensor(prompt_embedding).type(torch.float32) # z + prompt_embedding = self.linear(prompt_embedding) # Az + if self.init_prompt is not None: + prompt_embedding = prompt_embedding + self.init_prompt # Az + p_0 + prompt_embedding = prompt_embedding.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1) + else: + raise ValueError( + f"[Prompt Embedding] Only support [list, numpy.ndarray], got `{type(prompt_embedding)}` instead." 
+ ) + + self.model.set_prompt_embedding(prompt_embedding) + + if isinstance(test_data, DataSet): + if prompt_embedding.shape[0] > bsz: + raise ValueError("Provide a single prompt embedding for testing.") + + test_tester = Tester( + data=test_data, + model=self.model, + metrics=self.metric, + batch_size=self.batch_size, + num_workers=1, + device=self.device, + use_tqdm=False, + ) + results = test_tester.test() + test_acc = results[self.metric_name][self.metric_key] + # fitlog.add_best_metric(test_acc, name='test_acc') + return test_acc + else: + dataloader_train = DataSetIter(self.train_data_aux, batch_size=bsz, sampler=SequentialSampler()) + loss_list = [] + for train_data, train_label in dataloader_train: + for k, v in train_data.items(): + train_data[k] = v.to(self.device) + for k, v in train_label.items(): + train_label[k] = v.to(self.device) + if self.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + logits = self.model( + input_ids=train_data["input_ids"], + attention_mask=train_data["attention_mask"], + decoder_input_ids=train_data["decoder_input_ids"], + decoder_attention_mask=train_data["decoder_attention_mask"], + )["logits"] + elif self.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", "llama2"]: + logits = self.model( + input_ids=train_data["input_ids"], + attention_mask=train_data["attention_mask"], + )["logits"] + else: + logits = self.model( + input_ids=train_data["input_ids"], + attention_mask=train_data["attention_mask"], + mask_pos=train_data["mask_pos"], + )["logits"] + + loss, perf = self.calc_metric(logits, train_label["labels"]) + loss_list.append(loss) + + return np.average(loss_list) + + +class ClientLMForwardAPI(LMForwardAPI): + def __init__(self, args, train_data=None, dev_data=None, init_prompt_path=None, baseAPI=None): + super().__init__(args, train_data, dev_data, init_prompt_path) + if not isinstance(baseAPI, LMForwardAPI): + raise ValueError("Please provide a base API to initialize API for the clients") + self.model = baseAPI.model + self.tokenizer = baseAPI.tokenizer + self.config = baseAPI.config + self.metric = baseAPI.metric + self.metric_key = baseAPI.metric_key + self.metric_name = baseAPI.metric_name + self.linear = baseAPI.linear diff --git a/research/fed-bpt/src/cma_decomposer.py b/research/fed-bpt/src/cma_decomposer.py new file mode 100644 index 0000000000..b2c9828785 --- /dev/null +++ b/research/fed-bpt/src/cma_decomposer.py @@ -0,0 +1,116 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
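+
+"""FOBS decomposers for `cma.CMAEvolutionStrategy` objects.
+
+These allow CMA-ES optimizer state to pass through NVFlare's FOBS serialization,
+which cannot handle some members of the `cma` classes out of the box:
+`GaussFullSampler` holds function references (`randn`, `eigenmethod`) and
+`CMADataLogger` holds a circular reference to the strategy object. The decomposers
+below drop those members when encoding and restore them when decoding;
+`register_decomposers()` registers them, together with the remaining `cma` data
+classes, with FOBS.
+
+The statements at module scope run a small round-trip check: a `CMAEvolutionStrategy`
+is encoded with `fobs.dumps` and restored with `fobs.loads`.
+"""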
+ +import copy +from typing import Any, Type + +import cma +import numpy as np +from cma import CMADataLogger, CMAOptions +from cma.constraints_handler import BoundNone +from cma.evolution_strategy import _CMAParameters, _CMASolutionDict_functional, _CMAStopDict +from cma.optimization_tools import BestSolution +from cma.recombination_weights import RecombinationWeights +from cma.sampler import GaussFullSampler +from cma.sigma_adaptation import CMAAdaptSigmaCSA +from cma.transformations import DiagonalDecoding, GenoPheno +from cma.utilities.utils import BlancClass, DictFromTagsInString, ElapsedWCTime, MoreToWrite, SolutionDict + +from nvflare.app_common.decomposers.common_decomposers import Float64ScalarDecomposer, NumpyArrayDecomposer +from nvflare.fuel.utils import fobs +from nvflare.fuel.utils.fobs import Decomposer +from nvflare.fuel.utils.fobs.datum import DatumManager + + +class GaussFullSamplerDecomposer(Decomposer): + def supported_type(self) -> Type[GaussFullSampler]: + return GaussFullSampler + + def decompose(self, target: GaussFullSampler, manager: DatumManager = None) -> Any: + target = copy.deepcopy(target) + members = vars(target) + # The functions can't be serialized + if "randn" in members: + del members["randn"] + if "eigenmethod" in members: + del members["eigenmethod"] + return members + + def recompose(self, data: dict, manager: DatumManager = None) -> GaussFullSampler: + instance = GaussFullSampler.__new__(GaussFullSampler) + + # Recreate the removed function fields + data["randn"] = np.random.randn + data["eigenmethod"] = np.linalg.eigh + instance.__dict__.update(data) + return instance + + +class CMADataLoggerDecomposer(Decomposer): + def supported_type(self) -> Type[CMADataLogger]: + return CMADataLogger + + def decompose(self, target: GaussFullSampler, manager: DatumManager = None) -> Any: + target = copy.deepcopy(target) + members = vars(target) + + # This field causes a circular reference, FOBS doesn't support this. + if "es" in members: + del members["es"] + + return members + + def recompose(self, data: dict, manager: DatumManager = None) -> CMADataLogger: + instance = CMADataLogger.__new__(CMADataLogger) + instance.__dict__.update(data) + return instance + + +def register_decomposers(): + fobs.register(NumpyArrayDecomposer) + fobs.register(Float64ScalarDecomposer) + fobs.register(GaussFullSamplerDecomposer) + fobs.register(CMADataLoggerDecomposer) + fobs.register_data_classes( + cma.CMAEvolutionStrategy, + CMAOptions, + GenoPheno, + SolutionDict, + BoundNone, + _CMAParameters, + RecombinationWeights, + CMAAdaptSigmaCSA, + DiagonalDecoding, + _CMASolutionDict_functional, + BestSolution, + BlancClass, + CMADataLogger, + DictFromTagsInString, + ElapsedWCTime, + _CMAStopDict, + MoreToWrite, + ) + + +register_decomposers() + +es = cma.CMAEvolutionStrategy(4 * [5], 10, dict(ftarget=1e-9, seed=5)) + +buffer = fobs.dumps(es) +print(f"Encoded size: {len(buffer)}") + +new_es = fobs.loads(buffer) +new_es.logger.register(new_es) + +print(new_es) diff --git a/research/fed-bpt/src/data_process.py b/research/fed-bpt/src/data_process.py new file mode 100644 index 0000000000..b03b0b9f93 --- /dev/null +++ b/research/fed-bpt/src/data_process.py @@ -0,0 +1,391 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Part of this code is adopted from BBT (https://github.com/txsun1997/Black-Box-Tuning) + +# MIT License +# +# Copyright (c) 2022 Tianxiang Sun +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import os + +os.environ["TOKENIZERS_PARALLELISM"] = "false" +import copy +import random + +import numpy as np +import torch +from cvxopt import matrix, solvers +from fastNLP import DataSet, cache_results +from numpy.random import RandomState +from torch.utils.data import Dataset +from transformers import ( + AutoTokenizer, + BartTokenizer, + BertTokenizer, + ElectraTokenizer, + GPT2Tokenizer, + RobertaTokenizer, + T5Tokenizer, +) + +cache_fn = None + + +class data_processor: + def __init__(self, args) -> None: + # below are free hyper-params + self.model_name = args.model_name + if self.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + from dataloaders.dataloader_t5 import ( + AGNewsLoader, + DBPediaLoader, + MRPCLoader, + RTELoader, + SNLILoader, + SST2Loader, + YelpPLoader, + ) + elif self.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl"]: + from dataloaders.dataloader_gpt import ( + AGNewsLoader, + DBPediaLoader, + MRPCLoader, + RTELoader, + SNLILoader, + SST2Loader, + YelpPLoader, + ) + elif self.model_name in ["fnlp/cpt-large"]: + from dataloaders.dataloader_cpt import ( + AmazonLoader, + BQLoader, + C3Loader, + CCPMLoader, + ChnSentLoader, + CMNLILoader, + LCQMCLoader, + OCNLILoader, + THUCNewsLoader, + TNewsLoader, + ) + elif self.model_name in ["llama2"]: + from dataloaders.dataloader_llama import ( + AGNewsLoader, + DBPediaLoader, + MRPCLoader, + RTELoader, + SNLILoader, + SST2Loader, + YelpPLoader, + ) + else: + from dataloaders.dataloader import ( + AGNewsLoader, + DBPediaLoader, + MRPCLoader, + RTELoader, + SNLILoader, + SST2Loader, + YelpPLoader, + ) + + self.task_name = args.task_name + self.n_prompt_tokens = args.n_prompt_tokens + + self.seed = args.seed + + # if task_name in ['mrpc', 'snli', 'qnli', 'rte']: + # args.cat_or_add = 'cat' + self.cat_or_add = args.cat_or_add + + if self.task_name in ["sst2", "yelpp", "rte", "mrpc", "chnsent", 
"lcqmc", "bq"]: + num_labels = 2 + elif self.task_name in ["snli", "cmnli", "ocnli"]: + num_labels = 3 + elif self.task_name in ["agnews", "ccpm", "c3"]: + num_labels = 4 + elif self.task_name in ["amazon"]: + num_labels = 5 + elif self.task_name in ["thucnews"]: + num_labels = 10 + elif self.task_name in ["dbpedia", "tnews"]: + num_labels = 14 + else: + raise ValueError + + # log_dir = './logs' + # fitlog.set_log_dir(log_dir) + # fitlog.commit(__file__, fit_msg=save_path) + # fitlog.add_hyper(args) + # fitlog.add_hyper_in_file(__file__) + + random.seed(self.seed) + np.random.seed(self.seed) + torch.manual_seed(self.seed) + + if self.model_name in ["roberta-base", "roberta-large"]: + self.tokenizer = RobertaTokenizer.from_pretrained(self.model_name) + elif self.model_name in ["bert-base-uncased", "bert-large-uncased", "fnlp/cpt-large"]: + self.tokenizer = BertTokenizer.from_pretrained(self.model_name) + elif self.model_name in ["google/electra-base-generator", "google/electra-large-generator"]: + self.tokenizer = ElectraTokenizer.from_pretrained(self.model_name) + elif self.model_name in ["facebook/bart-base", "facebook/bart-large"]: + self.tokenizer = BartTokenizer.from_pretrained(self.model_name) + elif self.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + self.tokenizer = T5Tokenizer.from_pretrained(self.model_name) + elif self.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl"]: + self.tokenizer = GPT2Tokenizer.from_pretrained(self.model_name) + elif self.model_name in ["llama2"]: + self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf") + self.tokenizer.pad_token = self.tokenizer.unk_token + else: + raise NotImplementedError + + global cache_fn + cache_fn = ( + f"caches/data_{self.model_name.replace('/', '-')}_{self.task_name}_{self.n_prompt_tokens}_{self.seed}.pt" + ) + + if self.model_name not in ["fnlp/cpt-large"]: + self.DataLoader = { + "sst2": SST2Loader, + "agnews": AGNewsLoader, + "yelpp": YelpPLoader, + "dbpedia": DBPediaLoader, + "rte": RTELoader, + "mrpc": MRPCLoader, + "snli": SNLILoader, + } + else: + self.DataLoader = { + "chnsent": ChnSentLoader, + "thucnews": THUCNewsLoader, + "lcqmc": LCQMCLoader, + "cmnli": CMNLILoader, + "ocnli": OCNLILoader, + "amazon": AmazonLoader, + "bq": BQLoader, + "ccpm": CCPMLoader, + "tnews": TNewsLoader, + "c3": C3Loader, + } + + # @cache_results(cache_fn, _refresh=False) + def get_data(self): + if self.task_name in ["agnews", "yelpp", "dbpedia", "snli"]: + splits = ["train", "test"] + else: # for datasets without test set, we use dev set + splits = ["train", "validation"] + if self.cat_or_add == "cat": + data_bundle = self.DataLoader[self.task_name](tokenizer=self.tokenizer, n_prompt_tokens=0).my_load(splits) + else: + data_bundle = self.DataLoader[self.task_name]( + tokenizer=self.tokenizer, n_prompt_tokens=self.n_prompt_tokens + ).my_load(splits) + return data_bundle + + +def construct_true_few_shot_data(args, train_data, k_shot): + train_label_count = {} + dev_label_count = {} + new_train_data = DataSet() + new_dev_data = DataSet() + all_indices = [_ for _ in range(len(train_data))] + np.random.shuffle(all_indices) + + if k_shot < 0: + idxs_train = np.random.choice(len(train_data), int(len(train_data) * 0.9), replace=False) + idxs_dev = list(set(range(len(train_data))) - set(idxs_train)) + new_train_data = train_data[idxs_train.tolist()] + new_dev_data = train_data[np.array(idxs_dev).tolist()] + + else: + for index in all_indices: + label = train_data[index]["labels"] + if label < 0: + 
continue + + if label not in train_label_count: + train_label_count[label] = 0 + if label not in dev_label_count: + dev_label_count[label] = 0 + + if train_label_count[label] < k_shot: + new_train_data.append(train_data[index]) + train_label_count[label] += 1 + elif dev_label_count[label] < k_shot: + new_dev_data.append(train_data[index]) + dev_label_count[label] += 1 + + if args.model_name in ["t5-small", "t5-base", "t5-large", "t5-3b"]: + new_train_data.set_input("input_ids", "attention_mask", "decoder_input_ids", "decoder_attention_mask") + new_dev_data.set_input("input_ids", "attention_mask", "decoder_input_ids", "decoder_attention_mask") + elif args.model_name in ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl", "llama2"]: + new_train_data.set_input("input_ids", "attention_mask") + new_dev_data.set_input("input_ids", "attention_mask") + else: + new_train_data.set_input("input_ids", "attention_mask", "mask_pos") + new_dev_data.set_input("input_ids", "attention_mask", "mask_pos") + + new_train_data.set_target("labels") + new_dev_data.set_target("labels") + return new_train_data, new_dev_data + + +def split_data(args, train_data, dev_data): + train_data_idxs = [i for i in range(len(train_data))] + dev_data_idxs = [i for i in range(len(dev_data))] + user_dict_train, user_dict_dev = {}, {} + num_items_train = int(len(train_data) / args.num_users) + num_items_dev = int(len(dev_data) / args.num_users) + + if args.iid == 1: + for i in range(args.num_users): + user_dict_train[i] = set(np.random.choice(train_data_idxs, num_items_train, replace=False)) + user_dict_dev[i] = set(np.random.choice(dev_data_idxs, num_items_dev, replace=False)) + train_data_idxs = list(set(train_data_idxs) - user_dict_train[i]) + dev_data_idxs = list(set(dev_data_idxs) - user_dict_dev[i]) + + if args.iid == 0: + rs = RandomState(args.seed) + user_dict_train, _ = Dirichlet_noniid(train_data, args.num_users, args.alpha_dir, rs) + for i in range(args.num_users): + user_dict_dev[i] = set(np.random.choice(dev_data_idxs, num_items_dev, replace=False)) + dev_data_idxs = list(set(dev_data_idxs) - user_dict_dev[i]) + + return user_dict_train, user_dict_dev + + +def Dirichlet_noniid(dataset, num_users, alpha, rs): + """ + Sample dataset with dirichlet distribution and concentration parameter alpha + """ + # img_num_per_client = len(dataset)//num_users + dict_users = {i: np.array([], dtype=np.int64) for i in range(num_users)} + idxs = np.arange(len(dataset)) + labels = np.array(dataset["labels"]) + classes = np.unique(labels) + num_classes = len(classes) + labels_idxs = [] + prior_class_distribution = np.zeros(num_classes) + b = np.zeros(num_classes) + for i in range(num_classes): + labels_idxs.append(idxs[labels == classes[i]]) + prior_class_distribution[i] = len(labels_idxs[i]) / len(dataset) + b[i] = len(labels_idxs[i]) + + data_ratio = np.zeros([num_classes, num_users]) + + if isinstance(alpha, list): + for i in range(num_users): + data_ratio[:, i] = rs.dirichlet(prior_class_distribution * alpha[i]) + else: + data_ratio = np.transpose(rs.dirichlet(prior_class_distribution * alpha, size=num_users)) + # data_ratio = data_ratio/np.sum(data_ratio,axis=1,keepdims=True) + # Client_DataSize = len(dataset)//num_users*np.ones([num_users,1],dtype=np.int64) + print(f"Class_distribution {prior_class_distribution}. 
Data_ratio {data_ratio}") + A = matrix(data_ratio) + b = matrix(b) + G = matrix(-np.eye(num_users)) + h = matrix(np.zeros([num_users, 1])) + P = matrix(np.eye(num_users)) + q = matrix(np.zeros([num_users, 1])) + try: + results = solvers.qp(P, q, G, h, A, b) + Client_DataSize = np.array(results["x"]) + Data_Division = data_ratio * np.transpose(Client_DataSize) + except ValueError: + prior_user_distribution = np.array([1 / num_users for _ in range(num_users)]) + data_ratio = rs.dirichlet(prior_user_distribution * alpha, size=num_classes) + Class_DataSize = np.array([int(len(dataset) / num_classes) for _ in range(num_classes)]) + Data_Division = (data_ratio.T * Class_DataSize).T + # print(Client_DataSize) + print(Data_Division) + print(np.sum(Data_Division, axis=0)) + print(np.sum(Data_Division, axis=1)) + rest = [] + for label in range(num_classes): + for client in range(num_users): + data_idx = rs.choice(labels_idxs[label], int(Data_Division[label, client]), replace=False) + dict_users[client] = np.concatenate([dict_users[client], data_idx], 0) + labels_idxs[label] = list(set(labels_idxs[label]) - set(data_idx)) + rest = rest + labels_idxs[label] + + rest_clients = rs.choice(range(num_users), len(rest), replace=True) + + for n, user in enumerate(rest_clients): + dict_users[user] = np.append(dict_users[user], rest[n]) + + for user in range(num_users): + rs.shuffle(dict_users[user]) + return dict_users, data_ratio + + +def perturb_dataset(args, dataset, config): + pert_dataset = copy.deepcopy(dataset) + if isinstance(dataset, dict): + preserve_mask = torch.ones_like(dataset["input_ids"]) + random_text = torch.randint_like(dataset["input_ids"], 0, config.vocab_size) + replace_mask = torch.bernoulli(args.perturb_rate * dataset["attention_mask"]).long() + preserve_mask -= replace_mask + pert_dataset["input_ids"] = pert_dataset["input_ids"] * preserve_mask + random_text * replace_mask + return pert_dataset + else: + input_content = torch.tensor(pert_dataset["input_ids"].get(range(len(dataset)))) + preserve_mask = torch.ones_like(input_content) + random_text = torch.randint_like(input_content, 0, config.vocab_size) + replace_mask = torch.bernoulli( + args.perturb_rate * torch.tensor(dataset["attention_mask"].get(range(len(dataset)))) + ).long() + preserve_mask -= replace_mask + pert_dataset["input_ids"].content = (input_content * preserve_mask + random_text * replace_mask).tolist() + pert_dataset["attention_mask"].content = torch.tensor( + pert_dataset["attention_mask"].get(range(len(dataset))) + ).tolist() + pert_dataset["mask_pos"].content = torch.tensor(pert_dataset["mask_pos"].get(range(len(dataset)))).tolist() + pert_dataset["labels"].content = torch.tensor(pert_dataset["labels"].get(range(len(dataset)))).tolist() + return pert_dataset + + +class DatasetSplit(Dataset): + """An abstract Dataset class wrapped around Pytorch Dataset class.""" + + def __init__(self, dataset, idxs): + self.dataset = dataset + self.idxs = [int(i) for i in idxs] + + def __len__(self): + return len(self.idxs) + + def __getitem__(self, item): + return self.dataset[item] diff --git a/research/fed-bpt/src/dataloaders/dataloader.py b/research/fed-bpt/src/dataloaders/dataloader.py new file mode 100644 index 0000000000..fc31a8a106 --- /dev/null +++ b/research/fed-bpt/src/dataloaders/dataloader.py @@ -0,0 +1,482 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Part of this code is adopted from BBT (https://github.com/txsun1997/Black-Box-Tuning) + +# MIT License +# +# Copyright (c) 2022 Tianxiang Sun +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from functools import partial + +import datasets +from fastNLP import DataSet, Instance +from fastNLP.io import DataBundle, Loader +from transformers import RobertaTokenizer + + +def convert_to_features(example_batch, tokenizer): + input_encodings = tokenizer.batch_encode_plus(example_batch["input_text"]) + target_encodings = tokenizer.batch_encode_plus(example_batch["target_text"], add_special_tokens=False) + mask_pos = [] + for input_ids in input_encodings["input_ids"]: + mask_pos.append(input_ids.index(tokenizer.mask_token_id)) + encodings = { + "input_ids": input_encodings["input_ids"], + "attention_mask": input_encodings["attention_mask"], + "mask_pos": mask_pos, + "labels": target_encodings["input_ids"], + } + + return encodings + + +class SST2Loader(Loader): + def __init__(self, tokenizer=None, n_prompt_tokens=50): + super().__init__() + if tokenizer is None: + self.tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + else: + self.tokenizer = tokenizer + self.n_prompt_tokens = n_prompt_tokens + self.label2text = { + 0: "bad", + 1: "great", + } + + def convert_examples(self, example): + if self.n_prompt_tokens > 0: # use randomly selected words as initial prompt + offset = 1000 + prompt = self.tokenizer.decode(list(range(offset, offset + self.n_prompt_tokens))) + example["input_text"] = "%s . %s . It was %s ." % (prompt, example["sentence"], self.tokenizer.mask_token) + example["target_text"] = self.label2text[example["label"]] + else: + example["input_text"] = "%s . It was %s ." 
% (example["sentence"], self.tokenizer.mask_token) + example["target_text"] = self.label2text[example["label"]] + return example + + def _load(self, split) -> DataSet: + # load dataset with Huggingface's Datasets + dataset = datasets.load_dataset("glue", "sst2", split=split) + dataset = dataset.map(self.convert_examples, load_from_cache_file=False) + print("Example in {} set:".format(split)) + print(dataset[0]) + dataset = dataset.map( + partial(convert_to_features, tokenizer=self.tokenizer), batched=True, load_from_cache_file=False + ) + # Convert to fastNLP.DataSet + ds = DataSet() + for ins in dataset: + if len(ins["input_ids"]) <= 512: + example = { + "input_ids": ins["input_ids"], + "attention_mask": ins["attention_mask"], + "mask_pos": ins["mask_pos"], + "labels": ins["labels"][0], + } + ds.append(Instance(**example)) + ds.set_input("input_ids", "attention_mask", "mask_pos") + ds.set_target("labels") + return ds + + def my_load(self, splits) -> DataBundle: + datasets = {name: self._load(name) for name in splits} + data_bundle = DataBundle(datasets=datasets) + return data_bundle + + +class YelpPLoader(Loader): + def __init__(self, tokenizer=None, n_prompt_tokens=50): + super().__init__() + if tokenizer is None: + self.tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + else: + self.tokenizer = tokenizer + self.n_prompt_tokens = n_prompt_tokens + self.label2text = { + 0: "bad", + 1: "great", + } + + def convert_examples(self, example): + if self.n_prompt_tokens > 0: # use randomly selected words as initial prompt + offset = 1000 + prompt = self.tokenizer.decode(list(range(offset, offset + self.n_prompt_tokens))) + example["input_text"] = "%s . %s . It was %s ." % ( + prompt, + example["text"].replace("\\n", " "), + self.tokenizer.mask_token, + ) + example["target_text"] = self.label2text[example["label"]] + else: + example["input_text"] = "%s . It was %s ." 
% ( + example["text"].replace("\\n", " "), + self.tokenizer.mask_token, + ) + example["target_text"] = self.label2text[example["label"]] + return example + + def _load(self, split) -> DataSet: + # load dataset with Huggingface's Datasets + dataset = datasets.load_dataset("yelp_polarity", "plain_text", split=split) + dataset = dataset.map(self.convert_examples, load_from_cache_file=False) + print(dataset[0]) + dataset = dataset.map( + partial(convert_to_features, tokenizer=self.tokenizer), batched=True, load_from_cache_file=False + ) + # Convert to fastNLP.DataSet + ds = DataSet() + for ins in dataset: + if len(ins["input_ids"]) <= 512: + example = { + "input_ids": ins["input_ids"], + "attention_mask": ins["attention_mask"], + "mask_pos": ins["mask_pos"], + "labels": ins["labels"][0], + } + ds.append(Instance(**example)) + ds.set_input("input_ids", "attention_mask", "mask_pos") + ds.set_target("labels") + return ds + + def my_load(self, splits) -> DataBundle: + datasets = {name: self._load(name) for name in splits} + data_bundle = DataBundle(datasets=datasets) + return data_bundle + + +class AGNewsLoader(Loader): + def __init__(self, tokenizer=None, n_prompt_tokens=50): + super().__init__() + if tokenizer is None: + self.tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + else: + self.tokenizer = tokenizer + self.n_prompt_tokens = n_prompt_tokens + self.label2text = {0: "World", 1: "Sports", 2: "Business", 3: "Tech"} + + def convert_examples(self, example): + if self.n_prompt_tokens > 0: # use randomly selected words as initial prompt + offset = 1000 + prompt = self.tokenizer.decode(list(range(offset, offset + self.n_prompt_tokens))) + example["input_text"] = "%s . %s News: %s" % (prompt, self.tokenizer.mask_token, example["text"]) + example["target_text"] = self.label2text[example["label"]] + else: + example["input_text"] = "%s News: %s" % (self.tokenizer.mask_token, example["text"]) + example["target_text"] = self.label2text[example["label"]] + return example + + def _load(self, split) -> DataSet: + # load dataset with Huggingface's Datasets + dataset = datasets.load_dataset("ag_news", "default", split=split) + dataset = dataset.map(self.convert_examples, load_from_cache_file=False) + print(dataset[0]) + dataset = dataset.map( + partial(convert_to_features, tokenizer=self.tokenizer), batched=True, load_from_cache_file=False + ) + # Convert to fastNLP.DataSet + ds = DataSet() + for ins in dataset: + if len(ins["input_ids"]) <= 512: + example = { + "input_ids": ins["input_ids"], + "attention_mask": ins["attention_mask"], + "mask_pos": ins["mask_pos"], + "labels": ins["labels"][0], + } + ds.append(Instance(**example)) + ds.set_input("input_ids", "attention_mask", "mask_pos") + ds.set_target("labels") + return ds + + def my_load(self, splits) -> DataBundle: + datasets = {name: self._load(name) for name in splits} + data_bundle = DataBundle(datasets=datasets) + return data_bundle + + +class DBPediaLoader(Loader): + def __init__(self, tokenizer=None, n_prompt_tokens=50): + super().__init__() + if tokenizer is None: + self.tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + else: + self.tokenizer = tokenizer + self.n_prompt_tokens = n_prompt_tokens + self.label2text = { + 0: "Company", + 1: "Education", + 2: "Artist", + 3: "Athlete", + 4: "Office", + 5: "Transportation", + 6: "Building", + 7: "Natural", + 8: "Village", + 9: "Animal", + 10: "Plant", + 11: "Album", + 12: "Film", + 13: "Written", + } + + def convert_examples(self, example): + if self.n_prompt_tokens > 0: # 
use randomly selected words as initial prompt + offset = 1000 + prompt = self.tokenizer.decode(list(range(offset, offset + self.n_prompt_tokens))) + example["input_text"] = "%s [ Category: %s ] %s" % ( + prompt, + self.tokenizer.mask_token, + example["content"].strip(), + ) + example["target_text"] = self.label2text[example["label"]] + else: + example["input_text"] = "[ Category: %s ] %s" % (self.tokenizer.mask_token, example["content"].strip()) + example["target_text"] = self.label2text[example["label"]] + return example + + def _load(self, split) -> DataSet: + # load dataset with Huggingface's Datasets + dataset = datasets.load_dataset("dbpedia_14", split=split) + # dataset = datasets.load_dataset('./data/dbpedia.py', split=split) # if you cannot reach the source of dbpedia, try this + dataset = dataset.map(self.convert_examples, load_from_cache_file=False) + print(dataset[0]) + dataset = dataset.map( + partial(convert_to_features, tokenizer=self.tokenizer), batched=True, load_from_cache_file=False + ) + # Convert to fastNLP.DataSet + ds = DataSet() + for ins in dataset: + if len(ins["input_ids"]) <= 512: + example = { + "input_ids": ins["input_ids"], + "attention_mask": ins["attention_mask"], + "mask_pos": ins["mask_pos"], + "labels": ins["labels"][0], + } + ds.append(Instance(**example)) + ds.set_input("input_ids", "attention_mask", "mask_pos") + ds.set_target("labels") + return ds + + def my_load(self, splits) -> DataBundle: + datasets = {name: self._load(name) for name in splits} + data_bundle = DataBundle(datasets=datasets) + return data_bundle + + +class MRPCLoader(Loader): + def __init__(self, tokenizer=None, n_prompt_tokens=50): + super().__init__() + if tokenizer is None: + self.tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + else: + self.tokenizer = tokenizer + self.n_prompt_tokens = n_prompt_tokens + self.label2text = { + 0: "No", + 1: "Yes", + } + + def convert_examples(self, example): + if self.n_prompt_tokens > 0: # use randomly selected words as initial prompt + offset = 1000 + prompt = self.tokenizer.decode(list(range(offset, offset + self.n_prompt_tokens))) + example["input_text"] = "%s . %s ? %s , %s" % ( + prompt, + example["sentence1"], + self.tokenizer.mask_token, + example["sentence2"], + ) + example["target_text"] = self.label2text[example["label"]] + else: + example["input_text"] = "%s ? 
%s , %s" % ( + example["sentence1"], + self.tokenizer.mask_token, + example["sentence2"], + ) + example["target_text"] = self.label2text[example["label"]] + return example + + def _load(self, split) -> DataSet: + # load dataset with Huggingface's Datasets + dataset = datasets.load_dataset("glue", "mrpc", split=split) + dataset = dataset.map(self.convert_examples, load_from_cache_file=False) + print(dataset[0]) + dataset = dataset.map( + partial(convert_to_features, tokenizer=self.tokenizer), batched=True, load_from_cache_file=False + ) + # Convert to fastNLP.DataSet + ds = DataSet() + for ins in dataset: + if len(ins["input_ids"]) <= 512: + example = { + "input_ids": ins["input_ids"], + "attention_mask": ins["attention_mask"], + "mask_pos": ins["mask_pos"], + "labels": ins["labels"][0], + } + ds.append(Instance(**example)) + ds.set_input("input_ids", "attention_mask", "mask_pos") + ds.set_target("labels") + return ds + + def my_load(self, splits) -> DataBundle: + datasets = {name: self._load(name) for name in splits} + data_bundle = DataBundle(datasets=datasets) + return data_bundle + + +class RTELoader(Loader): + def __init__(self, tokenizer=None, n_prompt_tokens=50): + super().__init__() + if tokenizer is None: + self.tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + else: + self.tokenizer = tokenizer + self.n_prompt_tokens = n_prompt_tokens + self.label2text = { + 0: "Yes", + 1: "No", + } + + def convert_examples(self, example): + if self.n_prompt_tokens > 0: # use randomly selected words as initial prompt + offset = 1000 + prompt = self.tokenizer.decode(list(range(offset, offset + self.n_prompt_tokens))) + example["input_text"] = "%s . %s ? %s , %s" % ( + prompt, + example["sentence1"], + self.tokenizer.mask_token, + example["sentence2"], + ) + example["target_text"] = self.label2text[example["label"]] + else: + example["input_text"] = "%s ? 
%s , %s" % ( + example["sentence1"], + self.tokenizer.mask_token, + example["sentence2"], + ) + example["target_text"] = self.label2text[example["label"]] + return example + + def _load(self, split) -> DataSet: + # load dataset with Huggingface's Datasets + dataset = datasets.load_dataset("glue", "rte", split=split) + dataset = dataset.map(self.convert_examples, load_from_cache_file=False) + print(dataset[0]) + dataset = dataset.map( + partial(convert_to_features, tokenizer=self.tokenizer), batched=True, load_from_cache_file=False + ) + # Convert to fastNLP.DataSet + ds = DataSet() + for ins in dataset: + if len(ins["input_ids"]) <= 512: + example = { + "input_ids": ins["input_ids"], + "attention_mask": ins["attention_mask"], + "mask_pos": ins["mask_pos"], + "labels": ins["labels"][0], + } + ds.append(Instance(**example)) + ds.set_input("input_ids", "attention_mask", "mask_pos") + ds.set_target("labels") + return ds + + def my_load(self, splits) -> DataBundle: + datasets = {name: self._load(name) for name in splits} + data_bundle = DataBundle(datasets=datasets) + return data_bundle + + +class SNLILoader(Loader): + def __init__(self, tokenizer=None, n_prompt_tokens=50): + super().__init__() + if tokenizer is None: + self.tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + else: + self.tokenizer = tokenizer + self.n_prompt_tokens = n_prompt_tokens + self.label2text = { + 0: "Yes", + 1: "Maybe", + 2: "No", + } + + def convert_examples(self, example): + if self.n_prompt_tokens > 0: # use randomly selected words as initial prompt + offset = 1000 + prompt = self.tokenizer.decode(list(range(offset, offset + self.n_prompt_tokens))) + example["input_text"] = "%s . %s ? %s , %s" % ( + prompt, + example["premise"], + self.tokenizer.mask_token, + example["hypothesis"], + ) + example["target_text"] = self.label2text[example["label"]] + else: + example["input_text"] = "%s ? %s , %s" % ( + example["premise"], + self.tokenizer.mask_token, + example["hypothesis"], + ) + example["target_text"] = self.label2text[example["label"]] + return example + + def _load(self, split) -> DataSet: + # load dataset with Huggingface's Datasets + dataset = datasets.load_dataset("snli", split=split) + dataset = dataset.filter(lambda example: example["label"] in [0, 1, 2]) + dataset = dataset.map(self.convert_examples, load_from_cache_file=False) + print(dataset[0]) + dataset = dataset.map( + partial(convert_to_features, tokenizer=self.tokenizer), batched=True, load_from_cache_file=False + ) + # Convert to fastNLP.DataSet + ds = DataSet() + for ins in dataset: + if len(ins["input_ids"]) <= 512: + example = { + "input_ids": ins["input_ids"], + "attention_mask": ins["attention_mask"], + "mask_pos": ins["mask_pos"], + "labels": ins["labels"][0], + } + ds.append(Instance(**example)) + ds.set_input("input_ids", "attention_mask", "mask_pos") + ds.set_target("labels") + return ds + + def my_load(self, splits) -> DataBundle: + datasets = {name: self._load(name) for name in splits} + data_bundle = DataBundle(datasets=datasets) + return data_bundle diff --git a/research/fed-bpt/src/decomposer_widget.py b/research/fed-bpt/src/decomposer_widget.py new file mode 100644 index 0000000000..efa22547aa --- /dev/null +++ b/research/fed-bpt/src/decomposer_widget.py @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cma_decomposer import register_decomposers + +from nvflare.apis.event_type import EventType +from nvflare.apis.fl_context import FLContext +from nvflare.widgets.widget import Widget + + +class RegisterDecomposer(Widget): + def __init__(self): + """Handler to register CMA decomposers.""" + super().__init__() + + def handle_event(self, event_type: str, fl_ctx: FLContext): + if event_type == EventType.START_RUN: + # We serialize CMAEvolutionStrategy object directly. This requires registering custom decomposers. + register_decomposers() diff --git a/research/fed-bpt/src/fedbpt_train.py b/research/fed-bpt/src/fedbpt_train.py new file mode 100644 index 0000000000..149f7c85be --- /dev/null +++ b/research/fed-bpt/src/fedbpt_train.py @@ -0,0 +1,364 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Part of this code is adopted from BBT (https://github.com/txsun1997/Black-Box-Tuning) + +# MIT License +# +# Copyright (c) 2022 Tianxiang Sun +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from cma_decomposer import register_decomposers + +import nvflare.client as flare +from nvflare.client.tracking import SummaryWriter + +# initializes NVFlare client API +flare.init() +# We serialize CMAEvolutionStrategy object directly. This requires registering custom decomposers. 
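+# The decomposers teach NVFlare's FOBS serializer how to pack/unpack the cma.CMAEvolutionStrategy
+# objects carried inside the exchanged FLModel params; the server side registers the same
+# decomposers via the RegisterDecomposer widget (see decomposer_widget.py).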
+register_decomposers() +# Use SummaryWriter to stream metrics to the server +writer = SummaryWriter() + +import os + +os.environ["TOKENIZERS_PARALLELISM"] = "false" +import argparse +import copy +import random +import time +import warnings + +import cma +import numpy as np +import torch + +warnings.simplefilter("ignore", cma.evolution_strategy.InjectionWarning) + +from cma.recombination_weights import RecombinationWeights +from data_process import construct_true_few_shot_data, data_processor, perturb_dataset, split_data +from LMForwardAPI import LMForwardAPI + +parser = argparse.ArgumentParser() +parser.add_argument("--model_name", default="roberta-large", choices=["roberta-base", "roberta-large"], type=str) +parser.add_argument("--task_name", default="sst2", type=str) +parser.add_argument("--n_prompt_tokens", default=50, type=int) +parser.add_argument("--intrinsic_dim", default=500, type=int) +parser.add_argument("--k_shot", default=16, type=int) +parser.add_argument("--batch_size", default=32, type=int) +parser.add_argument("--bound", default=0, type=int) +parser.add_argument("--sigma", default=1, type=float) +parser.add_argument("--alpha", default=1, type=float) +parser.add_argument("--print_every", default=50, type=int) +parser.add_argument("--eval_every", default=100, type=int) +parser.add_argument("--device", default="cuda:0", type=str) +parser.add_argument("--alg", default="CMA", type=str) +parser.add_argument("--random_proj", default="normal", type=str) +parser.add_argument("--seed", default=42, type=int) +parser.add_argument("--loss_type", default="ce", type=str) +parser.add_argument("--cat_or_add", default="add", type=str) +parser.add_argument("--parallel", action="store_true", help="Whether to allow parallel evaluation") +# fl args +parser.add_argument("--num_users", default=10, type=int) +parser.add_argument("--iid", default=1, type=int) +parser.add_argument("--local_popsize", default=20, type=int) +parser.add_argument("--local_iter", default=8, type=int) +parser.add_argument("--alpha_dir", default=0.5, type=float) +parser.add_argument("--perturb_rate", default=0.5, type=float) +parser.add_argument("--perturb", default=0, type=int) +parser.add_argument("--note", default=None, type=str) +parser.add_argument("--llama_causal", default=0, type=int) +parser.add_argument("--norm_prompt", default=0, type=int) +parser.add_argument("--init_score_path", default=None, type=str) +parser.add_argument("--prompt_norm_threshold", default=15, type=float) +parser.add_argument("--prompt_norm_threshold_upper", default=20, type=float) +parser.add_argument("--save_prompt", default=0, type=int) +parser.add_argument( + "--inference_framework", + default="pt", + type=str, + help="""Which inference framework to use. 
+ Currently supports `pt` and `ort`, standing for pytorch and Microsoft onnxruntime respectively""", +) +parser.add_argument("--onnx_model_path", default=None, type=str, help="Path to your onnx model.") +args = parser.parse_args() + +model_name = args.model_name +task_name = args.task_name +n_prompt_tokens = args.n_prompt_tokens +intrinsic_dim = args.intrinsic_dim +k_shot = args.k_shot +batch_size = args.batch_size +bound = args.bound +sigma = args.sigma +alpha = args.alpha + +if args.local_popsize > 0: + args.local_popsize = args.local_popsize +else: + args.local_popsize = 4 + 3 * np.log(intrinsic_dim) + +device = args.device +alg = args.alg +random_proj = args.random_proj +seed = args.seed +loss_type = args.loss_type +print_every = args.print_every +eval_every = args.eval_every +cat_or_add = args.cat_or_add +parallel = args.parallel +inference_framework = args.inference_framework +onnx_model_path = args.onnx_model_path + +if inference_framework not in ["pt", "ort"]: + raise ValueError(f'inference_framework only supports "pt", "ort", got `{inference_framework}` instead.') +if inference_framework == "ort": + assert onnx_model_path is not None, "Path to onnx model is required, got None instead." + assert os.path.exists(onnx_model_path), f"In valid onnx model path `{onnx_model_path}`" + +# fixed hyper-params +if cat_or_add == "add": + init_prompt_path = None +else: + init_prompt_path = "./nli_base_prompt.pt" + + +random.seed(seed) +np.random.seed(seed) +torch.manual_seed(seed) + +from transformers import RobertaTokenizer + +tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + +# Initialize API + +model_forward_api = LMForwardAPI(args=args, init_prompt_path=init_prompt_path) + +global_api_setting = model_forward_api.client_record() + +# Initialize data processor + +data_processor = data_processor(args) + + +data_bundle = data_processor.get_data() +if task_name in ["agnews", "yelpp", "dbpedia", "snli"]: + train_data, test_data = data_bundle.get_dataset("train"), data_bundle.get_dataset("test") +else: + train_data, test_data = data_bundle.get_dataset("train"), data_bundle.get_dataset("validation") + +train_data, dev_data = construct_true_few_shot_data(args, train_data, k_shot) + +for ds in [train_data, dev_data, test_data]: + ds.set_pad_val( + "input_ids", data_processor.tokenizer.pad_token_id if data_processor.tokenizer.pad_token_id is not None else 0 + ) + ds.set_pad_val("attention_mask", 0) +print("# of train data: {}".format(len(train_data))) +print("Example:") +print(train_data[0]) +print("\n# of dev data: {}".format(len(dev_data))) +print("Example:") +print(dev_data[0]) +print("\n# of test data: {}".format(len(test_data))) +print("Example:") +print(test_data[0]) + +# Split dataset +user_dict_train, user_dict_dev = split_data(args, train_data, dev_data) + +# use site name index to access data shards and track outputs +site_name = flare.get_site_name() +idx = int(site_name.split("-")[1]) - 1 +print(f"idx from site name {site_name}: {idx}") + +client_fitnesses_orig_dict = {idx: []} +client_fitnesses_pert_dict = {idx: []} +client_prompt_dict = {idx: []} + +local_cma_mu = RecombinationWeights(args.local_popsize).mu + +client_api_setting_list = {idx: model_forward_api.client_record()} + +best_test_acc = 0 +train_step = 0 + +# Run this loop every round +while flare.is_running(): + input_model = flare.receive() + global_es = input_model.params["global_es"] + current_round = input_model.current_round + print(f"Running current_round={current_round}") + print( + f"Received 
global_es.sigma={global_es.sigma} and global_es.mean: len={len(global_es.mean)}, mean={np.mean(global_es.mean)}, std={np.std(global_es.mean)}" + ) + + local_es = global_es._copy_light( + inopts={"seed": seed, "maxiter": args.local_iter, "popsize": args.local_popsize, "CMA_mu": None} + ) + local_sigma_current = global_es.sigma + + print("Global es evaluate on test data...") + global_api_setting["best_prompt"] = local_es.mean + model_forward_api.load_client_record(global_api_setting) + global_test_acc = model_forward_api.eval(prompt_embedding=local_es.mean, test_data=test_data) + print("Global test acc: {}".format(round(global_test_acc, 4))) + print("Global prompt norm: {}".format(np.linalg.norm(local_es.mean))) + writer.add_scalar("global_test_acc", global_test_acc, current_round) + + if args.norm_prompt and np.linalg.norm(local_es.mean) < args.prompt_norm_threshold_upper: + args.prompt_norm_threshold += 1 + model_forward_api.args = args + print("Set prompt_norm_threshold as {}".format(args.prompt_norm_threshold)) + if args.save_prompt: + if global_test_acc > best_test_acc: + best_test_acc = global_test_acc + torch.save( + model_forward_api.model.prompt_embedding.cpu().detach(), + "results/llama/sst2/larger_global_pop_new_sigma_pert/fl_prompt.pt", + ) + + client_sigmas = {} + + model_forward_api.load_client_record(client_api_setting_list[idx]) + # initialize local data + + train_sample_idxs, dev_sample_idxs = user_dict_train[idx], user_dict_dev[idx] + print(f"Client {idx} execute local training on {len(train_sample_idxs)} samples...") + print(f"Client {idx} train_sample_idxs {train_sample_idxs}") + + local_train_data = { + "input_ids": torch.tensor(train_data["input_ids"].get(train_sample_idxs)), + "attention_mask": torch.tensor(train_data["attention_mask"].get(train_sample_idxs)), + "mask_pos": torch.tensor(train_data["mask_pos"].get(train_sample_idxs)), + "labels": torch.tensor(train_data["labels"].get(train_sample_idxs)), + } + local_dev_data = { + "input_ids": torch.tensor(dev_data["input_ids"].get(dev_sample_idxs)), + "attention_mask": torch.tensor(dev_data["attention_mask"].get(dev_sample_idxs)), + "mask_pos": torch.tensor(dev_data["mask_pos"].get(dev_sample_idxs)), + "labels": torch.tensor(dev_data["labels"].get(dev_sample_idxs)), + } + + print("Population Size: {}".format(local_es.popsize)) + print("{} Evaluation.".format("Parallel" if parallel else "Serial")) + if parallel: + # expand training data to a larger batch for parallel evaluation + train_data["input_ids"] = train_data["input_ids"].repeat(local_es.popsize, 1) + train_data["attention_mask"] = train_data["attention_mask"].repeat(local_es.popsize, 1) + train_data["mask_pos"] = train_data["mask_pos"].repeat(local_es.popsize) + train_data["labels"] = train_data["labels"].repeat(local_es.popsize) + + local_train_data_aux = perturb_dataset(args, local_train_data, model_forward_api.config) + + model_forward_api.set_dataset(local_train_data, local_dev_data, local_train_data_aux) + + # opt = cma.CMAOptions() + local_sigmas = [] + start_time = time.time() + while not local_es.stop(): + local_sigmas.append(local_es.sigma) + solutions = local_es.ask() + if args.norm_prompt: + for i in range(len(solutions)): + if np.linalg.norm(solutions[i]) > args.prompt_norm_threshold: + solutions[i] = solutions[i] / np.linalg.norm(solutions[i]) * args.prompt_norm_threshold + if parallel: + fitnesses_orig = model_forward_api.eval(solutions) + fitnesses_pert = model_forward_api.eval_perturb(solutions) + if args.perturb != 0: + fitnesses = 
fitnesses_orig / fitnesses_pert + else: + fitnesses = fitnesses_orig + else: + if args.perturb != 0: + fitnesses = [model_forward_api.eval(x) / model_forward_api.eval_perturb(x) for x in solutions] + else: + fitnesses = [model_forward_api.eval(x) for x in solutions] + local_es.tell(solutions, fitnesses) + if len(local_sigmas) % 10 == 0: + test_acc = model_forward_api.eval(prompt_embedding=local_es.mean, test_data=test_data) + print(f"Local test acc at local iter {len(local_sigmas)}: {round(test_acc, 4)}") + writer.add_scalar("local_test_acc", test_acc, train_step) + train_step += 1 + + end_time = time.time() + print("Done. Elapsed time: {} (mins)".format((end_time - start_time) / 60)) + + client_prompt_dict[idx].append(copy.deepcopy(local_es.mean)) + + # Generate solutions uploaded to the server + solutions = [local_es.mean] + if args.norm_prompt: + for i in range(len(solutions)): + if np.linalg.norm(solutions[i]) > args.prompt_norm_threshold: + solutions[i] = solutions[i] / np.linalg.norm(solutions[i]) * args.prompt_norm_threshold + if parallel: + fitnesses_orig = model_forward_api.eval(solutions) + fitnesses_pert = model_forward_api.eval_perturb(solutions) + if args.perturb != 0: + fitnesses = fitnesses_orig / fitnesses_pert + else: + fitnesses = fitnesses_orig + else: + fitnesses_orig = np.array([model_forward_api.eval(x) for x in solutions]) + fitnesses_pert = np.array([model_forward_api.eval_perturb(x) for x in solutions]) + if args.perturb != 0: + fitnesses = fitnesses_orig / fitnesses_pert + else: + fitnesses = fitnesses_orig + + test_acc = model_forward_api.eval(prompt_embedding=local_es.mean, test_data=test_data) + print(f"Local test acc after current_round {current_round}: {round(test_acc, 4)}") + + print(f"client sigma: {local_sigmas}") + + client_fitnesses_orig_dict[idx].append(copy.deepcopy(fitnesses_orig)) + client_fitnesses_pert_dict[idx].append(copy.deepcopy(fitnesses_pert)) + + client_api_setting_list[idx] = model_forward_api.client_record() + + global_api_setting = model_forward_api.client_record() + + # construct trained FL model update + output_model = flare.FLModel( + params={ + "solutions": solutions, + "fitnesses": fitnesses, + "local_sigmas": local_sigmas, + "local_cma_mu": local_cma_mu, + }, + metrics={"global_test_accuracy": global_test_acc}, + ) + # send model back to NVFlare + flare.send(output_model) + print("Send params back", list(output_model.params.keys())) diff --git a/research/fed-bpt/src/global_es.py b/research/fed-bpt/src/global_es.py new file mode 100644 index 0000000000..572922e582 --- /dev/null +++ b/research/fed-bpt/src/global_es.py @@ -0,0 +1,147 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +import cma +import numpy as np + +from nvflare.app_common.abstract.fl_model import FLModel +from nvflare.app_common.workflows.fedavg import FedAvg + + +class GlobalES(FedAvg): + """Controller for [FedBPT](https://arxiv.org/abs/2310.01467) Workflow. 
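+    Each round, the controller broadcasts the global CMA-ES state to the sampled clients, collects
+    their locally evaluated solutions and fitnesses, applies a global ask/tell update, and rescales
+    sigma from the clients' accumulated local sigmas (see `run` below).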
+ Inherits arguments from the FedAvg base class. + *Note*: This class is based on the experimental `ModelController`. + + Provides the implementations for the `run` routine, controlling the main workflow: + - def run(self) + + The parent classes provide the default implementations for other routines. + + Args: + min_clients (int, optional): The minimum number of clients responses before + Workflow starts to wait for `wait_time_after_min_received`. Note that the workflow will move forward + when all available clients have responded regardless of this value. Defaults to 1000. + num_rounds (int, optional): The total number of training rounds. Defaults to 5. + persistor_id (str, optional): ID of the persistor component. Defaults to "persistor". + ignore_result_error (bool, optional): whether this controller can proceed if client result has errors. + Defaults to False. + allow_empty_global_weights (bool, optional): whether to allow empty global weights. Some pipelines can have + empty global weights at first round, such that clients start training from scratch without any global info. + Defaults to False. + task_check_period (float, optional): interval for checking status of tasks. Defaults to 0.5. + persist_every_n_rounds (int, optional): persist the global model every n rounds. Defaults to 1. + If n is 0 then no persist. + frac: Fraction of the number of clients used to determine the parents selection parameter. Sets popsize. Defaults to 1. + sigma: initial standard deviation. Defaults to 1. + intrinsic_dim: intrinsic dimimension of the initial solution. Defaults to 500. + seed: Seed for CMAEvolutionStrategy. Defaults to 42. + bound: bounds set for CMAEvolutionStrategy solutions if non-zero. Defaults to 0, i.e. no bounds used. + """ + + def __init__(self, *args, frac=1, sigma=1, intrinsic_dim=500, seed=42, bound=0, **kwargs): + super().__init__(*args, **kwargs) + + self.frac = frac + self.seed = seed + self.sigma = sigma + self.intrinsic_dim = intrinsic_dim + self.bound = bound + + def run(self) -> None: + local_cma_mu = 0.0 + + m = max(int(self.frac * self._min_clients), 1) + + self.info("Start FedBPT.") + cma_opts = { + "seed": self.seed, + "popsize": m, + "maxiter": self._num_rounds, # args.epochs, + "verbose": -1, + "CMA_mu": m, + } + self.info(f"Start GlobalES with {cma_opts}") + if self.bound > 0: + cma_opts["bounds"] = [-1 * self.bound, 1 * self.bound] + global_es = cma.CMAEvolutionStrategy(self.intrinsic_dim * [0], self.sigma, inopts=cma_opts) + + local_sigma_current = global_es.sigma + + client_prompt_dict = {} + for c in range(self._min_clients): + client_prompt_dict[c] = [copy.deepcopy(global_es.mean)] + server_prompts = [copy.deepcopy(global_es.mean)] + + # best_test_acc = 0 + for self._current_round in range(self._num_rounds): + global_solutions = [] + global_fitnesses = [] + client_sigma_list = [] + + self.info(f"Round {self._current_round} started.") + + clients = self.sample_clients(self._min_clients) + + global_model = FLModel(params={"global_es": global_es}) + results = self.send_model_and_wait(targets=clients, data=global_model) + + # get solutions from clients + for result in results: + global_solutions.append(result.params["solutions"]) + global_fitnesses.append(result.params["fitnesses"]) + client_sigma_list.append(np.sum(np.array(result.params["local_sigmas"]) ** 2)) + local_cma_mu = result.params["local_cma_mu"] + + # Global update + global_solutions = np.concatenate(global_solutions, axis=0) + global_fitnesses = np.concatenate(global_fitnesses) + self.info(f"Received 
{len(global_solutions)} solutions and {len(global_fitnesses)} fitnesses from clients") + if len(global_solutions) != len(global_fitnesses): + raise ValueError( + f"Mismatch between {len(global_solutions)} solutions and {len(global_fitnesses)} fitnesses!" + ) + + # calculate global sigma + global_sigma = np.sqrt(np.sum(np.array(client_sigma_list)) / m / local_cma_mu) + + global_es.sigma = global_sigma + self.info(f"Check sigma before: {global_es.sigma}") + global_sigma_old = global_es.sigma + + global_es.ask() + global_es.tell(global_solutions, global_fitnesses) + + server_prompts.append(copy.deepcopy(global_es.mean)) + + self.info(f"Check sigma after: {global_es.sigma}") + global_sigma_new = global_es.sigma + + # set local sigma + global_es.sigma = global_sigma_new / global_sigma_old * local_sigma_current + + local_sigma_current = global_es.sigma + + if global_es.sigma < 0.5: + global_es.sigma = 0.5 + self.info("Set sigma local: 0.5") + if global_es.sigma > local_sigma_current: + global_es.sigma = local_sigma_current + self.info("Set sigma local: not change") + + self.info(f"Check sigma local: {global_es.sigma}") + + self.info("Finished FedBPT.") diff --git a/research/fed-bpt/src/metrics/metrics.py b/research/fed-bpt/src/metrics/metrics.py new file mode 100644 index 0000000000..191b6cc25d --- /dev/null +++ b/research/fed-bpt/src/metrics/metrics.py @@ -0,0 +1,486 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Part of this code is adopted from BBT (https://github.com/txsun1997/Black-Box-Tuning) + +# MIT License +# +# Copyright (c) 2022 Tianxiang Sun +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
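+
+# Each metric below is a fastNLP `MetricBase` that maps its task's verbalizer token ids to class
+# labels and accumulates accuracy (F1 for MRPC) plus, for most tasks, hinge and cross-entropy
+# losses over the masked-token logits.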
+ +import torch +import torch.nn as nn +from fastNLP.core.metrics import MetricBase +from fastNLP.core.utils import _get_func_signature +from sklearn.metrics import accuracy_score, f1_score +from transformers import RobertaTokenizer +from utils import hinge_loss + + +class SST2Metric(MetricBase): + def __init__(self, pred=None, target=None, seq_len=None, tokenizer=None): + super().__init__() + self._init_param_map(pred=pred, target=target, seq_len=seq_len) + self._pred = [] + self._target = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + self.ce_fct = nn.CrossEntropyLoss(reduction="sum") + self.margin = 2 + if tokenizer is None: + tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + self.label_map = { + tokenizer.encode("bad", add_special_tokens=False)[0]: 0, # negative + tokenizer.encode("great", add_special_tokens=False)[0]: 1, # positive + } + + def evaluate(self, pred, target, seq_len=None): + if not isinstance(pred, torch.Tensor): + raise TypeError( + f"`pred` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(pred)}." + ) + if not isinstance(target, torch.Tensor): + raise TypeError( + f"`target` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(target)}." + ) + # pred: batch_size x seq_len x vocab_size + self.ce_loss += self.ce_fct(pred, target).item() + + # calculate hinge loss + hinge_target = target.clone() + for key, val in self.label_map.items(): + hinge_target[target == key] = val + + for t in hinge_target.cpu().numpy().tolist(): + self._target.append(t) + + interest_index = list(self.label_map.keys()) + pred = pred[:, interest_index] + self.hinge += hinge_loss(pred, hinge_target, self.margin, reduction="sum").item() + + pred = pred.argmax(dim=-1).detach().cpu().numpy().tolist() + self._pred.extend(pred) + + def get_metric(self, reset=True): + acc = accuracy_score(self._target, self._pred) + hinge_loss = self.hinge / len(self._target) + ce_loss = self.ce_loss / len(self._target) + if reset: + self._target = [] + self._pred = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + return {"acc": acc, "hinge": hinge_loss, "ce": ce_loss} + + +class YelpPMetric(MetricBase): + def __init__(self, pred=None, target=None, seq_len=None, tokenizer=None): + super().__init__() + self._init_param_map(pred=pred, target=target, seq_len=seq_len) + self._pred = [] + self._target = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + self.ce_fct = nn.CrossEntropyLoss(reduction="sum") + self.margin = 2 + if tokenizer is None: + tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + self.label_map = { + tokenizer.encode("bad", add_special_tokens=False)[0]: 0, # negative + tokenizer.encode("great", add_special_tokens=False)[0]: 1, # positive + } + + def evaluate(self, pred, target, seq_len=None): + if not isinstance(pred, torch.Tensor): + raise TypeError( + f"`pred` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(pred)}." + ) + if not isinstance(target, torch.Tensor): + raise TypeError( + f"`target` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(target)}." 
+ ) + # pred: batch_size x seq_len x vocab_size + self.ce_loss += self.ce_fct(pred, target).item() + + # calculate hinge loss + hinge_target = target.clone() + for key, val in self.label_map.items(): + hinge_target[target == key] = val + + for t in hinge_target.cpu().numpy().tolist(): + self._target.append(t) + + interest_index = list(self.label_map.keys()) + pred = pred[:, interest_index] + self.hinge += hinge_loss(pred, hinge_target, self.margin, reduction="sum").item() + pred = pred.argmax(dim=-1).detach().cpu().numpy().tolist() + self._pred.extend(pred) + + def get_metric(self, reset=True): + acc = accuracy_score(self._target, self._pred) + hinge_loss = self.hinge / len(self._target) + ce_loss = self.ce_loss / len(self._target) + if reset: + self._target = [] + self._pred = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + return {"acc": acc, "hinge": hinge_loss, "ce": ce_loss} + + +class AGNewsMetric(MetricBase): + def __init__(self, pred=None, target=None, seq_len=None, tokenizer=None): + super().__init__() + self._init_param_map(pred=pred, target=target, seq_len=seq_len) + self._pred = [] + self._target = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + self.ce_fct = nn.CrossEntropyLoss(reduction="sum") + self.margin = 2 + if tokenizer is None: + tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + self.label_map = { + tokenizer.encode("World", add_special_tokens=False)[0]: 0, + tokenizer.encode("Sports", add_special_tokens=False)[0]: 1, + tokenizer.encode("Business", add_special_tokens=False)[0]: 2, + tokenizer.encode("Tech", add_special_tokens=False)[0]: 3, + } + + def evaluate(self, pred, target, seq_len=None): + if not isinstance(pred, torch.Tensor): + raise TypeError( + f"`pred` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(pred)}." + ) + if not isinstance(target, torch.Tensor): + raise TypeError( + f"`target` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(target)}." 
+ ) + # pred: batch_size x seq_len x vocab_size + self.ce_loss += self.ce_fct(pred, target).item() + + # calculate hinge loss + hinge_target = target.clone() + for key, val in self.label_map.items(): + hinge_target[target == key] = val + + for t in hinge_target.cpu().numpy().tolist(): + self._target.append(t) + + interest_index = list(self.label_map.keys()) + pred = pred[:, interest_index] + self.hinge += hinge_loss(pred, hinge_target, self.margin, reduction="sum").item() + pred = pred.argmax(dim=-1).detach().cpu().numpy().tolist() + self._pred.extend(pred) + + def get_metric(self, reset=True): + acc = accuracy_score(self._target, self._pred) + hinge_loss = self.hinge / len(self._target) + ce_loss = self.ce_loss / len(self._target) + if reset: + self._target = [] + self._pred = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + return {"acc": acc, "hinge": hinge_loss, "ce": ce_loss} + + +class DBPediaMetric(MetricBase): + def __init__(self, pred=None, target=None, seq_len=None, tokenizer=None): + super().__init__() + self._init_param_map(pred=pred, target=target, seq_len=seq_len) + self._pred = [] + self._target = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + self.ce_fct = nn.CrossEntropyLoss(reduction="sum") + self.margin = 2 + if tokenizer is None: + tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + self.label_map = { + tokenizer.encode("Company", add_special_tokens=False)[0]: 0, + tokenizer.encode("Education", add_special_tokens=False)[0]: 1, + tokenizer.encode("Artist", add_special_tokens=False)[0]: 2, + tokenizer.encode("Athlete", add_special_tokens=False)[0]: 3, + tokenizer.encode("Office", add_special_tokens=False)[0]: 4, + tokenizer.encode("Transportation", add_special_tokens=False)[0]: 5, + tokenizer.encode("Building", add_special_tokens=False)[0]: 6, + tokenizer.encode("Natural", add_special_tokens=False)[0]: 7, + tokenizer.encode("Village", add_special_tokens=False)[0]: 8, + tokenizer.encode("Animal", add_special_tokens=False)[0]: 9, + tokenizer.encode("Plant", add_special_tokens=False)[0]: 10, + tokenizer.encode("Album", add_special_tokens=False)[0]: 11, + tokenizer.encode("Film", add_special_tokens=False)[0]: 12, + tokenizer.encode("Written", add_special_tokens=False)[0]: 13, + } + + def evaluate(self, pred, target, seq_len=None): + if not isinstance(pred, torch.Tensor): + raise TypeError( + f"`pred` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(pred)}." + ) + if not isinstance(target, torch.Tensor): + raise TypeError( + f"`target` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(target)}." 
+            )
+        # pred: batch_size x seq_len x vocab_size
+        self.ce_loss += self.ce_fct(pred, target).item()
+
+        # calculate hinge loss
+        hinge_target = target.clone()
+        for key, val in self.label_map.items():
+            hinge_target[target == key] = val
+
+        for t in hinge_target.cpu().numpy().tolist():
+            self._target.append(t)
+
+        interest_index = list(self.label_map.keys())
+        pred = pred[:, interest_index]
+        self.hinge += hinge_loss(pred, hinge_target, self.margin, reduction="sum").item()
+        pred = pred.argmax(dim=-1).detach().cpu().numpy().tolist()
+        self._pred.extend(pred)
+
+    def get_metric(self, reset=True):
+        acc = accuracy_score(self._target, self._pred)
+        hinge_loss = self.hinge / len(self._target)
+        ce_loss = self.ce_loss / len(self._target)
+        if reset:
+            self._target = []
+            self._pred = []
+            self.hinge = 0.0
+            self.ce_loss = 0.0
+        return {"acc": acc, "hinge": hinge_loss, "ce": ce_loss}
+
+
+class MRPCMetric(MetricBase):
+    def __init__(self, pred=None, target=None, seq_len=None, tokenizer=None):
+        super().__init__()
+        self._init_param_map(pred=pred, target=target, seq_len=seq_len)
+        self._pred = []
+        self._target = []
+        self.hinge = 0.0
+        self.ce_loss = 0.0
+        self.ce_fct = nn.CrossEntropyLoss(reduction="sum")
+        self.margin = 2
+        if tokenizer is None:
+            tokenizer = RobertaTokenizer.from_pretrained("roberta-large")
+        self.label_map = {
+            tokenizer.encode("No", add_special_tokens=False)[0]: 0,  # not duplicate
+            tokenizer.encode("Yes", add_special_tokens=False)[0]: 1,  # duplicate
+        }
+
+    def evaluate(self, pred, target, seq_len=None):
+        if not isinstance(pred, torch.Tensor):
+            raise TypeError(
+                f"`pred` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(pred)}."
+            )
+        if not isinstance(target, torch.Tensor):
+            raise TypeError(
+                f"`target` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(target)}."
+            )
+        # pred: batch_size x seq_len x vocab_size
+        self.ce_loss += self.ce_fct(pred, target).item()
+
+        # calculate hinge loss
+        hinge_target = target.clone()
+        for key, val in self.label_map.items():
+            hinge_target[target == key] = val
+
+        for t in hinge_target.cpu().numpy().tolist():
+            self._target.append(t)
+
+        interest_index = list(self.label_map.keys())
+        pred = pred[:, interest_index]
+        self.hinge += hinge_loss(pred, hinge_target, self.margin, reduction="sum").item()
+        pred = pred.argmax(dim=-1).detach().cpu().numpy().tolist()
+        self._pred.extend(pred)
+
+    def get_metric(self, reset=True):
+        f1 = f1_score(self._target, self._pred)
+        hinge_loss = self.hinge / len(self._target)
+        ce_loss = self.ce_loss / len(self._target)
+        if reset:
+            self._target = []
+            self._pred = []
+            self.hinge = 0.0
+            self.ce_loss = 0.0
+        return {"f1": f1, "hinge": hinge_loss, "ce": ce_loss}
+
+
+class MNLIMetric(MetricBase):
+    def __init__(self, pred=None, target=None, seq_len=None, tokenizer=None):
+        super().__init__()
+        self._init_param_map(pred=pred, target=target, seq_len=seq_len)
+        self._pred = []
+        self._target = []
+        if tokenizer is None:
+            tokenizer = RobertaTokenizer.from_pretrained("roberta-large")
+        self.label_map = {
+            tokenizer.encode("Yes", add_special_tokens=False)[0]: 0,
+            tokenizer.encode("Maybe", add_special_tokens=False)[0]: 1,
+            tokenizer.encode("No", add_special_tokens=False)[0]: 2,
+        }
+
+    def evaluate(self, pred, target, seq_len=None):
+        if not isinstance(pred, torch.Tensor):
+            raise TypeError(
+                f"`pred` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(pred)}."
+ ) + if not isinstance(target, torch.Tensor): + raise TypeError( + f"`target` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(target)}." + ) + + target = target.cpu().numpy().tolist() + for t in target: + self._target.append(self.label_map[t]) + + interest_index = list(self.label_map.keys()) + pred = pred[:, interest_index].argmax(dim=-1).detach().cpu().numpy().tolist() + self._pred.extend(pred) + + def get_metric(self, reset=True): + acc = accuracy_score(self._target, self._pred) + if reset: + self._target = [] + self._pred = [] + return {"acc": acc} + + +class RTEMetric(MetricBase): + def __init__(self, pred=None, target=None, seq_len=None, tokenizer=None): + super().__init__() + self._init_param_map(pred=pred, target=target, seq_len=seq_len) + self._pred = [] + self._target = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + self.ce_fct = nn.CrossEntropyLoss(reduction="sum") + self.margin = 2 + if tokenizer is None: + tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + self.label_map = { + tokenizer.encode("Yes", add_special_tokens=False)[0]: 0, + tokenizer.encode("No", add_special_tokens=False)[0]: 1, + } + + def evaluate(self, pred, target, seq_len=None): + if not isinstance(pred, torch.Tensor): + raise TypeError( + f"`pred` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(pred)}." + ) + if not isinstance(target, torch.Tensor): + raise TypeError( + f"`target` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(target)}." + ) + # pred: batch_size x seq_len x vocab_size + self.ce_loss += self.ce_fct(pred, target).item() + + # calculate hinge loss + hinge_target = target.clone() + for key, val in self.label_map.items(): + hinge_target[target == key] = val + + for t in hinge_target.cpu().numpy().tolist(): + self._target.append(t) + + interest_index = list(self.label_map.keys()) + pred = pred[:, interest_index] + self.hinge += hinge_loss(pred, hinge_target, self.margin, reduction="sum").item() + pred = pred.argmax(dim=-1).detach().cpu().numpy().tolist() + self._pred.extend(pred) + + def get_metric(self, reset=True): + acc = accuracy_score(self._target, self._pred) + hinge_loss = self.hinge / len(self._target) + ce_loss = self.ce_loss / len(self._target) + if reset: + self._target = [] + self._pred = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + return {"acc": acc, "hinge": hinge_loss, "ce": ce_loss} + + +class SNLIMetric(MetricBase): + def __init__(self, pred=None, target=None, seq_len=None, tokenizer=None): + super().__init__() + self._init_param_map(pred=pred, target=target, seq_len=seq_len) + self._pred = [] + self._target = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + self.ce_fct = nn.CrossEntropyLoss(reduction="sum") + self.margin = 2 + if tokenizer is None: + tokenizer = RobertaTokenizer.from_pretrained("roberta-large") + self.label_map = { + tokenizer.encode("Yes", add_special_tokens=False)[0]: 0, + tokenizer.encode("Maybe", add_special_tokens=False)[0]: 1, + tokenizer.encode("No", add_special_tokens=False)[0]: 2, + } + + def evaluate(self, pred, target, seq_len=None): + if not isinstance(pred, torch.Tensor): + raise TypeError( + f"`pred` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(pred)}." + ) + if not isinstance(target, torch.Tensor): + raise TypeError( + f"`target` in {_get_func_signature(self.evaluate)} must be torch.Tensor," f"got {type(target)}." 
+ ) + # pred: batch_size x seq_len x vocab_size + self.ce_loss += self.ce_fct(pred, target).item() + + # calculate hinge loss + hinge_target = target.clone() + for key, val in self.label_map.items(): + hinge_target[target == key] = val + + for t in hinge_target.cpu().numpy().tolist(): + self._target.append(t) + + interest_index = list(self.label_map.keys()) + pred = pred[:, interest_index] + self.hinge += hinge_loss(pred, hinge_target, self.margin, reduction="sum").item() + pred = pred.argmax(dim=-1).detach().cpu().numpy().tolist() + self._pred.extend(pred) + + def get_metric(self, reset=True): + acc = accuracy_score(self._target, self._pred) + hinge_loss = self.hinge / len(self._target) + ce_loss = self.ce_loss / len(self._target) + if reset: + self._target = [] + self._pred = [] + self.hinge = 0.0 + self.ce_loss = 0.0 + return {"acc": acc, "hinge": hinge_loss, "ce": ce_loss} diff --git a/research/fed-bpt/src/models/modeling_roberta.py b/research/fed-bpt/src/models/modeling_roberta.py new file mode 100644 index 0000000000..548b0af4f9 --- /dev/null +++ b/research/fed-bpt/src/models/modeling_roberta.py @@ -0,0 +1,1703 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch RoBERTa model. """ +import math + +import torch +import torch.nn as nn +from torch.nn import CrossEntropyLoss, MSELoss +from transformers.activations import ACT2FN, gelu +from transformers.file_utils import ( + add_code_sample_docstrings, + add_start_docstrings, + add_start_docstrings_to_model_forward, + replace_return_docstrings, +) +from transformers.modeling_outputs import ( + BaseModelOutputWithCrossAttentions, + BaseModelOutputWithPoolingAndCrossAttentions, + CausalLMOutputWithCrossAttentions, + MaskedLMOutput, + MultipleChoiceModelOutput, + QuestionAnsweringModelOutput, + SequenceClassifierOutput, + TokenClassifierOutput, +) +from transformers.modeling_utils import ( + PreTrainedModel, + apply_chunking_to_forward, + find_pruneable_heads_and_indices, + prune_linear_layer, +) +from transformers.models.roberta.configuration_roberta import RobertaConfig +from transformers.utils import logging + +# ort +try: + import numpy as np + import onnxruntime as ort +except ImportError: + pass + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "RobertaConfig" +_TOKENIZER_FOR_DOC = "RobertaTokenizer" + +ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [ + "roberta-base", + "roberta-large", + "roberta-large-mnli", + "distilroberta-base", + "roberta-base-openai-detector", + "roberta-large-openai-detector", + # See all RoBERTa models at https://huggingface.co/models?filter=roberta +] + + +class RobertaEmbeddings(nn.Module): + """ + Same as BertEmbeddings with a tiny tweak for positional embeddings indexing. 
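+    The tweak: position ids are derived from the input ids so that padding tokens keep
+    `padding_idx` while real tokens are numbered from `padding_idx + 1`.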
+ """ + + # Copied from transformers.models.bert.modeling_bert.BertEmbeddings.__init__ + def __init__(self, config): + super().__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + # position_ids (1, len position emb) is contiguous in memory and exported when serialized + self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1))) + self.position_embedding_type = getattr(config, "position_embedding_type", "absolute") + + # End copy + self.padding_idx = config.pad_token_id + self.position_embeddings = nn.Embedding( + config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx + ) + + def forward(self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None): + if position_ids is None: + if input_ids is not None: + # Create the position ids from the input token ids. Any padded tokens remain padded. + position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx).to(input_ids.device) + else: + position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds) + + # Copied from transformers.models.bert.modeling_bert.BertEmbeddings.forward + if input_ids is not None: + input_shape = input_ids.size() + else: + input_shape = inputs_embeds.size()[:-1] + + seq_length = input_shape[1] + + if position_ids is None: + position_ids = self.position_ids[:, :seq_length] + + if token_type_ids is None: + token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device) + + if inputs_embeds is None: + inputs_embeds = self.word_embeddings(input_ids) + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = inputs_embeds + token_type_embeddings + if self.position_embedding_type == "absolute": + position_embeddings = self.position_embeddings(position_ids) + embeddings += position_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + return embeddings + + def create_position_ids_from_inputs_embeds(self, inputs_embeds): + """ + We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids. 
+ + Args: + inputs_embeds: torch.Tensor + + Returns: torch.Tensor + """ + input_shape = inputs_embeds.size()[:-1] + sequence_length = input_shape[1] + + position_ids = torch.arange( + self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device + ) + return position_ids.unsqueeze(0).expand(input_shape) + + +# Copied from transformers.models.bert.modeling_bert.BertSelfAttention with Bert->Roberta +class RobertaSelfAttention(nn.Module): + def __init__(self, config): + super().__init__() + if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): + raise ValueError( + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads) + ) + + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + self.position_embedding_type = getattr(config, "position_embedding_type", "absolute") + if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query": + self.max_position_embeddings = config.max_position_embeddings + self.distance_embedding = nn.Embedding(2 * config.max_position_embeddings - 1, self.attention_head_size) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=False, + ): + mixed_query_layer = self.query(hidden_states) + + # If this is instantiated as a cross-attention module, the keys + # and values come from an encoder; the attention mask needs to be + # such that the encoder's padding tokens are not attended to. + if encoder_hidden_states is not None: + mixed_key_layer = self.key(encoder_hidden_states) + mixed_value_layer = self.value(encoder_hidden_states) + attention_mask = encoder_attention_mask + else: + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. 
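+        # attention_scores: (batch, num_heads, seq_len, seq_len); below it is scaled by
+        # 1/sqrt(head_size), shifted by the additive attention mask (large negative values at
+        # masked positions), and normalized with a softmax.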
+ attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + + if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query": + seq_length = hidden_states.size()[1] + position_ids_l = torch.arange(seq_length, dtype=torch.long, device=hidden_states.device).view(-1, 1) + position_ids_r = torch.arange(seq_length, dtype=torch.long, device=hidden_states.device).view(1, -1) + distance = position_ids_l - position_ids_r + positional_embedding = self.distance_embedding(distance + self.max_position_embeddings - 1) + positional_embedding = positional_embedding.to(dtype=query_layer.dtype) # fp16 compatibility + + if self.position_embedding_type == "relative_key": + relative_position_scores = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding) + attention_scores = attention_scores + relative_position_scores + elif self.position_embedding_type == "relative_key_query": + relative_position_scores_query = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding) + relative_position_scores_key = torch.einsum("bhrd,lrd->bhlr", key_layer, positional_embedding) + attention_scores = attention_scores + relative_position_scores_query + relative_position_scores_key + + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + if attention_mask is not None: + # Apply the attention mask is (precomputed for all layers in RobertaModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(dim=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. + attention_probs = self.dropout(attention_probs) + + # Mask heads if we want to + if head_mask is not None: + attention_probs = attention_probs * head_mask + + context_layer = torch.matmul(attention_probs, value_layer) + + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + + outputs = (context_layer, attention_probs) if output_attentions else (context_layer,) + return outputs + + +# Copied from transformers.models.bert.modeling_bert.BertSelfOutput +class RobertaSelfOutput(nn.Module): + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +# Copied from transformers.models.bert.modeling_bert.BertAttention with Bert->Roberta +class RobertaAttention(nn.Module): + def __init__(self, config): + super().__init__() + self.self = RobertaSelfAttention(config) + self.output = RobertaSelfOutput(config) + self.pruned_heads = set() + + def prune_heads(self, heads): + if len(heads) == 0: + return + heads, index = find_pruneable_heads_and_indices( + heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads + ) + + # Prune linear layers + self.self.query = prune_linear_layer(self.self.query, index) + self.self.key = prune_linear_layer(self.self.key, index) + 
self.self.value = prune_linear_layer(self.self.value, index) + self.output.dense = prune_linear_layer(self.output.dense, index, dim=1) + + # Update hyper params and store pruned heads + self.self.num_attention_heads = self.self.num_attention_heads - len(heads) + self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads + self.pruned_heads = self.pruned_heads.union(heads) + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=False, + ): + self_outputs = self.self( + hidden_states, + attention_mask, + head_mask, + encoder_hidden_states, + encoder_attention_mask, + output_attentions, + ) + attention_output = self.output(self_outputs[0], hidden_states) + outputs = (attention_output,) + self_outputs[1:] # add attentions if we output them + return outputs + + +# Copied from transformers.models.bert.modeling_bert.BertIntermediate +class RobertaIntermediate(nn.Module): + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + if isinstance(config.hidden_act, str): + self.intermediate_act_fn = ACT2FN[config.hidden_act] + else: + self.intermediate_act_fn = config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +# Copied from transformers.models.bert.modeling_bert.BertOutput +class RobertaOutput(nn.Module): + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +# Copied from transformers.models.bert.modeling_bert.BertLayer with Bert->Roberta +class RobertaLayer(nn.Module): + def __init__(self, config): + super().__init__() + self.chunk_size_feed_forward = config.chunk_size_feed_forward + self.seq_len_dim = 1 + self.attention = RobertaAttention(config) + self.is_decoder = config.is_decoder + self.add_cross_attention = config.add_cross_attention + if self.add_cross_attention: + assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added" + self.crossattention = RobertaAttention(config) + self.intermediate = RobertaIntermediate(config) + self.output = RobertaOutput(config) + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=False, + ): + self_attention_outputs = self.attention( + hidden_states, + attention_mask, + head_mask, + output_attentions=output_attentions, + ) + attention_output = self_attention_outputs[0] + outputs = self_attention_outputs[1:] # add self attentions if we output attention weights + if self.is_decoder and encoder_hidden_states is not None: + assert hasattr( + self, "crossattention" + ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`" + cross_attention_outputs = self.crossattention( + attention_output, + attention_mask, + head_mask, + encoder_hidden_states, + encoder_attention_mask, + 
output_attentions, + ) + attention_output = cross_attention_outputs[0] + outputs = outputs + cross_attention_outputs[1:] # add cross attentions if we output attention weights + layer_output = apply_chunking_to_forward( + self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, attention_output + ) + outputs = (layer_output,) + outputs + return outputs + + def feed_forward_chunk(self, attention_output): + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +# Copied from transformers.models.bert.modeling_bert.BertEncoder with Bert->Roberta +class RobertaEncoder(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.layer = nn.ModuleList([RobertaLayer(config) for _ in range(config.num_hidden_layers)]) + + def forward( + self, + hidden_states, + attention_mask=None, + head_mask=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=False, + output_hidden_states=False, + return_dict=True, + ): + all_hidden_states = () if output_hidden_states else None + all_self_attentions = () if output_attentions else None + all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None + for i, layer_module in enumerate(self.layer): + if output_hidden_states: + all_hidden_states = all_hidden_states + (hidden_states,) + + layer_head_mask = head_mask[i] if head_mask is not None else None + if getattr(self.config, "gradient_checkpointing", False): + + def create_custom_forward(module): + def custom_forward(*inputs): + return module(*inputs, output_attentions) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(layer_module), + hidden_states, + attention_mask, + layer_head_mask, + encoder_hidden_states, + encoder_attention_mask, + ) + else: + layer_outputs = layer_module( + hidden_states, + attention_mask, + layer_head_mask, + encoder_hidden_states, + encoder_attention_mask, + output_attentions, + ) + hidden_states = layer_outputs[0] + if output_attentions: + all_self_attentions = all_self_attentions + (layer_outputs[1],) + if self.config.add_cross_attention: + all_cross_attentions = all_cross_attentions + (layer_outputs[2],) + if output_hidden_states: + all_hidden_states = all_hidden_states + (hidden_states,) + + if not return_dict: + return tuple( + v + for v in [hidden_states, all_hidden_states, all_self_attentions, all_cross_attentions] + if v is not None + ) + return BaseModelOutputWithCrossAttentions( + last_hidden_state=hidden_states, + hidden_states=all_hidden_states, + attentions=all_self_attentions, + cross_attentions=all_cross_attentions, + ) + + +# Copied from transformers.models.bert.modeling_bert.BertPooler +class RobertaPooler(nn.Module): + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class RobertaPreTrainedModel(PreTrainedModel): + """ + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained + models. 
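+    Linear and embedding weights are initialized from a normal distribution with standard deviation
+    ``config.initializer_range``; LayerNorm weights are reset to one and biases to zero (see :meth:`_init_weights`).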
+ """ + + config_class = RobertaConfig + base_model_prefix = "roberta" + + # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights + def _init_weights(self, module): + """Initialize the weights""" + if isinstance(module, (nn.Linear, nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + +ROBERTA_START_DOCSTRING = r""" + + This model inherits from :class:`~transformers.PreTrainedModel`. Check the superclass documentation for the generic + methods the library implements for all its model (such as downloading or saving, resizing the input embeddings, + pruning heads etc.) + + This model is also a PyTorch `torch.nn.Module `__ + subclass. Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to + general usage and behavior. + + Parameters: + config (:class:`~transformers.RobertaConfig`): Model configuration class with all the parameters of the + model. Initializing with a config file does not load the weights associated with the model, only the + configuration. Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the model + weights. +""" + +ROBERTA_INPUTS_DOCSTRING = r""" + Args: + input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`): + Indices of input sequence tokens in the vocabulary. + + Indices can be obtained using :class:`~transformers.RobertaTokenizer`. See + :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__` for + details. + + `What are input IDs? <../glossary.html#input-ids>`__ + attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`): + Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + `What are attention masks? <../glossary.html#attention-mask>`__ + token_type_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`): + Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, + 1]``: + + - 0 corresponds to a `sentence A` token, + - 1 corresponds to a `sentence B` token. + + `What are token type IDs? <../glossary.html#token-type-ids>`_ + position_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`, `optional`): + Indices of positions of each input sequence tokens in the position embeddings. Selected in the range ``[0, + config.max_position_embeddings - 1]``. + + `What are position IDs? <../glossary.html#position-ids>`_ + head_mask (:obj:`torch.FloatTensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`): + Mask to nullify selected heads of the self-attention modules. Mask values selected in ``[0, 1]``: + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + + inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`({0}, hidden_size)`, `optional`): + Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded representation. 
+ This is useful if you want more control over how to convert :obj:`input_ids` indices into associated + vectors than the model's internal embedding lookup matrix. + output_attentions (:obj:`bool`, `optional`): + Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned + tensors for more detail. + output_hidden_states (:obj:`bool`, `optional`): + Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for + more detail. + return_dict (:obj:`bool`, `optional`): + Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple. +""" + + +@add_start_docstrings( + "The bare RoBERTa Model transformer outputting raw hidden-states without any specific head on top.", + ROBERTA_START_DOCSTRING, +) +class RobertaModel(RobertaPreTrainedModel): + """ + + The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of + cross-attention is added between the self-attention layers, following the architecture described in `Attention is + all you need`_ by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz + Kaiser and Illia Polosukhin. + + To behave as an decoder the model needs to be initialized with the :obj:`is_decoder` argument of the configuration + set to :obj:`True`. To be used in a Seq2Seq model, the model needs to initialized with both :obj:`is_decoder` + argument and :obj:`add_cross_attention` set to :obj:`True`; an :obj:`encoder_hidden_states` is then expected as an + input to the forward pass. + + .. _`Attention is all you need`: https://arxiv.org/abs/1706.03762 + + """ + + _keys_to_ignore_on_load_missing = [r"position_ids"] + + # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->Roberta + def __init__(self, config, add_pooling_layer=True): + super().__init__(config) + self.config = config + + self.embeddings = RobertaEmbeddings(config) + self.encoder = RobertaEncoder(config) + + self.pooler = RobertaPooler(config) if add_pooling_layer else None + + self.init_weights() + + def get_input_embeddings(self): + return self.embeddings.word_embeddings + + def set_input_embeddings(self, value): + self.embeddings.word_embeddings = value + + def _prune_heads(self, heads_to_prune): + """ + Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base + class PreTrainedModel + """ + for layer, heads in heads_to_prune.items(): + self.encoder.layer[layer].attention.prune_heads(heads) + + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("(batch_size, sequence_length)")) + @add_code_sample_docstrings( + checkpoint="roberta-base", + output_type=BaseModelOutputWithPoolingAndCrossAttentions, + config_class=_CONFIG_FOR_DOC, + ) + # Copied from transformers.models.bert.modeling_bert.BertModel.forward + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + prompt_embedding=None, + concat_prompt=False, + ): + r""" + encoder_hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): + Sequence of hidden-states at the output of the last layer of the encoder. 
Used in the cross-attention if + the model is configured as a decoder. + encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in + the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``: ``1`` for + tokens that are NOT MASKED, ``0`` for MASKED tokens. + """ + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + if input_ids is not None and inputs_embeds is not None: + raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") + elif input_ids is not None: + input_shape = input_ids.size() + elif inputs_embeds is not None: + input_shape = inputs_embeds.size()[:-1] + else: + raise ValueError("You have to specify either input_ids or inputs_embeds") + + device = input_ids.device if input_ids is not None else inputs_embeds.device + if attention_mask is None: + attention_mask = torch.ones(input_shape, device=device) + if token_type_ids is None: + token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device) + + # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] + # ourselves in which case we just need to make it broadcastable to all heads. + if ( + concat_prompt and prompt_embedding is not None + ): # if concat prompt embedding, we should also extend attention mask + n_prompt_tokens = prompt_embedding.shape[1] + attention_mask_padding = torch.ones(input_shape[0], n_prompt_tokens).to(attention_mask.device) + attention_mask = torch.cat([attention_mask_padding, attention_mask], 1).to(input_ids.device) + input_shape = attention_mask.shape + extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape, device) + # If a 2D or 3D attention mask is provided for the cross-attention + # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length] + if self.config.is_decoder and encoder_hidden_states is not None: + encoder_batch_size, encoder_sequence_length, _ = encoder_hidden_states.size() + encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length) + if encoder_attention_mask is None: + encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device) + encoder_extended_attention_mask = self.invert_attention_mask(encoder_attention_mask) + else: + encoder_extended_attention_mask = None + # Prepare head mask if needed + # 1.0 in head_mask indicate we keep the head + # attention_probs has shape bsz x n_heads x N x N + # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] + # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length] + head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers) + + embedding_output = self.embeddings( + input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds + ) + # extend inputs_embeds + if prompt_embedding is not None: + if concat_prompt: # concat prompt embedding with embedding_output + # prompt_embedding = prompt_embedding.repeat(input_shape[0], 1, 1).to(embedding_output.device) + embedding_output = 
torch.cat([prompt_embedding, embedding_output], dim=1) + else: + bsz, n_prompt_tokens, prompt_dim = prompt_embedding.shape + prompt_padding = torch.zeros(bsz, input_shape[1] - n_prompt_tokens - 1, prompt_dim).to( + embedding_output.device + ) + extended_prompt_embedding = torch.cat([prompt_embedding, prompt_padding], dim=1) + pre_padding = torch.zeros(bsz, 1, prompt_dim).to(embedding_output.device) + extended_prompt_embedding = torch.cat([pre_padding, extended_prompt_embedding], dim=1) # for + # extended_prompt_embedding = extended_prompt_embedding.repeat(input_shape[0], 1, 1) + embedding_output = embedding_output + extended_prompt_embedding + encoder_outputs = self.encoder( + embedding_output, + attention_mask=extended_attention_mask, + head_mask=head_mask, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_extended_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = encoder_outputs[0] + pooled_output = self.pooler(sequence_output) if self.pooler is not None else None + + if not return_dict: + return (sequence_output, pooled_output) + encoder_outputs[1:] + return sequence_output + + +@add_start_docstrings( + """RoBERTa Model with a `language modeling` head on top for CLM fine-tuning. """, ROBERTA_START_DOCSTRING +) +class RobertaForCausalLM(RobertaPreTrainedModel): + _keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"] + _keys_to_ignore_on_load_unexpected = [r"pooler"] + + def __init__(self, config): + super().__init__(config) + + if not config.is_decoder: + logger.warning("If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`") + + self.roberta = RobertaModel(config, add_pooling_layer=False) + self.lm_head = RobertaLMHead(config) + + self.init_weights() + + def get_output_embeddings(self): + return self.lm_head.decoder + + def set_output_embeddings(self, new_embeddings): + self.lm_head.decoder = new_embeddings + + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC) + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + labels=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + ): + r""" + encoder_hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): + Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if + the model is configured as a decoder. + encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in + the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Labels for computing the left-to-right language modeling loss (next word prediction). 
Indices should be in + ``[-100, 0, ..., config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are + ignored (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` + + Returns: + + Example:: + + >>> from transformers import RobertaTokenizer, RobertaForCausalLM, RobertaConfig + >>> import torch + + >>> tokenizer = RobertaTokenizer.from_pretrained('roberta-base') + >>> config = RobertaConfig.from_pretrained("roberta-base") + >>> config.is_decoder = True + >>> model = RobertaForCausalLM.from_pretrained('roberta-base', config=config) + + >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") + >>> outputs = model(**inputs) + + >>> prediction_logits = outputs.logits + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + sequence_output = outputs[0] + prediction_scores = self.lm_head(sequence_output) + + lm_loss = None + if labels is not None: + # we are doing next-token prediction; shift prediction scores and input ids by one + shifted_prediction_scores = prediction_scores[:, :-1, :].contiguous() + labels = labels[:, 1:].contiguous() + loss_fct = CrossEntropyLoss() + lm_loss = loss_fct(shifted_prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)) + + if not return_dict: + output = (prediction_scores,) + outputs[2:] + return ((lm_loss,) + output) if lm_loss is not None else output + + return CausalLMOutputWithCrossAttentions( + loss=lm_loss, + logits=prediction_scores, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + cross_attentions=outputs.cross_attentions, + ) + + def prepare_inputs_for_generation(self, input_ids, attention_mask=None, **model_kwargs): + input_shape = input_ids.shape + + # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly + if attention_mask is None: + attention_mask = input_ids.new_ones(input_shape) + + return {"input_ids": input_ids, "attention_mask": attention_mask} + + +@add_start_docstrings("""RoBERTa Model with a `language modeling` head on top. """, ROBERTA_START_DOCSTRING) +class RobertaForMaskedLM(RobertaPreTrainedModel): + _keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"] + _keys_to_ignore_on_load_unexpected = [r"pooler"] + + def __init__(self, config, n_prompt_tokens, inference_framework="pt", onnx_model_path=None): + super().__init__(config) + + if config.is_decoder: + logger.warning( + "If you want to use `RobertaForMaskedLM` make sure `config.is_decoder=False` for " + "bi-directional self-attention." 
+ ) + + self.lm_head = RobertaLMHead(config) + self.roberta = RobertaModel(config, add_pooling_layer=False) + self.init_weights() + # if inference_framework == 'ort': + # del self.roberta + + # elif inference_framework == 'ort': + # self._init_weight(self.lm_head) + + self.n_prompt_tokens = n_prompt_tokens + self.prompt_embedding = None + self.concat_prompt = False + + self.inference_framework = inference_framework + + if inference_framework == "ort": + ort_option = ort.SessionOptions() + self.ort_session = ort.InferenceSession( + onnx_model_path, ort_option, providers=["CUDAExecutionProvider", "CPUExecutionProvider"] + ) + + def set_prompt_embedding(self, prompt_embedding): + self.prompt_embedding = prompt_embedding + + def set_concat_prompt(self, flag=True): + self.concat_prompt = flag + + def get_output_embeddings(self): + return self.lm_head.decoder + + def set_output_embeddings(self, new_embeddings): + self.lm_head.decoder = new_embeddings + + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_code_sample_docstrings( + checkpoint="roberta-base", + output_type=MaskedLMOutput, + config_class=_CONFIG_FOR_DOC, + mask="", + ) + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + labels=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + mask_pos=None, + prompt_embedding=None, + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ..., + config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored + (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` + kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): + Used to hide legacy arguments that have been deprecated. 
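+        mask_pos (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
+            Position of the ``<mask>`` token in each input sequence; the returned ``logits`` are taken from the
+            hidden state at this position. When :obj:`concat_prompt` is set, the position is shifted internally by
+            :obj:`n_prompt_tokens`.
+        prompt_embedding (:obj:`torch.FloatTensor`, `optional`):
+            Continuous prompt whose last dimension must equal the hidden size; it is reshaped to
+            ``(batch_size, n_prompt_tokens, hidden_size)`` and either prepended to or added onto the token
+            embeddings, depending on :obj:`concat_prompt`. Defaults to the value set via :obj:`set_prompt_embedding`.
+
+        Example (a minimal sketch of the FedBPT-specific arguments; the checkpoint name, prompt length, and
+        padding length are illustrative only)::
+
+            >>> import torch
+            >>> from transformers import RobertaConfig, RobertaTokenizer
+            >>> config = RobertaConfig.from_pretrained('roberta-base')
+            >>> tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
+            >>> model = RobertaForMaskedLM(config, n_prompt_tokens=50)  # randomly initialized weights here
+            >>> inputs = tokenizer("It was <mask> .", padding='max_length', max_length=256, return_tensors='pt')
+            >>> mask_pos = (inputs['input_ids'] == tokenizer.mask_token_id).nonzero()[:, 1]
+            >>> prompt = torch.zeros(1, 50, config.hidden_size)  # e.g. a projected CMA-ES candidate
+            >>> outputs = model(**inputs, mask_pos=mask_pos, prompt_embedding=prompt)
+            >>> logits = outputs['logits']  # shape (batch_size, vocab_size), taken at the <mask> position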
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + if prompt_embedding is None: + prompt_embedding = self.prompt_embedding + + if prompt_embedding is not None: + bsz = input_ids.shape[0] + prompt_dim = prompt_embedding.shape[-1] + prompt_embedding = prompt_embedding.reshape(-1, self.n_prompt_tokens, prompt_dim)[:bsz].to(input_ids.device) + if self.concat_prompt: + mask_pos = self.n_prompt_tokens + mask_pos + + if self.inference_framework == "pt": + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + prompt_embedding=prompt_embedding, + concat_prompt=self.concat_prompt, + ) + + elif self.inference_framework == "ort": + bsz, max_seq_len = input_ids.shape + outputs = torch.empty((bsz, max_seq_len, self.config.hidden_size), dtype=torch.float32, device="cuda") + io_binding = self.ort_session.io_binding() + io_binding.bind_input( + name="input_ids", + device_type="cuda", + device_id=0, + element_type=np.longlong, + shape=(bsz, max_seq_len), + buffer_ptr=input_ids.data_ptr(), + ) + io_binding.bind_input( + name="attention_mask", + device_type="cuda", + device_id=0, + element_type=np.longlong, + shape=attention_mask.shape, + buffer_ptr=attention_mask.data_ptr(), + ) + io_binding.bind_input( + name="prompt_embedding", + device_type="cuda", + device_id=0, + element_type=np.float32, + shape=prompt_embedding.shape, + buffer_ptr=prompt_embedding.data_ptr(), + ) + io_binding.bind_output( + "logits", + device_type="cuda", + device_id=0, + element_type=np.float32, + shape=outputs.shape, + buffer_ptr=outputs.data_ptr(), + ) + self.ort_session.run_with_iobinding(io_binding) + + return { + "logits": self.lm_head(outputs[torch.arange(outputs.size(0)), mask_pos]), + } + # masked_lm_loss = None + # if labels is not None: + # loss_fct = CrossEntropyLoss() + # masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)) + # + # if not return_dict: + # output = (prediction_scores,) + outputs[2:] + # return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output + # + # return MaskedLMOutput( + # loss=masked_lm_loss, + # logits=prediction_scores, + # hidden_states=outputs.hidden_states, + # attentions=outputs.attentions, + # ) + + +class RobertaForIntrinsicTuning(RobertaPreTrainedModel): + _keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"] + _keys_to_ignore_on_load_unexpected = [r"pooler"] + + def __init__(self, config, n_prompt_tokens, intrinsic_dim, inference_framework="pt", onnx_model_path=None): + super().__init__(config) + + if config.is_decoder: + logger.warning( + "If you want to use `RobertaForMaskedLM` make sure `config.is_decoder=False` for " + "bi-directional self-attention." 
+ ) + + self.lm_head = RobertaLMHead(config) + self.roberta = RobertaModel(config, add_pooling_layer=False) + self.init_weights() + # if inference_framework == 'ort': + # del self.roberta + + # elif inference_framework == 'ort': + # self._init_weight(self.lm_head) + + self.n_prompt_tokens = n_prompt_tokens + self.prompt_embedding = None + self.concat_prompt = False + self.intrinsic_embedding = nn.Parameter(torch.Tensor(intrinsic_dim)) + nn.init.normal_(self.intrinsic_embedding, mean=0, std=0.5) + self.linear = torch.nn.Linear(intrinsic_dim, n_prompt_tokens * config.hidden_size, bias=False) + + self.inference_framework = inference_framework + + if inference_framework == "ort": + ort_option = ort.SessionOptions() + self.ort_session = ort.InferenceSession( + onnx_model_path, ort_option, providers=["CUDAExecutionProvider", "CPUExecutionProvider"] + ) + + def set_prompt_embedding(self, prompt_embedding): + self.prompt_embedding = prompt_embedding + + def set_concat_prompt(self, flag=True): + self.concat_prompt = flag + + def get_output_embeddings(self): + return self.lm_head.decoder + + def set_output_embeddings(self, new_embeddings): + self.lm_head.decoder = new_embeddings + + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + labels=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + mask_pos=None, + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ..., + config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored + (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` + kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): + Used to hide legacy arguments that have been deprecated. 
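+        mask_pos (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
+            Position of the ``<mask>`` token in each input sequence; shifted internally by
+            :obj:`n_prompt_tokens` when :obj:`concat_prompt` is set. Unlike :class:`RobertaForMaskedLM`, no
+            ``prompt_embedding`` argument is taken here: the continuous prompt is produced by projecting the
+            trainable :obj:`intrinsic_embedding` through :obj:`linear`.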
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + prompt_embedding = self.linear(self.intrinsic_embedding) + + bsz = input_ids.shape[0] + prompt_embedding = prompt_embedding.reshape(self.n_prompt_tokens, -1).repeat(bsz, 1, 1).to(input_ids.device) + if self.concat_prompt: + mask_pos = self.n_prompt_tokens + mask_pos + + if self.inference_framework == "pt": + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + prompt_embedding=prompt_embedding, + concat_prompt=self.concat_prompt, + ) + + elif self.inference_framework == "ort": + bsz, max_seq_len = input_ids.shape + outputs = torch.empty((bsz, max_seq_len, self.config.hidden_size), dtype=torch.float32, device="cuda") + io_binding = self.ort_session.io_binding() + io_binding.bind_input( + name="input_ids", + device_type="cuda", + device_id=0, + element_type=np.longlong, + shape=(bsz, max_seq_len), + buffer_ptr=input_ids.data_ptr(), + ) + io_binding.bind_input( + name="attention_mask", + device_type="cuda", + device_id=0, + element_type=np.longlong, + shape=attention_mask.shape, + buffer_ptr=attention_mask.data_ptr(), + ) + io_binding.bind_input( + name="prompt_embedding", + device_type="cuda", + device_id=0, + element_type=np.float32, + shape=prompt_embedding.shape, + buffer_ptr=prompt_embedding.data_ptr(), + ) + io_binding.bind_output( + "logits", + device_type="cuda", + device_id=0, + element_type=np.float32, + shape=outputs.shape, + buffer_ptr=outputs.data_ptr(), + ) + self.ort_session.run_with_iobinding(io_binding) + + return { + "logits": self.lm_head(outputs[torch.arange(outputs.size(0)), mask_pos]), + } + # masked_lm_loss = None + # if labels is not None: + # loss_fct = CrossEntropyLoss() + # masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)) + # + # if not return_dict: + # output = (prediction_scores,) + outputs[2:] + # return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output + # + # return MaskedLMOutput( + # loss=masked_lm_loss, + # logits=prediction_scores, + # hidden_states=outputs.hidden_states, + # attentions=outputs.attentions, + # ) + + +class RobertaForPromptTuning(RobertaPreTrainedModel): + _keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"] + _keys_to_ignore_on_load_unexpected = [r"pooler"] + + def __init__(self, config, n_prompt_tokens): + super().__init__(config) + + if config.is_decoder: + logger.warning( + "If you want to use `RobertaForPromptTuning` make sure `config.is_decoder=False` for " + "bi-directional self-attention." 
+ ) + + self.roberta = RobertaModel(config, add_pooling_layer=False) + self.lm_head = RobertaLMHead(config) + + self.init_weights() + + self.n_prompt_tokens = n_prompt_tokens + self.prompt_embedding = nn.Embedding(n_prompt_tokens, config.hidden_size) + + def get_output_embeddings(self): + return self.lm_head.decoder + + def set_output_embeddings(self, new_embeddings): + self.lm_head.decoder = new_embeddings + + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + encoder_hidden_states=None, + encoder_attention_mask=None, + labels=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + mask_pos=None, + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ..., + config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored + (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]`` + kwargs (:obj:`Dict[str, any]`, optional, defaults to `{}`): + Used to hide legacy arguments that have been deprecated. + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + prompt_embedding = self.prompt_embedding(torch.arange(0, self.n_prompt_tokens).to(input_ids.device)) + mask_pos = mask_pos + self.n_prompt_tokens + + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + encoder_hidden_states=encoder_hidden_states, + encoder_attention_mask=encoder_attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + prompt_embedding=prompt_embedding, + concat_prompt=True, + ) + sequence_output = outputs[0] + sequence_mask_output = sequence_output[torch.arange(sequence_output.size(0)), mask_pos] + prediction_scores = self.lm_head(sequence_mask_output) + + masked_lm_loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1)) + + if not return_dict: + output = (prediction_scores,) + outputs[2:] + return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output + + return MaskedLMOutput( + loss=masked_lm_loss, + logits=prediction_scores, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + +class RobertaLMHead(nn.Module): + """Roberta Head for masked language modeling.""" + + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) + + self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + self.bias = nn.Parameter(torch.zeros(config.vocab_size)) + + # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings` + self.decoder.bias = self.bias + + def forward(self, features, **kwargs): + x = self.dense(features) + x = gelu(x) + x = self.layer_norm(x) + + # project back to size of vocabulary with bias + x = self.decoder(x) + + return x + + +@add_start_docstrings( + """ + RoBERTa Model transformer with a sequence classification/regression head on top (a linear layer on top of the + pooled output) e.g. 
for GLUE tasks. + """, + ROBERTA_START_DOCSTRING, +) +class RobertaForSequenceClassification(RobertaPreTrainedModel): + _keys_to_ignore_on_load_missing = [r"position_ids"] + + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + + self.roberta = RobertaModel(config, add_pooling_layer=False) + self.classifier = RobertaClassificationHead(config) + + self.init_weights() + + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_code_sample_docstrings( + checkpoint="roberta-base", + output_type=SequenceClassifierOutput, + config_class=_CONFIG_FOR_DOC, + ) + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + labels=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): + Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ..., + config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss), + If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy). + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + sequence_output = outputs[0] + logits = self.classifier(sequence_output) + + loss = None + if labels is not None: + if self.num_labels == 1: + # We are doing regression + loss_fct = MSELoss() + loss = loss_fct(logits.view(-1), labels.view(-1)) + else: + loss_fct = CrossEntropyLoss() + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return SequenceClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + +@add_start_docstrings( + """ + Roberta Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a + softmax) e.g. for RocStories/SWAG tasks. + """, + ROBERTA_START_DOCSTRING, +) +class RobertaForMultipleChoice(RobertaPreTrainedModel): + _keys_to_ignore_on_load_missing = [r"position_ids"] + + def __init__(self, config): + super().__init__(config) + + self.roberta = RobertaModel(config) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, 1) + + self.init_weights() + + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) + @add_code_sample_docstrings( + checkpoint="roberta-base", + output_type=MultipleChoiceModelOutput, + config_class=_CONFIG_FOR_DOC, + ) + def forward( + self, + input_ids=None, + token_type_ids=None, + attention_mask=None, + labels=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): + Labels for computing the multiple choice classification loss. 
Indices should be in ``[0, ..., + num_choices-1]`` where :obj:`num_choices` is the size of the second dimension of the input tensors. (See + :obj:`input_ids` above) + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1] + + flat_input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None + flat_position_ids = position_ids.view(-1, position_ids.size(-1)) if position_ids is not None else None + flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None + flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None + flat_inputs_embeds = ( + inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1)) + if inputs_embeds is not None + else None + ) + + outputs = self.roberta( + flat_input_ids, + position_ids=flat_position_ids, + token_type_ids=flat_token_type_ids, + attention_mask=flat_attention_mask, + head_mask=head_mask, + inputs_embeds=flat_inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + pooled_output = outputs[1] + + pooled_output = self.dropout(pooled_output) + logits = self.classifier(pooled_output) + reshaped_logits = logits.view(-1, num_choices) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + loss = loss_fct(reshaped_logits, labels) + + if not return_dict: + output = (reshaped_logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return MultipleChoiceModelOutput( + loss=loss, + logits=reshaped_logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + +@add_start_docstrings( + """ + Roberta Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for + Named-Entity-Recognition (NER) tasks. + """, + ROBERTA_START_DOCSTRING, +) +class RobertaForTokenClassification(RobertaPreTrainedModel): + _keys_to_ignore_on_load_unexpected = [r"pooler"] + _keys_to_ignore_on_load_missing = [r"position_ids"] + + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + + self.roberta = RobertaModel(config, add_pooling_layer=False) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, config.num_labels) + + self.init_weights() + + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_code_sample_docstrings( + checkpoint="roberta-base", + output_type=TokenClassifierOutput, + config_class=_CONFIG_FOR_DOC, + ) + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + labels=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + ): + r""" + labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): + Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels - + 1]``. 
+ """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + sequence_output = outputs[0] + + sequence_output = self.dropout(sequence_output) + logits = self.classifier(sequence_output) + + loss = None + if labels is not None: + loss_fct = CrossEntropyLoss() + # Only keep active parts of the loss + if attention_mask is not None: + active_loss = attention_mask.view(-1) == 1 + active_logits = logits.view(-1, self.num_labels) + active_labels = torch.where( + active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels) + ) + loss = loss_fct(active_logits, active_labels) + else: + loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + + if not return_dict: + output = (logits,) + outputs[2:] + return ((loss,) + output) if loss is not None else output + + return TokenClassifierOutput( + loss=loss, + logits=logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + +class RobertaClassificationHead(nn.Module): + """Head for sentence-level classification tasks.""" + + def __init__(self, config): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.out_proj = nn.Linear(config.hidden_size, config.num_labels) + + def forward(self, features, **kwargs): + x = features[:, 0, :] # take token (equiv. to [CLS]) + x = self.dropout(x) + x = self.dense(x) + x = torch.tanh(x) + x = self.dropout(x) + x = self.out_proj(x) + return x + + +@add_start_docstrings( + """ + Roberta Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear + layers on top of the hidden-states output to compute `span start logits` and `span end logits`). + """, + ROBERTA_START_DOCSTRING, +) +class RobertaForQuestionAnswering(RobertaPreTrainedModel): + _keys_to_ignore_on_load_unexpected = [r"pooler"] + _keys_to_ignore_on_load_missing = [r"position_ids"] + + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + + self.roberta = RobertaModel(config, add_pooling_layer=False) + self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels) + + self.init_weights() + + @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) + @add_code_sample_docstrings( + checkpoint="roberta-base", + output_type=QuestionAnsweringModelOutput, + config_class=_CONFIG_FOR_DOC, + ) + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + position_ids=None, + head_mask=None, + inputs_embeds=None, + start_positions=None, + end_positions=None, + output_attentions=None, + output_hidden_states=None, + return_dict=None, + ): + r""" + start_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): + Labels for position (index) of the start of the labelled span for computing the token classification loss. + Positions are clamped to the length of the sequence (:obj:`sequence_length`). Position outside of the + sequence are not taken into account for computing the loss. 
+ end_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): + Labels for position (index) of the end of the labelled span for computing the token classification loss. + Positions are clamped to the length of the sequence (:obj:`sequence_length`). Position outside of the + sequence are not taken into account for computing the loss. + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs = self.roberta( + input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask, + inputs_embeds=inputs_embeds, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + + sequence_output = outputs[0] + + logits = self.qa_outputs(sequence_output) + start_logits, end_logits = logits.split(1, dim=-1) + start_logits = start_logits.squeeze(-1) + end_logits = end_logits.squeeze(-1) + + total_loss = None + if start_positions is not None and end_positions is not None: + # If we are on multi-GPU, split add a dimension + if len(start_positions.size()) > 1: + start_positions = start_positions.squeeze(-1) + if len(end_positions.size()) > 1: + end_positions = end_positions.squeeze(-1) + # sometimes the start/end positions are outside our model inputs, we ignore these terms + ignored_index = start_logits.size(1) + start_positions.clamp_(0, ignored_index) + end_positions.clamp_(0, ignored_index) + + loss_fct = CrossEntropyLoss(ignore_index=ignored_index) + start_loss = loss_fct(start_logits, start_positions) + end_loss = loss_fct(end_logits, end_positions) + total_loss = (start_loss + end_loss) / 2 + + if not return_dict: + output = (start_logits, end_logits) + outputs[2:] + return ((total_loss,) + output) if total_loss is not None else output + + return QuestionAnsweringModelOutput( + loss=total_loss, + start_logits=start_logits, + end_logits=end_logits, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + +def create_position_ids_from_input_ids(input_ids, padding_idx): + """ + Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols + are ignored. This is modified from fairseq's `utils.make_positions`. + + Args: + x: torch.Tensor x: + + Returns: torch.Tensor + """ + # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA. + mask = input_ids.ne(padding_idx).int() + incremental_indices = torch.cumsum(mask, dim=1).type_as(mask) * mask + return incremental_indices.long() + padding_idx diff --git a/research/fed-bpt/src/utils.py b/research/fed-bpt/src/utils.py new file mode 100644 index 0000000000..1537806c8e --- /dev/null +++ b/research/fed-bpt/src/utils.py @@ -0,0 +1,71 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
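+
+# Small helpers for the FedBPT example: a multi-class hinge loss and an element-wise
+# average over a list of weight dictionaries.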
+ +# Part of this code is adopted from BBT (https://github.com/txsun1997/Black-Box-Tuning) + +# MIT License +# +# Copyright (c) 2022 Tianxiang Sun +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import copy + +import torch + +REDUCE_FN_MAPPINGS = {"sum": torch.sum, "mean": torch.mean, "none": lambda x: x} + + +def hinge_loss(logit, target, margin, reduction="sum"): + """ + Args: + logit (torch.Tensor): (N, C, d_1, d_2, ..., d_K) + target (torch.Tensor): (N, d_1, d_2, ..., d_K) + margin (float): + """ + target = target.unsqueeze(1) + tgt_logit = torch.gather(logit, dim=1, index=target) + loss = logit - tgt_logit + margin + loss = torch.masked_fill(loss, loss < 0, 0) + loss = torch.scatter(loss, dim=1, index=target, value=0) + reduce_fn = REDUCE_FN_MAPPINGS[reduction] + return reduce_fn(loss) + + +def average_weights(w): + """ + Returns the average of the weights. + """ + w_avg = copy.deepcopy(w[0]) + for key in w_avg.keys(): + for i in range(1, len(w)): + w_avg[key] += w[i][key] + w_avg[key] = torch.div(w_avg[key], len(w)) + return w_avg diff --git a/research/one-shot-vfl/README.md b/research/one-shot-vfl/README.md index 836a559ffc..ffc153d535 100644 --- a/research/one-shot-vfl/README.md +++ b/research/one-shot-vfl/README.md @@ -1,7 +1,7 @@ # One-shot Vertical Federated Learning with CIFAR-10 This example includes instructions on how to run [one-shot vertical federated learning](https://arxiv.org/abs/2303.16270) using the -CIFAR-10 dataset and the [FL simulator](https://nvflare.readthedocs.io/en/latest/user_guide/fl_simulator.html). +CIFAR-10 dataset and the [FL simulator](https://nvflare.readthedocs.io/en/latest/user_guide/nvflare_cli/fl_simulator.html). We assume one client holds the images, and the other client holds the labels to compute losses and accuracy metrics. Activations and corresponding gradients are being exchanged between the clients using NVFlare. @@ -20,7 +20,10 @@ The code in this directory is released under Apache v2 License. ## 1. Setup This examples uses [JupyterLab](https://jupyter.org). -We recommend creating a [virtual environment](../../examples/README.md#set-up-a-virtual-environment). +We recommend creating a [virtual environment](../../examples/README.md#set-up-a-virtual-environment) and installing the requirements. +``` +pip install -r requirements.txt +``` ## 2. Start JupyterLab To run the example, we recommend a GPU with at least 16 GB of memory. 
@@ -40,3 +43,18 @@ An example local training curve with an overlap of 10,000 samples is shown below One-shot VFL only requires the client to conduct two uploads and one download, which reduces the communication cost significantly. This CIFAR10 example can achieve a test accuracy of 79.0%, which is nearly the same as the results of vanilla [single-client VFL (split learning)](https://github.com/jeremy313/NVFlare/tree/dev/examples/advanced/vertical_federated_learning/cifar10-splitnn). One-shot VFL results + +## Citation + +> Sun, Jingwei, et al. "Communication-efficient vertical federated learning with limited overlapping samples." Proceedings of the IEEE/CVF International Conference on Computer Vision. 2023. + +BibTeX +``` +@inproceedings{sun2023communication, + title={Communication-efficient vertical federated learning with limited overlapping samples}, + author={Sun, Jingwei and Xu, Ziyue and Yang, Dong and Nath, Vishwesh and Li, Wenqi and Zhao, Can and Xu, Daguang and Chen, Yiran and Roth, Holger R}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={5203--5212}, + year={2023} +} +```
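
For reference, here is a minimal, self-contained sketch of how the two helpers added in `research/fed-bpt/src/utils.py` behave. The tensor values and the flat `from utils import ...` path are illustrative only; they assume `research/fed-bpt/src` is on `PYTHONPATH`.

```
import torch

from utils import average_weights, hinge_loss  # assumes research/fed-bpt/src is on PYTHONPATH

# hinge_loss expects class logits of shape (N, C) and integer targets of shape (N,).
# Each competing class c != target contributes max(0, logit_c - logit_target + margin).
logits = torch.tensor([[2.0, 1.5, -1.0], [0.1, 1.5, 0.3]])
targets = torch.tensor([0, 1])
loss = hinge_loss(logits, targets, margin=1.0, reduction="mean")

# average_weights returns the element-wise mean over a list of weight dictionaries.
w1 = {"prompt": torch.ones(2)}
w2 = {"prompt": 3.0 * torch.ones(2)}
avg = average_weights([w1, w2])  # {"prompt": tensor([2., 2.])}

print(loss.item(), avg["prompt"])
```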