CPU benchmarking scripts
kanav99 committed May 28, 2024
1 parent 54c62ae commit 222db9a
Showing 10 changed files with 2,046 additions and 0 deletions.
49 changes: 49 additions & 0 deletions GPU-MPC/ext/sytorch/CMakeLists.txt
@@ -370,3 +370,52 @@ add_executable(
)

target_link_libraries(gptneobenchmark ${PROJECT_NAME})

add_executable(
benchmark-bert-tiny
benchmarks/bert-tiny.cpp
)

target_link_libraries(benchmark-bert-tiny ${PROJECT_NAME})

add_executable(
benchmark-bert-base
benchmarks/bert-base.cpp
)

target_link_libraries(benchmark-bert-base ${PROJECT_NAME})

add_executable(
benchmark-bert-large
benchmarks/bert-large.cpp
)

target_link_libraries(benchmark-bert-large ${PROJECT_NAME})

add_executable(
benchmark-gpt2
benchmarks/gpt2.cpp
)

target_link_libraries(benchmark-gpt2 ${PROJECT_NAME})

add_executable(
benchmark-gptneo
benchmarks/gptneo.cpp
)

target_link_libraries(benchmark-gptneo ${PROJECT_NAME})

add_executable(
benchmark-llama-7b
benchmarks/llama-7b.cpp
)

target_link_libraries(benchmark-llama-7b ${PROJECT_NAME})

add_executable(
benchmark-llama-13b
benchmarks/llama-13b.cpp
)

target_link_libraries(benchmark-llama-13b ${PROJECT_NAME})
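Each new target above follows the same two-step CMake pattern: declare the benchmark executable from its source file under benchmarks/, then link it against the sytorch library through ${PROJECT_NAME}. As a minimal sketch (hypothetical target and source names), a further benchmark would be wired in the same way:

add_executable(
    benchmark-new-model          # hypothetical target name
    benchmarks/new-model.cpp     # hypothetical source file
)

target_link_libraries(benchmark-new-model ${PROJECT_NAME})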
256 changes: 256 additions & 0 deletions GPU-MPC/ext/sytorch/benchmarks/bert-base.cpp
@@ -0,0 +1,256 @@
// Authors: Kanav Gupta, Neha Jawalkar
// Copyright:
//
// Copyright (c) 2024 Microsoft Research
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#include <sytorch/backend/llama_extended.h>
#include <sytorch/backend/llama_transformer.h>
#include <sytorch/backend/piranha_cleartext.h>
#include <sytorch/backend/secureml_cleartext.h>
#include <sytorch/backend/float.h>
#include <sytorch/layers/layers.h>
#include <sytorch/module.h>
#include <llama/utils.h>
#include <llama/api.h>

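// Feed-forward block used in each encoder layer: up-projection (in -> hidden), GELU, down-projection (hidden -> in).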
template <typename T>
class FFN : public SytorchModule<T>
{
public:
using SytorchModule<T>::gelu;

u64 in;
u64 hidden;

public:
FC<T> *up;
FC<T> *down;

FFN(u64 in, u64 hidden) : in(in), hidden(hidden)
{
up = new FC<T>(in, hidden, true);
down = new FC<T>(hidden, in, true);
}

Tensor<T> &_forward(Tensor<T> &input)
{
return down->forward(gelu(up->forward(input)));
}
};

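// Multi-head self-attention: one fused QKV projection (c_attn), per-head scaled dot-product attention with softmax, concatenation of the heads, and an output projection (c_proj).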
template <typename T>
class MultiHeadAttention : public SytorchModule<T>
{
public:
using SytorchModule<T>::split;
using SytorchModule<T>::view;
using SytorchModule<T>::add;
using SytorchModule<T>::transpose;
using SytorchModule<T>::matmul;
using SytorchModule<T>::scalarmul;
using SytorchModule<T>::invsqrt;
using SytorchModule<T>::softmax;
using SytorchModule<T>::concat;
using SytorchModule<T>::attention_mask;

public:
FC<T> *c_attn;
FC<T> *c_proj;

u64 n_heads;
u64 n_embd;

MultiHeadAttention(u64 n_heads, u64 n_embd) : n_heads(n_heads), n_embd(n_embd)
{
always_assert(n_embd % n_heads == 0);
c_attn = new FC<T>(n_embd, 3 * n_embd, true);
c_proj = new FC<T>(n_embd, n_embd, true);
}

Tensor<T> &_forward(Tensor<T> &input)
{
auto &x = c_attn->forward(input);
auto &qkv_heads = split(x, 3);
auto &q_heads = view(qkv_heads, 0);
auto &k_heads = view(qkv_heads, 1);
auto &v_heads = view(qkv_heads, 2);
auto &qs = split(q_heads, n_heads);
auto &ks = split(k_heads, n_heads);
auto &vs = split(v_heads, n_heads);

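// Scale QK^T by 1/sqrt(d_head) before the softmax.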
double divisor = 1 / sqrt(double(n_embd) / double(n_heads));

std::vector<Tensor<T> *> qks_sm_vs;
for (u64 i = 0; i < n_heads; ++i)
{
auto &q = view(qs, i);
auto &k = view(ks, i);
auto &v = view(vs, i);
auto &kt = transpose(k);
auto &qk = matmul(q, kt);
auto &qks = scalarmul(qk, divisor);

auto &qks_sm = softmax(qks);

auto &qks_sm_v = matmul(qks_sm, v);
qks_sm_vs.push_back(&qks_sm_v);
}

auto &qks_sm_vs_cat = concat(qks_sm_vs);
auto &res = c_proj->forward(qks_sm_vs_cat);
return res;
}
};

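// Post-LayerNorm encoder block: attention + residual -> LayerNorm, then FFN + residual -> LayerNorm.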
template <typename T>
class TransformerBlock : public SytorchModule<T>
{
public:
using SytorchModule<T>::add;

MultiHeadAttention<T> *attn;
FFN<T> *ffn;
LayerNorm<T> *ln0;
LayerNorm<T> *ln1;

u64 n_heads, n_embd;

public:
TransformerBlock(u64 n_heads, u64 n_embd) : n_heads(n_heads), n_embd(n_embd)
{
attn = new MultiHeadAttention<T>(n_heads, n_embd);
ffn = new FFN<T>(n_embd, 4 * n_embd);
ln0 = new LayerNorm<T>(n_embd);
ln1 = new LayerNorm<T>(n_embd);
}

Tensor<T> &_forward(Tensor<T> &input)
{
auto &attn_out = attn->forward(input);
auto &add0_out = add(attn_out, input);
auto &ln0_out = ln0->forward(add0_out);

auto &ffn_out = ffn->forward(ln0_out);
auto &add1_out = add(ffn_out, ln0_out);
auto &ln1_out = ln1->forward(add1_out);
return ln1_out;
}
};

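// BERT encoder stack: an initial LayerNorm followed by n_layer transformer blocks; the pooler FC is constructed but not applied in _forward.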
template <typename T>
class BERT : public SytorchModule<T>
{
public:
using SytorchModule<T>::tanh;
using SytorchModule<T>::view;
using SytorchModule<T>::add;
using SytorchModule<T>::unsqueeze;
std::vector<TransformerBlock<T> *> blocks;
LayerNorm<T> *ln_f;
FC<T> *pool;
u64 n_layer, n_heads, n_embd;

public:
BERT(u64 n_layer, u64 n_heads, u64 n_embd) : n_layer(n_layer), n_heads(n_heads), n_embd(n_embd)
{
for (u64 i = 0; i < n_layer; ++i)
{
blocks.push_back(new TransformerBlock<T>(n_heads, n_embd));
}
ln_f = new LayerNorm<T>(n_embd);
pool = new FC<T>(n_embd, n_embd, true);
}

Tensor<T> &_forward(Tensor<T> &input)
{
auto &y = ln_f->forward(input);
Tensor<T> *x = &y;
// Tensor<T> *x = &input;

for (u64 i = 0; i < n_layer; ++i)
{
auto &block = blocks[i];
auto &x_out = block->forward(*x);
x = &x_out;
}

return *x;
}
};

int main(int __argc, char **__argv)
{
sytorch_init();

// bert base
const u64 n_embd = 768;
const u64 n_head = 12;
const u64 n_layer = 12;
const u64 scale = 12;
const u64 bw = 51;
const u64 n_seq = 128;

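// argv[1]: party id; argv[2] (optional): peer IP, defaults to 127.0.0.1.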
int party = atoi(__argv[1]);
std::string ip = "127.0.0.1";
if (__argc > 2)
ip = __argv[2];

using LlamaVersion = LlamaTransformer<u64>;
LlamaVersion *llama = new LlamaVersion();
srand(time(NULL));

LlamaConfig::bitlength = bw;
LlamaConfig::party = party;

llama->init(ip, true);

BERT<u64> net(n_layer, n_head, n_embd);
Tensor<u64> input({n_seq, n_embd});
net.init(scale, input);
net.setBackend(llama);
net.optimize();
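// Non-dealer parties run on a constant dummy input (real weight/input loading is commented out); the dealer zeroes the model.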
if (party != DEALER)
{
// net.load("bert-tiny-weights.dat");
// input.load("15469.dat", scale);
input.fill(1LL << (scale - 2));
}
else
{
net.zero();
}

llama->initializeInferencePartyA(net.root);
llama->initializeInferencePartyB(input);

llama::start();
net.forward(input);
llama::end();

auto &output = net.activation;
llama->outputA(output);
llama->finalize();

if (party == CLIENT)
{
auto signedAct = Tensor<i64>((i64*) net.activation.data, net.activation.shape);
print(signedAct, scale, bw);
}
return 0;
}