CPU benchmarking scripts
kanav99 committed May 28, 2024
1 parent 54c62ae commit 222db9a
Showing 10 changed files with 2,046 additions and 0 deletions.
49 changes: 49 additions & 0 deletions GPU-MPC/ext/sytorch/CMakeLists.txt
@@ -370,3 +370,52 @@ add_executable(
)

target_link_libraries(gptneobenchmark ${PROJECT_NAME})

add_executable(
benchmark-bert-tiny
benchmarks/bert-tiny.cpp
)

target_link_libraries(benchmark-bert-tiny ${PROJECT_NAME})

add_executable(
benchmark-bert-base
benchmarks/bert-base.cpp
)

target_link_libraries(benchmark-bert-base ${PROJECT_NAME})

add_executable(
benchmark-bert-large
benchmarks/bert-large.cpp
)

target_link_libraries(benchmark-bert-large ${PROJECT_NAME})

add_executable(
benchmark-gpt2
benchmarks/gpt2.cpp
)

target_link_libraries(benchmark-gpt2 ${PROJECT_NAME})

add_executable(
benchmark-gptneo
benchmarks/gptneo.cpp
)

target_link_libraries(benchmark-gptneo ${PROJECT_NAME})

add_executable(
benchmark-llama-7b
benchmarks/llama-7b.cpp
)

target_link_libraries(benchmark-llama-7b ${PROJECT_NAME})

add_executable(
benchmark-llama-13b
benchmarks/llama-13b.cpp
)

target_link_libraries(benchmark-llama-13b ${PROJECT_NAME})
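Each new target above follows the same two-step CMake pattern: declare the benchmark executable from its source file under benchmarks/, then link it against the sytorch library through ${PROJECT_NAME}. As a minimal sketch (hypothetical target and source names), a further benchmark would be wired in the same way:

add_executable(
    benchmark-new-model          # hypothetical target name
    benchmarks/new-model.cpp     # hypothetical source file
)

target_link_libraries(benchmark-new-model ${PROJECT_NAME})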
256 changes: 256 additions & 0 deletions GPU-MPC/ext/sytorch/benchmarks/bert-base.cpp
@@ -0,0 +1,256 @@
// Authors: Kanav Gupta, Neha Jawalkar
// Copyright:
//
// Copyright (c) 2024 Microsoft Research
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#include <sytorch/backend/llama_extended.h>
#include <sytorch/backend/llama_transformer.h>
#include <sytorch/backend/piranha_cleartext.h>
#include <sytorch/backend/secureml_cleartext.h>
#include <sytorch/backend/float.h>
#include <sytorch/layers/layers.h>
#include <sytorch/module.h>
#include <llama/utils.h>
#include <llama/api.h>

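// Feed-forward block used in each encoder layer: up-projection (in -> hidden), GELU, down-projection (hidden -> in).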
template <typename T>
class FFN : public SytorchModule<T>
{
public:
using SytorchModule<T>::gelu;

u64 in;
u64 hidden;

public:
FC<T> *up;
FC<T> *down;

FFN(u64 in, u64 hidden) : in(in), hidden(hidden)
{
up = new FC<T>(in, hidden, true);
down = new FC<T>(hidden, in, true);
}

Tensor<T> &_forward(Tensor<T> &input)
{
return down->forward(gelu(up->forward(input)));
}
};

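// Multi-head self-attention: one fused QKV projection (c_attn), per-head scaled dot-product attention with softmax, concatenation of the heads, and an output projection (c_proj).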
template <typename T>
class MultiHeadAttention : public SytorchModule<T>
{
public:
using SytorchModule<T>::split;
using SytorchModule<T>::view;
using SytorchModule<T>::add;
using SytorchModule<T>::transpose;
using SytorchModule<T>::matmul;
using SytorchModule<T>::scalarmul;
using SytorchModule<T>::invsqrt;
using SytorchModule<T>::softmax;
using SytorchModule<T>::concat;
using SytorchModule<T>::attention_mask;

public:
FC<T> *c_attn;
FC<T> *c_proj;

u64 n_heads;
u64 n_embd;

MultiHeadAttention(u64 n_heads, u64 n_embd) : n_heads(n_heads), n_embd(n_embd)
{
always_assert(n_embd % n_heads == 0);
c_attn = new FC<T>(n_embd, 3 * n_embd, true);
c_proj = new FC<T>(n_embd, n_embd, true);
}

Tensor<T> &_forward(Tensor<T> &input)
{
auto &x = c_attn->forward(input);
auto &qkv_heads = split(x, 3);
auto &q_heads = view(qkv_heads, 0);
auto &k_heads = view(qkv_heads, 1);
auto &v_heads = view(qkv_heads, 2);
auto &qs = split(q_heads, n_heads);
auto &ks = split(k_heads, n_heads);
auto &vs = split(v_heads, n_heads);

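// Scale QK^T by 1/sqrt(d_head) before the softmax.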
double divisor = 1 / sqrt(double(n_embd) / double(n_heads));

std::vector<Tensor<T> *> qks_sm_vs;
for (u64 i = 0; i < n_heads; ++i)
{
auto &q = view(qs, i);
auto &k = view(ks, i);
auto &v = view(vs, i);
auto &kt = transpose(k);
auto &qk = matmul(q, kt);
auto &qks = scalarmul(qk, divisor);

auto &qks_sm = softmax(qks);

auto &qks_sm_v = matmul(qks_sm, v);
qks_sm_vs.push_back(&qks_sm_v);
}

auto &qks_sm_vs_cat = concat(qks_sm_vs);
auto &res = c_proj->forward(qks_sm_vs_cat);
return res;
}
};

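// Post-LayerNorm encoder block: attention + residual -> LayerNorm, then FFN + residual -> LayerNorm.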
template <typename T>
class TransformerBlock : public SytorchModule<T>
{
public:
using SytorchModule<T>::add;

MultiHeadAttention<T> *attn;
FFN<T> *ffn;
LayerNorm<T> *ln0;
LayerNorm<T> *ln1;

u64 n_heads, n_embd;

public:
TransformerBlock(u64 n_heads, u64 n_embd) : n_heads(n_heads), n_embd(n_embd)
{
attn = new MultiHeadAttention<T>(n_heads, n_embd);
ffn = new FFN<T>(n_embd, 4 * n_embd);
ln0 = new LayerNorm<T>(n_embd);
ln1 = new LayerNorm<T>(n_embd);
}

Tensor<T> &_forward(Tensor<T> &input)
{
auto &attn_out = attn->forward(input);
auto &add0_out = add(attn_out, input);
auto &ln0_out = ln0->forward(add0_out);

auto &ffn_out = ffn->forward(ln0_out);
auto &add1_out = add(ffn_out, ln0_out);
auto &ln1_out = ln1->forward(add1_out);
return ln1_out;
}
};

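// BERT encoder stack: an initial LayerNorm followed by n_layer transformer blocks; the pooler FC is constructed but not applied in _forward.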
template <typename T>
class BERT : public SytorchModule<T>
{
public:
using SytorchModule<T>::tanh;
using SytorchModule<T>::view;
using SytorchModule<T>::add;
using SytorchModule<T>::unsqueeze;
std::vector<TransformerBlock<T> *> blocks;
LayerNorm<T> *ln_f;
FC<T> *pool;
u64 n_layer, n_heads, n_embd;

public:
BERT(u64 n_layer, u64 n_heads, u64 n_embd) : n_layer(n_layer), n_heads(n_heads), n_embd(n_embd)
{
for (u64 i = 0; i < n_layer; ++i)
{
blocks.push_back(new TransformerBlock<T>(n_heads, n_embd));
}
ln_f = new LayerNorm<T>(n_embd);
pool = new FC<T>(n_embd, n_embd, true);
}

Tensor<T> &_forward(Tensor<T> &input)
{
auto &y = ln_f->forward(input);
Tensor<T> *x = &y;
// Tensor<T> *x = &input;

for (u64 i = 0; i < n_layer; ++i)
{
auto &block = blocks[i];
auto &x_out = block->forward(*x);
x = &x_out;
}

return *x;
}
};

int main(int __argc, char **__argv)
{
sytorch_init();

// bert base
const u64 n_embd = 768;
const u64 n_head = 12;
const u64 n_layer = 12;
const u64 scale = 12;
const u64 bw = 51;
const u64 n_seq = 128;

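// argv[1]: party id; argv[2] (optional): peer IP, defaults to 127.0.0.1.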
int party = atoi(__argv[1]);
std::string ip = "127.0.0.1";
if (__argc > 2)
ip = __argv[2];

using LlamaVersion = LlamaTransformer<u64>;
LlamaVersion *llama = new LlamaVersion();
srand(time(NULL));

LlamaConfig::bitlength = bw;
LlamaConfig::party = party;

llama->init(ip, true);

BERT<u64> net(n_layer, n_head, n_embd);
Tensor<u64> input({n_seq, n_embd});
net.init(scale, input);
net.setBackend(llama);
net.optimize();
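// Non-dealer parties run on a constant dummy input (real weight/input loading is commented out); the dealer zeroes the model.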
if (party != DEALER)
{
// net.load("bert-tiny-weights.dat");
// input.load("15469.dat", scale);
input.fill(1LL << (scale - 2));
}
else
{
net.zero();
}

llama->initializeInferencePartyA(net.root);
llama->initializeInferencePartyB(input);

llama::start();
net.forward(input);
llama::end();

auto &output = net.activation;
llama->outputA(output);
llama->finalize();

if (party == CLIENT)
{
auto signedAct = Tensor<i64>((i64*) net.activation.data, net.activation.shape);
print(signedAct, scale, bw);
}
return 0;
}