diff --git a/GPU-MPC/experiments/sigma/run_experiment.py b/GPU-MPC/experiments/sigma/run_experiment.py index 3821a822..bcc93e90 100644 --- a/GPU-MPC/experiments/sigma/run_experiment.py +++ b/GPU-MPC/experiments/sigma/run_experiment.py @@ -117,7 +117,8 @@ def run_perf(party, dealer_gpu, eval_gpu, dealer_key_dir, peer_ip, cpu_threads): with open('output/P{}/Fig11_data.csv'.format(party),'w') as out_file: online_time = list(map(lambda model: stats['evaluator'][model]['total']['time'], ['gpt-neo', 'gpt-neo-large', 'llama7b', 'llama13b'])) X = ('1.3', '2.7', '7', '13') - plt.plot(X, online_time, marker='s') + plt.plot(X, online_time, marker='s', label='SIGMA-GPU') + plt.legend(loc='upper left') plt.xlabel('Number of parameters (in billions)') plt.ylabel('Time (s)') plt.savefig("output/P{}/Fig11.png".format(party), dpi=300, bbox_inches='tight') diff --git a/GPU-MPC/ext/sytorch/examples/llama7b.cpp b/GPU-MPC/ext/sytorch/examples/llama7b.cpp index b8e0e661..1c3efba3 100644 --- a/GPU-MPC/ext/sytorch/examples/llama7b.cpp +++ b/GPU-MPC/ext/sytorch/examples/llama7b.cpp @@ -251,11 +251,11 @@ void ct_main(std::string inpName) ct->bw = 48; llama_model.setBackend(ct); - // llama_model.load("/home/t-nejawalkar/ananta/meta_llama2_7b.dat"); - llama_model.load("/home/t-nejawalkar/ananta/meta_llama2_13b.dat"); + // llama_model.load("meta_llama2_7b.dat"); + llama_model.load("meta_llama2_13b.dat"); - // std::string fname = std::string("/home/t-nejawalkar/ananta/lambada-meta-llama2-7b/") + /*std::to_string(i)*/ +"999.dat"; - std::string fname = std::string("/home/t-nejawalkar/ananta/lambada-meta-llama2-13b/") + /*std::to_string(i)*/ inpName; + // std::string fname = std::string("lambada-meta-llama2-7b/") + /*std::to_string(i)*/ +"999.dat"; + std::string fname = std::string("lambada-meta-llama2-13b/") + /*std::to_string(i)*/ inpName; input.load(fname, scale); auto &res = llama_model.forward(input); auto signedAct = Tensor((i64 *)res.data, res.shape); @@ -282,7 +282,7 @@ void lt_main(std::string inpName, int party) const u64 n_ctx = 4096; const u64 n_embd = 5120; const u64 n_head = 40; // 40; - const u64 n_layer = 1; // 40; + const u64 n_layer = 40; // 40; const u64 intermediate_size = 13824; const u64 scale = 12; @@ -303,13 +303,13 @@ void lt_main(std::string inpName, int party) if (party != DEALER) { - // llama_model.load("/home/t-nejawalkar/ananta/meta_llama2_7b.dat"); - llama_model.load("/home/t-nejawalkar/ananta/meta_llama2_13b.dat"); - std::string fname = std::string("/home/t-nejawalkar/ananta/lambada-meta-llama2-13b/") + /*std::to_string(i)*/ inpName; + // llama_model.load("meta_llama2_7b.dat"); + llama_model.load("meta_llama2_13b.dat"); + std::string fname = std::string("lambada-meta-llama2-13b/") + /*std::to_string(i)*/ inpName; input.load(fname, scale); } - // std::string fname = std::string("/home/t-nejawalkar/ananta/lambada-meta-llama2-7b/") + /*std::to_string(i)*/ +"999.dat"; + // std::string fname = std::string("lambada-meta-llama2-7b/") + /*std::to_string(i)*/ +"999.dat"; llama->initializeInferencePartyA(llama_model.root); llama->initializeInferencePartyB(input);