From ccf165a4bd247dbb27212e31f4134a1212613b51 Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Fri, 17 Jan 2025 16:02:13 +0800
Subject: [PATCH] =?UTF-8?q?refactor:=20nv=20=E6=94=B9=E4=B8=BA=20cuda?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: YdrMaster
---
 Cargo.toml                                             | 2 +-
 docs/user-guide/doc.md                                 | 4 ++--
 models/llama/{nvidia-gpu => cuda}/Cargo.toml           | 2 +-
 models/llama/{nvidia-gpu => cuda}/build.rs             | 0
 models/llama/{nvidia-gpu => cuda}/src/infer.rs         | 0
 models/llama/{nvidia-gpu => cuda}/src/lib.rs           | 1 +
 models/llama/{nvidia-gpu => cuda}/src/nccl_parallel.rs | 0
 8 files changed, 9 insertions(+), 4 deletions(-)
 rename models/llama/{nvidia-gpu => cuda}/Cargo.toml (96%)
 rename models/llama/{nvidia-gpu => cuda}/build.rs (100%)
 rename models/llama/{nvidia-gpu => cuda}/src/infer.rs (100%)
 rename models/llama/{nvidia-gpu => cuda}/src/lib.rs (99%)
 rename models/llama/{nvidia-gpu => cuda}/src/nccl_parallel.rs (100%)

diff --git a/Cargo.toml b/Cargo.toml
index 4b66133..ac5b288 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,7 @@ members = [
     "models/llama/common-cpu",
     "models/llama/opencl",
     "models/llama/infini",
-    "models/llama/nvidia-gpu",
+    "models/llama/cuda",
 
     "models/clip/common",
     "models/clip/common-cpu",
diff --git a/docs/user-guide/doc.md b/docs/user-guide/doc.md
index 450f715..c65fac7 100644
--- a/docs/user-guide/doc.md
+++ b/docs/user-guide/doc.md
@@ -166,6 +166,6 @@ cargo test --release --package `model` --lib -- `test` --exact --nocapture
 | `llama-cpu` | `infer::test_infer` | 默认值“1”。任意间隔的正整数数组,表示每个线程分布模型的份数,数组的项数必须是 2 的幂 | 纯 cpu 后端,不需要任何额外依赖
 | `llama-infini` | `infer::test_infer` | 默认值“cpu;0”。格式“硬件类型; 卡号”,硬件类型目前支持 `cpu`、`nv`、`ascend` | 九源统一软件栈后端
 | `llama-cl` | `infer::test_infer` | TODO | OpenCL 后端
-| `llama-nv` | `infer::test_infer` | 默认值“0”。单个非负整数,推理使用的卡号 | 原生 CUDA Toolkit 后端
-| `llama-nv` | `nccl_parallel::test_infer` | 默认值“0”。任意间隔的非负整数集合,参与分布式推理的卡号 | 原生 CUDA Toolkit 后端,同时依赖 NCCL 实现分布式
+| `llama-cuda` | `infer::test_infer` | 默认值“0”。单个非负整数,推理使用的卡号 | 原生 CUDA Toolkit 后端
+| `llama-cuda` | `nccl_parallel::test_infer` | 默认值“0”。任意间隔的非负整数集合,参与分布式推理的卡号 | 原生 CUDA Toolkit 后端,同时依赖 NCCL 实现分布式
 | `gpt2-cpu` | `infer::test_infer` | TODO | 纯 cpu 后端,不需要任何额外依赖
diff --git a/models/llama/nvidia-gpu/Cargo.toml b/models/llama/cuda/Cargo.toml
similarity index 96%
rename from models/llama/nvidia-gpu/Cargo.toml
rename to models/llama/cuda/Cargo.toml
index 0c65f97..ef3356c 100644
--- a/models/llama/nvidia-gpu/Cargo.toml
+++ b/models/llama/cuda/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "llama-nv"
+name = "llama-cuda"
 version = "0.0.0"
 edition = "2021"
 authors = ["YdrMaster "]
diff --git a/models/llama/nvidia-gpu/build.rs b/models/llama/cuda/build.rs
similarity index 100%
rename from models/llama/nvidia-gpu/build.rs
rename to models/llama/cuda/build.rs
diff --git a/models/llama/nvidia-gpu/src/infer.rs b/models/llama/cuda/src/infer.rs
similarity index 100%
rename from models/llama/nvidia-gpu/src/infer.rs
rename to models/llama/cuda/src/infer.rs
diff --git a/models/llama/nvidia-gpu/src/lib.rs b/models/llama/cuda/src/lib.rs
similarity index 99%
rename from models/llama/nvidia-gpu/src/lib.rs
rename to models/llama/cuda/src/lib.rs
index 11222dd..286fe8f 100644
--- a/models/llama/nvidia-gpu/src/lib.rs
+++ b/models/llama/cuda/src/lib.rs
@@ -197,6 +197,7 @@ impl<'blk> Weights<'blk> {
         push! {
             attn_norm
             attn_qkv
+            attn_qkv_bias
             attn_o
             ffn_norm
             ffn_gate_up
diff --git a/models/llama/nvidia-gpu/src/nccl_parallel.rs b/models/llama/cuda/src/nccl_parallel.rs
similarity index 100%
rename from models/llama/nvidia-gpu/src/nccl_parallel.rs
rename to models/llama/cuda/src/nccl_parallel.rs
diff --git a/models/llama/infini/src/infer.rs b/models/llama/infini/src/infer.rs
index fc5e6c1..ff3882f 100644
--- a/models/llama/infini/src/infer.rs
+++ b/models/llama/infini/src/infer.rs
@@ -72,6 +72,10 @@ fn test_infer() {
             infini_rt::init(infini_rt::DEVICE_NVIDIA);
             WorkerSeed::new(InfiniNode::nv_gpu(&indices))
         }
+        "cambricon" => {
+            infini_rt::init(infini_rt::DEVICE_CAMBRICON);
+            WorkerSeed::new(InfiniNode::cambricon_mlu(&indices))
+        }
         "ascend" => {
             infini_rt::init(infini_rt::DEVICE_ASCEND);
             WorkerSeed::new(InfiniNode::ascend_npu(&indices))