Skip to content

Commit

Permalink
build: update operators-rs
Browse files (browse the repository at this point in the history)
Signed-off-by: YdrMaster <[email protected]>
  • Loading branch information
YdrMaster committed Jul 15, 2024
1 parent 7847e93 commit 4251763
Show file tree
Hide file tree
Showing 13 changed files with 108 additions and 103 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: cuda-toolkit
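uses: Jimver/cuda-toolkit@VERSION  # version tag lost to e-mail obfuscation in this capture; restore it from the repository's build.yml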
with:
method: 'network'

- name: Check format
run: cargo fmt --check

Expand Down
38 changes: 19 additions & 19 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ tokio = { version = "1.38", features = ["rt-multi-thread", "sync"] }
digit-layout = "0.0"
build-script-cfg = "0.0"

operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "04e71d5", default-features = false }
nccl = { git = "https://github.com/YdrMaster/cuda-driver", rev = "343b0e0" }
search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "343b0e0" }
operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "9e621e6", default-features = false }
search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "fb088b6" }
search-neuware-tools = "0.0"
6 changes: 5 additions & 1 deletion devices/nvidia-gpu/build.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
fn main() {
use build_script_cfg::Cfg;
use search_cuda_tools::find_cuda_root;
use search_cuda_tools::{find_nccl_root, find_cuda_root};

let cuda = Cfg::new("detected_cuda");
let nccl = Cfg::new("detected_nccl");
if find_cuda_root().is_some() {
cuda.define();
if find_nccl_root().is_some() {
nccl.define();
}
println!("cargo:rerun-if-changed=src/sample.cu");
cc::Build::new()
.cuda(true)
Expand Down
7 changes: 6 additions & 1 deletion devices/nvidia-gpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ pub use operators::{cuda, nvidia_gpu::Handle as Gpu};
pub use sample::{sample_cpu, sample_nv};
pub use tensor::{reslice, reslice_mut, slice, split, udim, LocalSplitable, Tensor};

#[cfg(detected_nccl)]
pub use operators::nccl;

pub struct NvidiaKernels(HashMap<i32, Internal>);

struct Internal {
Expand Down Expand Up @@ -186,7 +189,9 @@ impl KernelsB for NvidiaKernels {
}

pub fn synchronize() {
cuda::init();
if let Err(cuda::NoDevice) = cuda::init() {
return;
}
for i in 0..cuda::Device::count() {
cuda::Device::new(i as _)
.retain_primary()
Expand Down
1 change: 0 additions & 1 deletion models/llama/nvidia-gpu-distributed/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ common-nv = { path = "../../../devices/nvidia-gpu" }
causal-lm = { path = "../../../causal-lm" }
llama = { path = "../common" }
digit-layout.workspace = true
nccl.workspace = true
log.workspace = true
itertools.workspace = true

Expand Down
10 changes: 6 additions & 4 deletions models/llama/nvidia-gpu-distributed/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ use common_nv::{
AsRaw, Context, ContextResource, ContextSpore, DevByte, DevMem, DevMemSpore, Device,
HostMemSpore, Stream, StreamSpore,
},
nccl::{CommunicatorGroup, ReduceType},
sample_nv, slice, split, udim, KernelsA, KernelsB, LocalSplitable, NvidiaKernels, Tensor,
};
use digit_layout::types::F16;
use itertools::izip;
use llama::InferenceConfig;
use nccl::CommunicatorGroup;
use parameters::{Layer, ParameterMatrix};
use std::{
iter::{repeat, zip},
Expand Down Expand Up @@ -312,7 +312,7 @@ impl CausalLM for Transformer {
x.physical_mut(),
None,
self.config.dt,
nccl::ReduceType::ncclSum,
ReduceType::ncclSum,
stream,
);

Expand All @@ -321,7 +321,7 @@ impl CausalLM for Transformer {
x.physical_mut(),
None,
self.config.dt,
nccl::ReduceType::ncclSum,
ReduceType::ncclSum,
stream,
);
}
Expand Down Expand Up @@ -623,7 +623,9 @@ fn malloc_all(contexts: &[Context], len: usize) -> Vec<DevMemSpore> {

#[test]
fn test_infer() {
cuda::init();
if let Err(cuda::NoDevice) = cuda::init() {
return;
}
if cuda::Device::count() >= 2 {
causal_lm::test_impl::<Transformer>(
[0, 1].map(cuda::Device::new).into_iter().collect(),
Expand Down
4 changes: 3 additions & 1 deletion models/llama/nvidia-gpu-distributed/src/parameters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,9 @@ fn test_load() {

const N: usize = 1;

cuda::init();
if let Err(cuda::NoDevice) = cuda::init() {
return;
}
if Device::count() < N {
return;
}
Expand Down
27 changes: 14 additions & 13 deletions models/llama/nvidia-gpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -550,17 +550,18 @@ impl Drop for LayerLoader<'_> {

#[test]
fn test_infer() {
cuda::init();
if let Some(device) = cuda::Device::fetch() {
causal_lm::test_impl::<Transformer>(
ModelLoadMeta {
device,
load_layers: 20,
},
&[
29966, 29989, 1792, 29989, 29958, 13, 29903, 388, 376, 18567, 29908, 304, 592,
21106, 29879, 5299, 29989, 465, 22137, 29989, 29958, 13,
],
);
};
if let Err(cuda::NoDevice) = cuda::init() {
return;
}
let device = cuda::Device::new(0);
causal_lm::test_impl::<Transformer>(
ModelLoadMeta {
device,
load_layers: 20,
},
&[
29966, 29989, 1792, 29989, 29958, 13, 29903, 388, 376, 18567, 29908, 304, 592, 21106,
29879, 5299, 29989, 465, 22137, 29989, 29958, 13,
],
);
}
6 changes: 3 additions & 3 deletions web-api/src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ where
m.content = general_purpose::STANDARD
.decode(content)
.map(String::from_utf8)
.map_err(|_| Error::ContentError(format!("Decode failed: {content}")))?
.map_err(|_| Error::ContentError(format!("Decode failed: {content}")))?;
.map_err(|_| Error::InvalidContent(format!("Decode failed: {content}")))?
.map_err(|_| Error::InvalidContent(format!("Decode failed: {content}")))?;
}
}
Some("text") => {}
Some(e) => return Err(Error::ContentError(format!("Unknown encoding: {e}"))),
Some(e) => return Err(Error::InvalidContent(format!("Unknown encoding: {e}"))),
};

async fn infer<M: CausalLM>(
Expand Down
6 changes: 3 additions & 3 deletions web-api/src/schemas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ impl Success for DropSuccess {
pub(crate) enum Error {
Session(SessionError),
WrongJson(serde_json::Error),
ContentError(String),
InvalidContent(String),
InvalidDialogPos(usize),
}

Expand All @@ -103,7 +103,7 @@ impl Error {
Self::Session(Busy) => StatusCode::NOT_ACCEPTABLE,
Self::Session(Duplicate) => StatusCode::CONFLICT,
Self::WrongJson(_) => StatusCode::BAD_REQUEST,
Self::ContentError(_) => StatusCode::BAD_REQUEST,
Self::InvalidContent(_) => StatusCode::BAD_REQUEST,
Self::InvalidDialogPos(_) => StatusCode::RANGE_NOT_SATISFIABLE,
}
}
Expand Down Expand Up @@ -131,7 +131,7 @@ impl Error {
Self::Session(Busy) => json(error!(0, "Session is busy")),
Self::Session(Duplicate) => json(error!(0, "Session ID already exists")),
Self::WrongJson(e) => json(error!(0, e.to_string())),
Self::ContentError(e) => json(error!(1, e)),
Self::InvalidContent(e) => json(error!(1, e)),
&Self::InvalidDialogPos(current_dialog_pos) => {
#[derive(serde::Serialize)]
struct ErrorBodyExtra {
Expand Down
4 changes: 3 additions & 1 deletion xtask/src/list_turbo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
fn list_nv() {
use llama_nv::cuda::{self, Device as Gpu};

cuda::init();
if let Err(cuda::NoDevice) = cuda::init() {
return;
}
println!("NVidia CUDA environment detected, use `--turbo nv:` to select.");
for i in 0..Gpu::count() {
let gpu = Gpu::new(i as _);
Expand Down
Loading

0 comments on commit 4251763

Please sign in to comment.