From 04d450b253f8fb35b8356df4eab48d804f5863ab Mon Sep 17 00:00:00 2001
From: YdrMaster
Date: Tue, 20 Aug 2024 04:58:22 +0800
Subject: [PATCH] style: cleanup

`InferenceConfig::duplicate_cache` never reads `self`, so make it an
associated function and update its three call sites; remove
`tensor/src/compatibility.rs`, which had no remaining users.

Signed-off-by: YdrMaster
---
 models/llama/common-cpu/src/lib.rs            | 15 +++---
 models/llama/common/src/lib.rs                |  1 -
 .../llama/nvidia-gpu-distributed/src/lib.rs   |  2 +-
 models/llama/nvidia-gpu/src/lib.rs            |  2 +-
 tensor/src/compatibility.rs                   | 46 -------------------
 5 files changed, 10 insertions(+), 56 deletions(-)
 delete mode 100644 tensor/src/compatibility.rs

diff --git a/models/llama/common-cpu/src/lib.rs b/models/llama/common-cpu/src/lib.rs
index d8fd5a5b..c914c638 100644
--- a/models/llama/common-cpu/src/lib.rs
+++ b/models/llama/common-cpu/src/lib.rs
@@ -4,7 +4,10 @@ use common_cpu::{
     tensor::{reslice, slice, udim, Tensor},
     CpuKernels, Kernels, KernelsA, KernelsB, ThisThread,
 };
-use llama::{ComputeConst, ComputeStream, Handle, LayerStorage, QueueOf, SliceOn, Storage, Weight};
+use llama::{
+    ComputeConst, ComputeStream, Handle, InferenceConfig, LayerStorage, QueueOf, SliceOn, Storage,
+    Weight,
+};
 use std::{iter::repeat, ops::Deref, path::Path, slice::from_raw_parts};
 
 pub struct Transformer {
@@ -133,12 +136,10 @@ impl CausalLM for Transformer {
     }
     #[inline]
     fn duplicate_cache(&self, cache: &Tensor<Self::Storage>, pos: upos) -> Tensor<Self::Storage> {
-        self.s
-            .config
-            .duplicate_cache(cache, pos, Blob::new, |dst, src| {
-                src.map_physical(|u| &**u)
-                    .reform_to(&mut dst.map_physical(|u| &mut **u))
-            })
+        InferenceConfig::duplicate_cache(cache, pos, Blob::new, |dst, src| {
+            src.map_physical(|u| &**u)
+                .reform_to(&mut dst.map_physical(|u| &mut **u))
+        })
     }
 
     fn token_embed(&self, queries: impl IntoIterator<Item = utok>) -> Tensor<Self::Storage> {
diff --git a/models/llama/common/src/lib.rs b/models/llama/common/src/lib.rs
index 1aad54c9..cec9bdd3 100644
--- a/models/llama/common/src/lib.rs
+++ b/models/llama/common/src/lib.rs
@@ -84,7 +84,6 @@ impl InferenceConfig {
     }
 
     pub fn duplicate_cache<S>(
-        &self,
         cache: &Tensor<S>,
         pos: upos,
         malloc: impl FnOnce(usize) -> S,
diff --git a/models/llama/nvidia-gpu-distributed/src/lib.rs b/models/llama/nvidia-gpu-distributed/src/lib.rs
index 658d52a6..90b6cf24 100644
--- a/models/llama/nvidia-gpu-distributed/src/lib.rs
+++ b/models/llama/nvidia-gpu-distributed/src/lib.rs
@@ -143,7 +143,7 @@ impl CausalLM for Transformer {
 
     fn duplicate_cache(&self, cache: &Tensor<Self::Storage>, pos: upos) -> Tensor<Self::Storage> {
         let contexts = Arc::new(self.comms.contexts().collect::<Vec<_>>());
-        self.config.duplicate_cache(
+        InferenceConfig::duplicate_cache(
             cache,
             pos,
             |len| Cache {
diff --git a/models/llama/nvidia-gpu/src/lib.rs b/models/llama/nvidia-gpu/src/lib.rs
index 5ccc4f4e..31ab75f5 100644
--- a/models/llama/nvidia-gpu/src/lib.rs
+++ b/models/llama/nvidia-gpu/src/lib.rs
@@ -175,7 +175,7 @@ impl CausalLM for Transformer {
     }
 
     fn duplicate_cache(&self, cache: &Tensor<Self::Storage>, pos: upos) -> Tensor<Self::Storage> {
-        self.0.config.duplicate_cache(
+        InferenceConfig::duplicate_cache(
             cache,
             pos,
             |len| self.cache(len),
diff --git a/tensor/src/compatibility.rs b/tensor/src/compatibility.rs
deleted file mode 100644
index b9344035..00000000
--- a/tensor/src/compatibility.rs
+++ /dev/null
@@ -1,46 +0,0 @@
-use crate::Tensor;
-use std::iter::zip;
-
-#[derive(Clone, Copy, PartialEq, Eq, Debug)]
-#[repr(u8)]
-pub enum Compatibility {
-    Same,
-    Squeeze,
-    Reform,
-    None,
-}
-
-impl Compatibility {
-    pub fn between<T, U>(a: &Tensor<T>, b: &Tensor<U>) -> Self {
-        if a.layout != b.layout {
-            return Self::None;
-        }
-
-        let mut actual_a = zip(&a.shape, a.pattern.0.as_slice()).filter(|(&d, _)| d > 1);
-        let mut actual_b = zip(&b.shape, b.pattern.0.as_slice()).filter(|(&d, _)| d > 1);
-        let mut squeeze = true;
-        loop {
-            match (actual_a.next(), actual_b.next()) {
-                (Some((da, sa)), Some((db, sb))) => {
-                    if da != db {
-                        return Self::None;
-                    }
-                    if sa != sb {
-                        squeeze = false;
-                    }
-                }
-                (Some(_), None) | (None, Some(_)) => return Self::None,
-                (None, None) => break,
-            }
-        }
-        if squeeze {
-            if a.shape == b.shape {
-                Self::Same
-            } else {
-                Self::Squeeze
-            }
-        } else {
-            Self::Reform
-        }
-    }
-}
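
---

Note: the refactor above works because `duplicate_cache` never touched its receiver. A method that ignores `self` can be demoted to an associated function, after which call sites name the type directly instead of reaching through a stored `config`. Below is a minimal, self-contained sketch of that pattern; `Config` and `dup_cache` are hypothetical stand-ins, not the real crate API, and the body stands in for the actual tensor copy (`reform_to`).

struct Config;

impl Config {
    // Before (as in the old code): `fn dup_cache(&self, ...)`, invoked as
    // `self.config.dup_cache(...)`, even though the body never read `self`.

    // After: an associated function; callers need no `Config` value at all.
    fn dup_cache(cache: &[u8]) -> Vec<u8> {
        // Stand-in for the real copy logic between source and duplicate.
        cache.to_vec()
    }
}

fn main() {
    let cache = vec![1u8, 2, 3];
    // Each call site changes from `self.config.dup_cache(&cache)` to:
    let copy = Config::dup_cache(&cache);
    assert_eq!(copy, cache);
}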