feat(tensor): allow direct access when tensor data is in main memory
Signed-off-by: YdrMaster <[email protected]>
YdrMaster committed Feb 20, 2024
1 parent 1f69ae2 commit ff9360b
Showing 7 changed files with 50 additions and 47 deletions.
18 changes: 9 additions & 9 deletions model-parameters/src/lib.rs
@@ -75,17 +75,22 @@ struct ConfigJson {
     pub torch_dtype: DataType,
 }
 
-type Blob = dyn 'static + AsRef<[u8]>;
-
 #[derive(Clone)]
 pub struct Storage {
-    data: Arc<Blob>,
+    data: Arc<dyn AsRef<[u8]>>,
     range: Range<usize>,
 }
 
+impl AsRef<[u8]> for Storage {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        &self.data.as_ref().as_ref()[self.range.clone()]
+    }
+}
+
 impl Storage {
     #[inline]
-    pub fn new(data: Arc<Blob>, offset: usize, len: usize) -> Self {
+    pub fn new(data: Arc<dyn AsRef<[u8]>>, offset: usize, len: usize) -> Self {
         Self {
             data,
             range: offset..offset + len,
@@ -100,9 +105,4 @@ impl Storage {
             range: 0..len,
         }
     }
-
-    #[inline]
-    pub fn as_slice(&self) -> &[u8] {
-        &self.data.as_ref().as_ref()[self.range.clone()]
-    }
 }
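
With AsRef<[u8]> implemented for Storage, the dedicated as_slice method goes away: any 'static byte container behind an Arc can act as the blob, and the stored byte window is read straight through as_ref. A minimal self-contained usage sketch follows; the Storage here is a condensed mirror of the definition above, and the Vec<u8> blob merely stands in for real weight data.

use std::{ops::Range, sync::Arc};

// Condensed mirror of the Storage type from the diff above, reproduced so
// this sketch compiles on its own.
#[derive(Clone)]
pub struct Storage {
    data: Arc<dyn AsRef<[u8]>>,
    range: Range<usize>,
}

impl AsRef<[u8]> for Storage {
    fn as_ref(&self) -> &[u8] {
        &self.data.as_ref().as_ref()[self.range.clone()]
    }
}

impl Storage {
    pub fn new(data: Arc<dyn AsRef<[u8]>>, offset: usize, len: usize) -> Self {
        Self { data, range: offset..offset + len }
    }
}

fn main() {
    // Any 'static byte container works as the blob: Vec<u8>, a memory map, ...
    let blob: Arc<dyn AsRef<[u8]>> = Arc::new((0u8..16).collect::<Vec<u8>>());
    // Two storages share the same blob but expose different byte windows.
    let head = Storage::new(blob.clone(), 0, 4);
    let tail = Storage::new(blob, 12, 4);
    assert_eq!(head.as_ref(), &[0, 1, 2, 3]);
    assert_eq!(tail.as_ref(), &[12, 13, 14, 15]);
}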
2 changes: 1 addition & 1 deletion model-parameters/src/memory/cast.rs
@@ -43,7 +43,7 @@ fn cast(src: Tensor<Storage>, new_dtype: DataType) -> Tensor<Storage> {
         return src;
     }
 
-    let src_data = src.physical().as_slice();
+    let src_data = src.as_slice();
     let mut data = vec![0u8; src.size() * new_dtype.size()];
 
     macro_rules! cast {
23 changes: 6 additions & 17 deletions model-parameters/src/memory/mod.rs
@@ -108,11 +108,7 @@ impl Llama2 for Memory {
         let dt = self.config.torch_dtype.size();
         let mut physical = self.layers[layer].w_qkv.physical().clone();
         physical.range.end = physical.range.start + d * d * dt;
-        Tensor::new(
-            self.config.torch_dtype,
-            Shape::from_slice(&[d as _, d as _]),
-            physical,
-        )
+        Tensor::new(self.config.torch_dtype, &[d, d], physical)
     }
 
     #[inline]
@@ -123,11 +119,7 @@ impl Llama2 for Memory {
         let mut physical = self.layers[layer].w_qkv.physical().clone();
         physical.range.start += d * d * dt;
         physical.range.end = physical.range.start + dkv * d * dt;
-        Tensor::new(
-            self.config.torch_dtype,
-            Shape::from_slice(&[dkv as _, d as _]),
-            physical,
-        )
+        Tensor::new(self.config.torch_dtype, &[dkv, d], physical)
     }
 
     #[inline]
@@ -138,11 +130,7 @@ impl Llama2 for Memory {
         let mut physical = self.layers[layer].w_qkv.physical().clone();
         physical.range.start += (d + dkv) * d * dt;
         physical.range.end = physical.range.start + dkv * d * dt;
-        Tensor::new(
-            self.config.torch_dtype,
-            Shape::from_slice(&[dkv as _, d as _]),
-            physical,
-        )
+        Tensor::new(self.config.torch_dtype, &[dkv, d], physical)
     }
 
     #[inline]
@@ -199,11 +187,12 @@ fn concat0(tensors: &[&Tensor<Storage>]) -> Tensor<Storage> {
     let mut offset = 0;
     for t in tensors {
         let len = t.size() * data_type.size();
-        data[offset..][..len].copy_from_slice(t.physical().as_slice());
+        data[offset..][..len].copy_from_slice(t.as_slice());
         offset += len;
     }
 
-    Tensor::new(data_type, shape, Storage::from_blob(data))
+    let shape = shape.iter().map(|&d| d as usize).collect::<Vec<_>>();
+    Tensor::new(data_type, &shape, Storage::from_blob(data))
 }
 
 #[test]
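
The three projection getters in this file all view one packed w_qkv blob: each clones the Storage handle and narrows its range field to the byte window of Q, K or V before wrapping it in a Tensor, so no weight bytes are copied. Below is a small self-contained sketch of that window arithmetic; the function and variable names (qkv_windows, d, dkv, dt) are illustrative only, not part of the crate.

use std::{ops::Range, sync::Arc};

// Byte-window arithmetic only; d, dkv and dt mirror the names used by the
// getters above (hidden size, kv hidden size, bytes per element).
fn qkv_windows(d: usize, dkv: usize, dt: usize) -> [Range<usize>; 3] {
    let q = 0..d * d * dt; // w_q is d x d, stored first
    let k = q.end..q.end + dkv * d * dt; // w_k is dkv x d, right after q
    let v = k.end..k.end + dkv * d * dt; // w_v is dkv x d, right after k
    [q, k, v]
}

fn main() {
    // A pretend packed blob: q, k and v weights laid out back to back.
    let (d, dkv, dt) = (4, 2, 2); // tiny sizes, 2-byte (f16-like) elements
    let blob: Arc<Vec<u8>> = Arc::new(vec![0u8; (d + 2 * dkv) * d * dt]);
    let [q, k, v] = qkv_windows(d, dkv, dt);
    // Each projection tensor would clone the Arc and keep only its window,
    // exactly like physical.range.start/end are adjusted in the diff.
    assert_eq!((q.len(), k.len(), v.len()), (32, 16, 16));
    assert_eq!(v.end, blob.len());
}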
2 changes: 1 addition & 1 deletion model-parameters/src/memory/safe_tensors.rs
@@ -49,7 +49,7 @@ impl Memory {
             debug_assert_eq!(data_type, config.torch_dtype);
             Tensor::new(
                 data_type,
-                info.shape.iter().map(|&d| d as _).collect(),
+                &info.shape,
                 Storage::new(mmap.clone(), start, end - start),
             )
         };
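
Because Storage::new accepts any Arc'd AsRef<[u8]> blob, the memory-mapped safetensors file itself can back every tensor, which is what the commit title means by direct access when the data is already in main memory. A hedged sketch of that idea follows; the memmap2 crate, the file name and the byte offsets are assumptions for illustration, since the diff only shows mmap.clone() being passed through.

use memmap2::Mmap; // assumed dependency; any AsRef<[u8]> byte source works
use std::{fs::File, sync::Arc};

fn main() -> std::io::Result<()> {
    // Hypothetical model file; the real loader reads it from the model dir.
    let file = File::open("model.safetensors")?;
    // Mmap implements AsRef<[u8]>, so it can be erased into the blob type
    // that Storage::new now expects.
    let mmap: Arc<dyn AsRef<[u8]>> = Arc::new(unsafe { Mmap::map(&file)? });
    // Every tensor would get a cheap handle: the shared Arc plus its own
    // byte range, with no copy of the weights into a separate buffer.
    let bytes: &[u8] = mmap.as_ref().as_ref();
    let _one_tensor_window = &bytes[..bytes.len().min(128)];
    Ok(())
}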
22 changes: 11 additions & 11 deletions model-parameters/src/save.rs
@@ -52,7 +52,7 @@ pub fn save(model: &dyn Llama2, dir: impl AsRef<Path>) -> io::Result<()> {
             shape: tensor.shape().iter().map(|&d| d as _).collect(),
             data_offsets: {
                 let start = offset;
-                offset += tensor.physical().as_slice().len();
+                offset += tensor.as_slice().len();
                 (start, offset)
             },
         };
@@ -112,17 +112,17 @@ pub fn save(model: &dyn Llama2, dir: impl AsRef<Path>) -> io::Result<()> {
             write.write_all(&[32])?;
         }
     }
-    write.write_all(model.embed_tokens().physical().as_slice())?;
+    write.write_all(model.embed_tokens().as_slice())?;
     for layer in 0..model.num_hidden_layers() {
-        write.write_all(model.input_layernorm(layer).physical().as_slice())?;
-        write.write_all(model.w_qkv(layer).physical().as_slice())?;
-        write.write_all(model.self_attn_o_proj(layer).physical().as_slice())?;
-        write.write_all(model.post_attention_layernorm(layer).physical().as_slice())?;
-        write.write_all(model.mlp_gate(layer).physical().as_slice())?;
-        write.write_all(model.mlp_down(layer).physical().as_slice())?;
-        write.write_all(model.mlp_up(layer).physical().as_slice())?;
+        write.write_all(model.input_layernorm(layer).as_slice())?;
+        write.write_all(model.w_qkv(layer).as_slice())?;
+        write.write_all(model.self_attn_o_proj(layer).as_slice())?;
+        write.write_all(model.post_attention_layernorm(layer).as_slice())?;
+        write.write_all(model.mlp_gate(layer).as_slice())?;
+        write.write_all(model.mlp_down(layer).as_slice())?;
+        write.write_all(model.mlp_up(layer).as_slice())?;
     }
-    write.write_all(model.model_norm().physical().as_slice())?;
-    write.write_all(model.lm_head().physical().as_slice())?;
+    write.write_all(model.model_norm().as_slice())?;
+    write.write_all(model.lm_head().as_slice())?;
     Ok(())
 }
22 changes: 20 additions & 2 deletions tensor/src/tensor.rs
@@ -11,7 +11,8 @@ pub struct Tensor<Physical> {
 }
 
 impl<Physical: Clone> Tensor<Physical> {
-    pub fn new(data_type: DataType, shape: Shape, physical: Physical) -> Self {
+    pub fn new(data_type: DataType, shape: &[usize], physical: Physical) -> Self {
+        let shape = Shape::from_iter(shape.iter().map(|&d| d as udim));
         Self {
             data_type,
             pattern: Pattern::from_shape(&shape),
@@ -35,6 +36,11 @@ impl<Physical: Clone> Tensor<Physical> {
         &self.physical
     }
 
+    #[inline]
+    pub fn physical_mut(&mut self) -> &mut Physical {
+        &mut self.physical
+    }
+
     #[inline]
     pub fn size(&self) -> usize {
         self.shape.iter().map(|&d| d as usize).product()
@@ -85,6 +91,18 @@ impl<Physical: Clone> Tensor<Physical> {
     }
 }
 
+impl<Physical: AsRef<[u8]>> Tensor<Physical> {
+    pub fn as_slice(&self) -> &[u8] {
+        self.physical.as_ref()
+    }
+}
+
+impl<Physical: AsMut<[u8]>> Tensor<Physical> {
+    pub fn as_mut_slice(&mut self) -> &mut [u8] {
+        self.physical.as_mut()
+    }
+}
+
 pub type Shape = SmallVec<[udim; 4]>;
 pub type Affine = DMatrix<idim>;
 
@@ -108,7 +126,7 @@ fn test() {
     use super::Transpose;
    use smallvec::smallvec;
 
-    let t = Tensor::new(DataType::F32, Shape::from_slice(&[2, 3, 4, 5]), ());
+    let t = Tensor::new(DataType::F32, &[2, 3, 4, 5], ());
     assert_eq!(t.shape(), &[2, 3, 4, 5]);
     assert_eq!(t.pattern.0.as_slice(), &[60, 20, 5, 1, 0]);
     assert_eq!(t.is_contiguous(), true);
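
The new accessors are gated on the physical type: as_slice exists only when Physical: AsRef<[u8]> and as_mut_slice only when Physical: AsMut<[u8]>, so a tensor backed by something that is not host memory simply has no byte accessor to misuse. A condensed, self-contained sketch of that bound-gating pattern follows (not the crate's full Tensor definition).

// Minimal stand-in for the real Tensor<Physical>: just enough to show that
// byte access only exists when the physical type is host memory.
struct Tensor<Physical> {
    physical: Physical,
}

impl<Physical: AsRef<[u8]>> Tensor<Physical> {
    fn as_slice(&self) -> &[u8] {
        self.physical.as_ref()
    }
}

impl<Physical: AsMut<[u8]>> Tensor<Physical> {
    fn as_mut_slice(&mut self) -> &mut [u8] {
        self.physical.as_mut()
    }
}

fn main() {
    // Vec<u8> lives in main memory, so direct access compiles.
    let mut host = Tensor { physical: vec![0u8; 4] };
    host.as_mut_slice()[0] = 0xff;
    assert_eq!(host.as_slice(), &[0xff, 0, 0, 0]);

    // A tensor with no host data (here: the unit type) still constructs fine,
    // but calling as_slice on it would not compile: the method does not exist.
    let _no_data = Tensor { physical: () };
}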
8 changes: 2 additions & 6 deletions transformer-cpu/src/lib.rs
@@ -38,11 +38,7 @@ impl Transformer {
         let dt = self.model.data_type();
 
         let mut a = vec![0u8; seq_len * d * dt.size()];
-        gather(
-            &mut a,
-            self.model.embed_tokens().physical().as_slice(),
-            tokens,
-        );
+        gather(&mut a, self.model.embed_tokens().as_slice(), tokens);
 
         let mut b = vec![0u8; seq_len * d * dt.size()];
         for l in 0..self.model.num_hidden_layers() {
@@ -51,7 +47,7 @@
             let o = &mut b;
             let x = &a;
             let w = self.model.input_layernorm(l);
-            let w = w.physical().as_slice();
+            let w = w.as_slice();
             let theta = self.model.rope_theta();
             rms_norm(o, x, w, theta, dt);
         }
