Skip to content

Commit

Permalink
feat(llama-infini): 适配 moe
Browse files Browse the repository at this point in the history
Signed-off-by: YdrMaster <[email protected]>
  • Loading branch information
YdrMaster committed Dec 31, 2024
1 parent 256d219 commit a29f9e3
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions models/llama/infini/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub struct Operators<N = InfiniNode, R = InfiniAllReduce>(PhantomData<(N, R)>);
pub type RandomSample = llama::RandomSample<Device, RandomSampleNpu>;

pub struct Weights {
nexp: usize,
blks: Box<[LlamaBlkStorage<DevBlob>]>,
output_norm: DevBlob,
output: DevBlob,
Expand Down Expand Up @@ -78,6 +79,7 @@ impl Weights {
let device = stream.get_device();
let mut loader = None;
Self {
nexp: model.meta.nexp,
blks: model
.blocks
.iter()
Expand Down Expand Up @@ -172,14 +174,18 @@ impl WeightLoader for Weights {
fn load_moe<'a>(
&'a self,
which: BlkWeight,
_iblk: usize,
_iexp: usize,
iblk: usize,
iexp: usize,
_queue: &'a QueueOf<Self::Hardware>,
) -> Self::Weight<'a> {
match which {
BlkWeight::FfnGateUp | BlkWeight::FfnDown => todo!(),
let blk = &self.blks[iblk];
let w = match which {
BlkWeight::FfnGateUp => &blk.ffn_gate_up,
BlkWeight::FfnDown => &blk.ffn_down,
_ => unreachable!(),
}
};
let one = w.len() / self.nexp;
&w[iexp * one..][..one]
}

#[inline]
Expand Down

0 comments on commit a29f9e3

Please sign in to comment.