Skip to content

Commit

Permalink
feat(xtask): 过滤空的输入,统计首字延迟和解码速度
Browse files: browse the repository at this point in the history
Signed-off-by: YdrMaster <[email protected]>
  • Loading branch information
YdrMaster committed Mar 19, 2024
1 parent 4d7efdf commit e6bfb38
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 13 deletions.
13 changes: 10 additions & 3 deletions service/src/cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,22 +47,29 @@ impl CpuTask {
let max_seq_len = self.transformer.max_seq_len();
let eos = self.transformer.eos_token_id();

let time = Instant::now();
let t0 = Instant::now();
let mut token = self.transformer.decode(
vec![ctx.request(&prompt, max_seq_len)],
&*self.sample.lock().unwrap(),
)[0]
.1;
info!("prefill transformer ... {:?}", time.elapsed());

let t1 = Instant::now();
let mut len = 0;
while token != eos {
responsing.send(token).unwrap();
token = self.transformer.decode(
vec![ctx.request(&[token], max_seq_len)],
&*self.sample.lock().unwrap(),
)[0]
.1;
len += 1;
}
let t2 = Instant::now();
info!(
"First token delay: {:?}, average speed = {:?}/tok",
t1 - t0,
(t2 - t1).div_f32(len as _)
);
}
Command::Drop { id } => {
self.sessions.remove(&id);
Expand Down
13 changes: 10 additions & 3 deletions service/src/nvidia.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,16 @@ pub fn task(
.entry(id)
.or_insert_with_key(|&id| SessionContext::new(&transformer, id, &transfer));

let time = Instant::now();
let t0 = Instant::now();
let mut token = transformer.decode(
vec![ctx.request(&prompt, max_seq_len)],
&*sample.lock().unwrap(),
&compute,
&transfer,
)[0]
.1;
info!("prefill transformer ... {:?}", time.elapsed());

let t1 = Instant::now();
let mut len = 0;
while token != eos {
responsing.send(token).unwrap();
token = transformer.decode(
Expand All @@ -75,7 +75,14 @@ pub fn task(
&transfer,
)[0]
.1;
len += 1;
}
let t2 = Instant::now();
info!(
"First token delay: {:?}, average speed = {:?}/tok",
t1 - t0,
(t2 - t1).div_f32(len as _)
);
}
Command::Drop { id } => {
sessions.remove(&id);
Expand Down
16 changes: 9 additions & 7 deletions xtask/src/chat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,20 @@ impl InferenceArgs {
Chating::print_help();
println!("=====================================");

let mut input = String::new();
loop {
chating.print_session();
let mut input = String::new();
std::io::stdin()
.read_line(&mut input)
.expect("Unable to read line.");

// 以 / 开头则为用户指令
if input.trim_start().starts_with('/') {
chating.execute_command(&input);
} else {
chating.infer(&input);
let input = input.trim();
if !input.is_empty() {
// 以 / 开头则为用户指令
if input.starts_with('/') {
chating.execute_command(&input);
} else {
chating.infer(&input);
}
}
}
}
Expand Down

0 comments on commit e6bfb38

Please sign in to comment.