feat(xtask): control whether to add bos via an argument
Signed-off-by: YdrMaster <[email protected]>
YdrMaster committed Feb 29, 2024
1 parent fa62f1a commit cff122d
Showing 4 changed files with 22 additions and 10 deletions.
5 changes: 4 additions & 1 deletion model-parameters/src/memory/safe_tensors.rs
@@ -35,7 +35,10 @@ impl Memory {
         let mmap = Arc::new(model);
         let offset = BASE_OFFSET + len;
         let tensor = |name: &str| {
-            let info = &header.tensors[name];
+            let info = header
+                .tensors
+                .get(name)
+                .unwrap_or_else(|| panic!("missing tensor: {name}"));
             let (start, end) = info.data_offsets;
             let data_type = match info.dtype {
                 Dtype::BOOL => DataType::Bool,
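The hunk above swaps the panicking index `&header.tensors[name]` for `.get(name)` plus an explicit panic message, so a missing tensor is reported by name instead of a generic "key not found". A minimal standalone sketch of the same pattern, using a plain `HashMap` and a hypothetical tensor name purely for illustration:

```rust
use std::collections::HashMap;

// Indexing a map (`&map[name]`) panics with a generic message when the key
// is absent; `.get` + `unwrap_or_else` lets the panic name the missing tensor.
fn lookup<'a>(tensors: &'a HashMap<String, Vec<u8>>, name: &str) -> &'a [u8] {
    tensors
        .get(name)
        .unwrap_or_else(|| panic!("missing tensor: {name}"))
}

fn main() {
    let mut tensors = HashMap::new();
    tensors.insert("model.norm.weight".to_string(), vec![0u8; 16]);
    // Found: returns the data.
    assert_eq!(lookup(&tensors, "model.norm.weight").len(), 16);
    // A misspelled name would panic with `missing tensor: <name>`.
}
```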
5 changes: 0 additions & 5 deletions tokenizer/src/bpe.rs
@@ -92,11 +92,6 @@ impl Tokenizer for BPE {
 
     fn encode(&self, text: &str) -> Vec<utok> {
         let mut tokens = Vec::new();
-        if let Some(c) = text.chars().next() {
-            if c.is_alphabetic() {
-                tokens.push(self.find_piece("▁").unwrap())
-            }
-        }
 
         text.chars().map(|c| c.to_string()).for_each(|c| {
             if let Some(index) = self.find_piece(&c) {
2 changes: 1 addition & 1 deletion transformer-nvidia/src/lib.rs
@@ -74,7 +74,7 @@ impl<'a> Transformer<'a> {
 
     #[inline]
     pub fn new_cache<'b>(&self, stream: &'b Stream) -> Vec<LayerCache<'b>> {
-        LayerCache::new_layers(&*self.host, &stream)
+        LayerCache::new_layers(self.host, stream)
     }
 
     pub fn update<'b>(
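The one-line change above drops a redundant reborrow and a redundant `&`: if `self.host` is already a shared reference and `stream: &Stream`, both can be passed straight through. A minimal sketch with stand-in types (`Host`, `Stream`, and `new_layers` here are placeholders, not the crate's real items):

```rust
struct Host;
struct Stream;

fn new_layers(_host: &Host, _stream: &Stream) -> Vec<u32> {
    Vec::new()
}

fn new_cache(host: &Host, stream: &Stream) -> Vec<u32> {
    // Before: `new_layers(&*host, &stream)` also compiles, because `&*host`
    // reborrows `&Host` and `&stream` (a `&&Stream`) deref-coerces back to
    // `&Stream` -- but both sigils are noise.
    new_layers(host, stream)
}

fn main() {
    assert!(new_cache(&Host, &Stream).is_empty());
}
```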
20 changes: 17 additions & 3 deletions xtask/src/generate.rs
@@ -24,6 +24,7 @@ pub(crate) struct GenerateArgs {
     /// Prompt.
     #[clap(short, long)]
     prompt: String,
+    /// Tokenizer file.
     #[clap(short, long)]
     tokenizer: Option<String>,
     /// Max steps.
@@ -32,6 +33,9 @@
     /// Copy model parameters inside memory.
     #[clap(long)]
     inside_mem: bool,
+    /// Add bos before first token.
+    #[clap(long)]
+    insert_bos: bool,
     /// Log level, may be "off", "trace", "debug", "info" or "error".
     #[clap(long)]
     log: Option<String>,
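With clap's derive API (which this struct already uses), a `bool` field marked `#[clap(long)]` becomes an off-by-default switch named after the kebab-cased field, i.e. `--insert-bos`. A stripped-down sketch of just that part, assuming a clap 3/4-style derive setup rather than the crate's exact configuration:

```rust
use clap::Parser;

/// Stand-in for `GenerateArgs`, keeping only the new flag.
#[derive(Parser, Debug)]
struct Args {
    /// Add bos before first token.
    #[clap(long)]
    insert_bos: bool,
}

fn main() {
    // Without `--insert-bos` the field stays false; passing the flag
    // flips it to true.
    let args = Args::parse();
    println!("insert_bos = {}", args.insert_bos);
}
```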
@@ -91,11 +95,21 @@ impl GenerateArgs {
         let tokenizer = tokenizer(self.tokenizer, &model_dir);
         info!("build tokenizer ... {:?}", time.elapsed());
 
+        let mut prompt = String::new();
+        if self.insert_bos {
+            prompt.push_str("<s>");
+        }
+        match self.prompt.chars().next() {
+            Some(c) if c.is_ascii_alphabetic() => prompt.push(' '),
+            _ => {}
+        }
+        prompt.push_str(&self.prompt);
+
         if self.nvidia {
             let preload_layers = if self.inside_mem { usize::MAX } else { 3 };
-            on_nvidia_gpu(model_dir, tokenizer, self.prompt, step, preload_layers)
+            on_nvidia_gpu(model_dir, tokenizer, prompt, step, preload_layers)
         } else {
-            on_host(model_dir, tokenizer, self.prompt, step, self.inside_mem)
+            on_host(model_dir, tokenizer, prompt, step, self.inside_mem)
         }
     }
 }
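Pulled out of the method above, the new prompt shaping reads as a small standalone function: prepend the literal `<s>` piece when the bos flag is set, and insert a leading space when the user prompt starts with an ASCII letter — the job the now-removed branch in `tokenizer/src/bpe.rs` used to do with `▁` inside `encode` (note the check is now `is_ascii_alphabetic` rather than the old `is_alphabetic`). A sketch; the free function and its name are illustrative, not part of the crate:

```rust
fn shape_prompt(user_prompt: &str, insert_bos: bool) -> String {
    let mut prompt = String::new();
    if insert_bos {
        // Textual bos marker, as in the hunk above.
        prompt.push_str("<s>");
    }
    // A prompt starting with an ASCII letter gets a leading space so the
    // first word can be tokenized as a "▁word"-style piece.
    match user_prompt.chars().next() {
        Some(c) if c.is_ascii_alphabetic() => prompt.push(' '),
        _ => {}
    }
    prompt.push_str(user_prompt);
    prompt
}

fn main() {
    assert_eq!(shape_prompt("hello", true), "<s> hello");
    assert_eq!(shape_prompt("你好", false), "你好");
    assert_eq!(shape_prompt("hello", false), " hello");
}
```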
@@ -264,7 +278,7 @@ fn on_nvidia_gpu(
     let time = Instant::now();
     while pos < step {
         let logits = transformer.forward(token, &kv_cache, pos as _, &compute, &transfer);
-        let next = argmax(&logits);
+        let next = argmax(logits);
 
         token = next;
         pos += 1;
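The last hunk drops the `&` in `argmax(&logits)`, passing the logits from `forward` as-is. The crate's real `argmax` signature is not shown in this diff; the sketch below only illustrates one way a sampler can accept both call forms, via a generic `AsRef<[f32]>` parameter (the signature and the `f32` element type are assumptions):

```rust
fn argmax(logits: impl AsRef<[f32]>) -> u32 {
    logits
        .as_ref()
        .iter()
        .enumerate()
        .max_by(|(_, a), (_, b)| a.total_cmp(b))
        .map(|(i, _)| i as u32)
        .expect("logits must not be empty")
}

fn main() {
    let logits = vec![0.1f32, 2.5, -0.3];
    assert_eq!(argmax(&logits), 1); // `&Vec<f32>` works through `AsRef`
    assert_eq!(argmax(logits), 1); // and so does the owned `Vec<f32>`
}
```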
