refactor(xtask): restructure the service
Signed-off-by: YdrMaster <ydrml@hotmail.com>
YdrMaster committed Mar 1, 2024
1 parent 78c8001 commit f2a60e5
Showing 10 changed files with 65 additions and 86 deletions.
6 changes: 6 additions & 0 deletions Cargo.toml
@@ -9,3 +9,9 @@ members = [
     "xtask",
 ]
 resolver = "2"
+
+[workspace.dependencies]
+find_cuda_helper = "0.2"
+half = "2.4"
+serde_json = "1.0"
+serde = "1.0"
6 changes: 3 additions & 3 deletions model-parameters/Cargo.toml
@@ -9,9 +9,9 @@ authors = ["YdrMaster <ydrml@hotmail.com>"]
 [dependencies]
 common = { path = "../common" }
 tensor = { path = "../tensor" }
-half = "2.4"
+half.workspace = true
 rayon = "1.9"
 memmap2 = "0.9"
 safetensors = "0.4"
-serde_json = "1.0"
-serde = { version = "1.0", features = ["derive"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
4 changes: 2 additions & 2 deletions tensor/Cargo.toml
@@ -7,8 +7,8 @@ authors = ["YdrMaster <ydrml@hotmail.com>"]
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-half = "2.4"
+half.workspace = true
 smallvec = "1.13"
 nalgebra = "0.32"
 rayon = "1.9"
-serde = "1.0"
+serde.workspace = true
7 changes: 4 additions & 3 deletions transformer-nvidia/Cargo.toml
@@ -10,12 +10,13 @@ authors = ["YdrMaster <ydrml@hotmail.com>"]
 common = { path = "../common" }
 tensor = { path = "../tensor" }
 model-parameters = { path = "../model-parameters" }
-cuda = { git = "https://github.com/YdrMaster/cuda-bench" }
+cuda = { path = "../../cuda-bench/cuda" }
+# cuda = { git = "https://github.com/YdrMaster/cuda-bench" }
 cublas = { git = "https://github.com/YdrMaster/cuda-bench" }
-half = "2.4"
+half.workspace = true
 
 [dev-dependencies]
 tokenizer = { path = "../tokenizer" }
 
 [build-dependencies]
-find_cuda_helper = "0.2"
+find_cuda_helper.workspace = true
4 changes: 3 additions & 1 deletion xtask/Cargo.toml
@@ -12,9 +12,11 @@ tensor = { path = "../tensor" }
 tokenizer = { path = "../tokenizer" }
 transformer-cpu = { path = "../transformer-cpu" }
 transformer-nvidia = { path = "../transformer-nvidia" }
+serde = { workspace = true, features = ["derive"] }
+serde_json.workspace = true
 log = "0.4"
 simple_logger = "4.3"
 clap = { version = "4.5", features = ["derive"] }
 
 [build-dependencies]
-find_cuda_helper = "0.2"
+find_cuda_helper.workspace = true
3 changes: 2 additions & 1 deletion xtask/src/common.rs
@@ -4,8 +4,9 @@ use simple_logger::SimpleLogger;
 use std::{io::ErrorKind::NotFound, path::Path};
 use tokenizer::{Tokenizer, VocabTxt, BPE};
 
-pub(crate) fn logger_init(log_level: Option<String>) {
+pub(crate) fn logger_init(log_level: &Option<String>) {
     let log = log_level
+        .as_ref()
         .and_then(|log| match log.to_lowercase().as_str() {
             "off" | "none" => Some(LevelFilter::Off),
             "trace" => Some(LevelFilter::Trace),
2 changes: 1 addition & 1 deletion xtask/src/generate.rs
@@ -73,7 +73,7 @@ impl Allocator for NormalAllocator {
 
 impl GenerateArgs {
     pub fn invoke(self) {
-        logger_init(self.log);
+        logger_init(&self.log);
 
         let model_dir = PathBuf::from(self.model);
         let step = self.step.unwrap_or(usize::MAX);
100 changes: 39 additions & 61 deletions xtask/src/service/cpu.rs
@@ -1,80 +1,58 @@
 use super::ServiceArgs;
-use crate::common::{argmax, logger_init, tokenizer};
+use crate::common::{argmax, tokenizer};
 use common::upos;
 use std::{collections::HashMap, path::Path, time::Instant};
 use tokenizer::Tokenizer;
 use transformer_cpu::{model_parameters::Memory, LayerCache, Transformer};
 
-pub(super) struct CpuService {
-    transformer: Transformer,
-    sessions: HashMap<usize, SessionContext>,
-    tokenizer: Box<dyn Tokenizer>,
-}
-
-struct SessionContext {
-    pos: upos,
-    kv_cache: Vec<LayerCache>,
-}
-
-impl From<ServiceArgs> for CpuService {
-    fn from(args: ServiceArgs) -> Self {
-        logger_init(args.log);
-
-        let model_dir = Path::new(&args.model);
+pub(super) fn run(args: ServiceArgs) {
+    let model_dir = Path::new(&args.model);
 
-        let time = Instant::now();
-        let tokenizer = tokenizer(args.tokenizer, &model_dir);
-        info!("build tokenizer ... {:?}", time.elapsed());
+    let time = Instant::now();
+    let tokenizer = tokenizer(args.tokenizer, &model_dir);
+    info!("build tokenizer ... {:?}", time.elapsed());
 
-        let time = Instant::now();
-        let model = Box::new(Memory::load_safetensors_from_dir(model_dir).unwrap());
-        info!("load model ... {:?}", time.elapsed());
+    let time = Instant::now();
+    let model = Box::new(Memory::load_safetensors_from_dir(model_dir).unwrap());
+    info!("load model ... {:?}", time.elapsed());
 
-        let time = Instant::now();
-        let transformer = Transformer::new(model);
-        info!("build transformer ... {:?}", time.elapsed());
+    let time = Instant::now();
+    let mut transformer = Transformer::new(model);
+    info!("build transformer ... {:?}", time.elapsed());
 
-        Self {
-            transformer,
-            sessions: HashMap::new(),
-            tokenizer,
-        }
+    struct SessionContext {
+        pos: upos,
+        kv_cache: Vec<LayerCache>,
     }
-}
 
-impl CpuService {
-    pub fn run(mut self) {
-        loop {
-            let id = 0;
-            let prompt = "The quick brown fox jumps over the lazy dog";
+    let mut sessions = HashMap::<usize, SessionContext>::new();
 
-            let session = self.sessions.entry(id).or_insert_with(|| SessionContext {
-                pos: 0,
-                kv_cache: self.transformer.new_cache(),
-            });
+    loop {
+        let id = 0;
+        let prompt = "The quick brown fox jumps over the lazy dog";
 
-            let prompt_tokens = self.tokenizer.encode(&prompt.trim());
-            let (last, tokens) = prompt_tokens.split_last().expect("prompt is empty");
-            if !tokens.is_empty() {
-                self.transformer
-                    .update(tokens, &mut session.kv_cache, session.pos as _);
-                session.pos += tokens.len() as upos;
-            }
+        let session = sessions.entry(id).or_insert_with(|| SessionContext {
+            pos: 0,
+            kv_cache: transformer.new_cache(),
+        });
+
+        let prompt_tokens = tokenizer.encode(&prompt.trim());
+        let (last, tokens) = prompt_tokens.split_last().expect("prompt is empty");
+        if !tokens.is_empty() {
+            transformer.update(tokens, &mut session.kv_cache, session.pos as _);
+            session.pos += tokens.len() as upos;
+        }
 
-            let mut token = *last;
-            let max_pos = self.transformer.max_seq_len() as upos;
-            let mut out = String::new();
-            while session.pos < max_pos {
-                let logits =
-                    self.transformer
-                        .forward(token, &mut session.kv_cache, session.pos as _);
-                let next = argmax(logits);
+        let mut token = *last;
+        let max_pos = transformer.max_seq_len() as upos;
+        let mut out = String::new();
+        while session.pos < max_pos {
+            let logits = transformer.forward(token, &mut session.kv_cache, session.pos as _);
+            let next = argmax(logits);
 
-                token = next;
-                session.pos += 1;
+            token = next;
+            session.pos += 1;
 
-                out.push_str(&self.tokenizer.decode(next).replace('▁', " "));
-            }
+            out.push_str(&tokenizer.decode(next).replace('▁', " "));
         }
     }
 }
5 changes: 3 additions & 2 deletions xtask/src/service/mod.rs
@@ -21,17 +21,18 @@ pub(crate) struct ServiceArgs {
 
 impl ServiceArgs {
     pub fn launch(self) {
+        crate::common::logger_init(&self.log);
         if self.nvidia {
             #[cfg(detected_cuda)]
             {
-                nvidia::NvidiaService::from(self).run();
+                nvidia::run(self);
             }
             #[cfg(not(detected_cuda))]
             {
                 panic!("Nvidia GPU is not available");
             }
         } else {
-            cpu::CpuService::from(self).run();
+            cpu::run(self);
         }
     }
 }
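As an aside, a minimal, self-contained sketch of the cfg-gated dispatch shape that launch() uses above. detected_cuda is a custom cfg flag, presumably emitted by the crate's build script (which depends on find_cuda_helper) when a CUDA toolkit is found; the function names below are stand-ins, not the real cpu::run / nvidia::run.

// Stand-in for the CPU backend entry point.
fn cpu_run() {
    println!("cpu backend");
}

// Only compiled when the detected_cuda cfg was set at build time.
#[cfg(detected_cuda)]
fn nvidia_run() {
    println!("nvidia backend");
}

fn launch(use_nvidia: bool) {
    if use_nvidia {
        #[cfg(detected_cuda)]
        {
            nvidia_run();
        }
        #[cfg(not(detected_cuda))]
        {
            panic!("Nvidia GPU is not available");
        }
    } else {
        cpu_run();
    }
}

fn main() {
    // Without detected_cuda this takes the CPU path; with it, the nvidia
    // branch is compiled in and reachable instead of the panic.
    launch(false);
}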
14 changes: 2 additions & 12 deletions xtask/src/service/nvidia.rs
@@ -1,15 +1,5 @@
 use super::ServiceArgs;
 
-pub(super) struct NvidiaService {}
-
-impl From<ServiceArgs> for NvidiaService {
-    fn from(_: ServiceArgs) -> Self {
-        todo!()
-    }
-}
-
-impl NvidiaService {
-    pub fn run(self) {
-        todo!()
-    }
+pub(super) fn run(args: ServiceArgs) {
+    todo!()
 }
