From 96e27f15900aa953a1b5310a8515aec3e83f70aa Mon Sep 17 00:00:00 2001 From: Daniel Boros Date: Sat, 14 Sep 2024 01:16:46 +0200 Subject: [PATCH] feat: add ai modul --- Cargo.toml | 3 +- README.md | 74 ------- stochastic-rs-ai/Cargo.toml | 18 ++ stochastic-rs-ai/src/fou_lstm.rs | 3 + stochastic-rs-ai/src/fou_lstm/datasets.rs | 128 +++++++++++ .../src/fou_lstm/lstm_model_1_d.rs | 201 ++++++++++++++++++ .../src/fou_lstm/lstm_model_2_d.rs | 190 +++++++++++++++++ stochastic-rs-ai/src/lib.rs | 1 + stochastic-rs-ml/Cargo.toml | 6 - stochastic-rs-ml/src/lib.rs | 14 -- 10 files changed, 542 insertions(+), 96 deletions(-) delete mode 100644 README.md create mode 100644 stochastic-rs-ai/Cargo.toml create mode 100644 stochastic-rs-ai/src/fou_lstm.rs create mode 100644 stochastic-rs-ai/src/fou_lstm/datasets.rs create mode 100644 stochastic-rs-ai/src/fou_lstm/lstm_model_1_d.rs create mode 100644 stochastic-rs-ai/src/fou_lstm/lstm_model_2_d.rs create mode 100644 stochastic-rs-ai/src/lib.rs delete mode 100644 stochastic-rs-ml/Cargo.toml delete mode 100644 stochastic-rs-ml/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 4bd9c05..1c9e660 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,6 @@ [workspace] -members = [ +members = [ "stochastic-rs-ai", "stochastic-rs-core", - "stochastic-rs-ml", "stochastic-rs-quant", "stochastic-rs-stats", ] diff --git a/README.md b/README.md deleted file mode 100644 index 745a1f5..0000000 --- a/README.md +++ /dev/null @@ -1,74 +0,0 @@ -![build workflow](https://github.com/dancixx/stochastic-rs/actions/workflows/rust.yml/badge.svg) -[![Crates.io](https://img.shields.io/crates/v/stochastic-rs?style=flat-square)](https://crates.io/crates/stochastic-rs) -![Crates.io](https://img.shields.io/crates/l/stochastic-rs?style=flat-square) - -# Stochastic-rs - -A Rust library for stochastic processes and models. The main goal to provide a simple and easy to use high performance library for stochastic processes and models. This library is still in development and breaking changes may occur. 🚧 - -Documentation is available at [stochastic-rs](https://docs.rs/stochastic-rs/). - - -# Implementations - -- [Rust](https://github.com/dancixx/stochastic-rs) -- [Typescript](https://github.com/dancixx/stochastic-js) - -# Stochastic processes -- [x] Gaussian noise -- [x] Correlated Gaussian noise -- [x] Brownian motion -- [x] Correlated Brownian motion -- [x] Geometric Brownian motion -- [x] Cox-Ingersoll-Ross process -- [x] Ornstein-Uhlenbeck process -- [x] Jacobi process - -# Jumps and Levy processes (unstable) -- [x] Poisson process -- [x] Compound Poisson process -- [x] Fractional Ornstein-Uhlenbeck process with jumps -- [x] Levy jump diffusion -- [x] Inverse Gaussian -- [x] Normal Inverse Gaussian -- [x] Variance Gamma - - -# Stochastic models -- [x] Heston model -- [x] Merton model -- [x] Bates model -- [x] Vasicek model -- [x] SABR model (unstable) -- [x] Duffie-Kan model (unstable) - - -# Fractional Stochastic processes -- [x] Fractional Gaussian noise -- [x] Correlated Gaussian noise -- [x] Fractional Brownian motion -- [x] Correlated Fractional Brownian motion -- [x] Fractional Geometric Brownian motion -- [x] Fractional Ornstein-Uhlenbeck process -- [x] Fractional Cox-Ingersoll-Ross process -- [x] Fractional Jacobi process - -# Features -- [ ] Rough Heston model -- [ ] Bergomi model -- [ ] Rough Bergomi model -- [ ] Hull-White model -- [ ] Barndorff-Nielsen & Shephard model -- [ ] Alpha-stable models -- [ ] CGMY model -- [ ] CIR model -- [ ] Multi-factor CIR model -- [ ] BGM model -- [ ] Wu-Zhang model -- [ ] Affine model -- [ ] Heath-Jarrow-Morton model & Multi-factor Heath-Jarrow-Morton model - -## Future work -- [x] Add more tests -- [x] Add more examples -- [x] Full documentation diff --git a/stochastic-rs-ai/Cargo.toml b/stochastic-rs-ai/Cargo.toml new file mode 100644 index 0000000..074a165 --- /dev/null +++ b/stochastic-rs-ai/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "stochastic-rs-ai" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.88" +candle-core = {version = "0.6.0", features = ["accelerate", "metal"]} +candle-datasets = {version = "0.6.0"} +candle-metal-kernels = "0.6.0" +candle-nn = {version = "0.6.0", features = ["accelerate", "metal"]} +indicatif = "0.17.7" +ndarray = "0.16.1" +ndarray-rand = "0.15.0" +polars = "0.43.1" +polars-io = {version = "0.43.1", features = ["csv"]} +rand_distr = "0.4.3" +stochastic-rs = {version = "0.8.0", path = "../stochastic-rs-core"} diff --git a/stochastic-rs-ai/src/fou_lstm.rs b/stochastic-rs-ai/src/fou_lstm.rs new file mode 100644 index 0000000..cc86451 --- /dev/null +++ b/stochastic-rs-ai/src/fou_lstm.rs @@ -0,0 +1,3 @@ +pub mod datasets; +pub mod lstm_model_1_d; +pub mod lstm_model_2_d; diff --git a/stochastic-rs-ai/src/fou_lstm/datasets.rs b/stochastic-rs-ai/src/fou_lstm/datasets.rs new file mode 100644 index 0000000..1bdcce4 --- /dev/null +++ b/stochastic-rs-ai/src/fou_lstm/datasets.rs @@ -0,0 +1,128 @@ +use anyhow::Result; +use candle_core::{Device, Tensor}; +use candle_datasets::{batcher::IterResult2, Batcher}; +use indicatif::{ProgressBar, ProgressStyle}; +use ndarray::{s, Array1}; +use ndarray_rand::RandomExt; +use rand_distr::Uniform; +use std::vec::IntoIter; +use stochastic_rs::{diffusion::fou::Fou, Sampling}; + +pub fn test_vasicek_1_d( + epoch_size: usize, + batch_size: usize, + n: usize, + device: &Device, +) -> Result<( + Batcher>>>, + Vec, +)> { + let mut paths = Vec::with_capacity(epoch_size); + let mu = 2.8; + let sigma = 1.0; + let thetas = Array1::random(epoch_size, Uniform::new(0.0, 10.0)).to_vec(); + let hursts = Array1::random(epoch_size, Uniform::new(0.01, 0.99)).to_vec(); + let progress_bar = ProgressBar::new(epoch_size as u64); + progress_bar.set_style( + ProgressStyle::with_template( + "{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] ({eta})", + )? + .progress_chars("#>-"), + ); + for idx in 0..epoch_size { + let hurst = hursts[idx]; + let theta = thetas[idx]; + let fou = Fou::new(&Fou { + hurst, + mu, + sigma, + theta, + n, + x0: Some(0.0), + t: Some(16.0), + ..Default::default() + }); + let mut path = fou.sample(); + let mean = path.mean().unwrap(); + let std = path.std(0.0); + path = (path - mean) / std; + + paths.push(Ok(( + Tensor::from_iter(path, device)?, + Tensor::new(&[thetas[idx]], device)?, + ))); + progress_bar.inc(1); + } + progress_bar.finish(); + + let batcher = Batcher::new_r2(paths.into_iter()) + .batch_size(batch_size) + .return_last_incomplete_batch(false); + + Ok((batcher, hursts)) +} + +pub fn test_vasicek_2_d( + epoch_size: usize, + batch_size: usize, + n: usize, + device: &Device, +) -> Result<( + Batcher>>>, + Vec, +)> { + let mut paths = Vec::with_capacity(epoch_size); + let mu = 2.8; + let sigma = 1.0; + let thetas = Array1::random(epoch_size, Uniform::new(0.0, 10.0)).to_vec(); + let hursts = Array1::random(epoch_size, Uniform::new(0.01, 0.99)).to_vec(); + let progress_bar = ProgressBar::new(epoch_size as u64); + progress_bar.set_style( + ProgressStyle::with_template( + "{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] ({eta})", + )? + .progress_chars("#>-"), + ); + for idx in 0..epoch_size { + let hurst = hursts[idx]; + let theta = thetas[idx]; + let fou = Fou::new(&Fou { + hurst, + mu, + sigma, + theta, + n, + x0: Some(0.0), + t: Some(16.0), + ..Default::default() + }); + let mut path = fou.sample(); + let mean = path.mean().unwrap(); + let std = path.std(0.0); + path = (path - mean) / std; + + let diff = &path.slice(s![1..]) - &path.slice(s![..-1]); + let path = path.slice(s![..-1]); + let paired = path.iter().zip(diff.iter()).collect::>(); + let paired_tensors = paired + .iter() + .map(|pair| { + let (x, y) = *pair; + Tensor::new(&[*x, *y], device).unwrap() + }) + .collect::>(); + + paths.push(Ok(( + Tensor::stack(&paired_tensors, 0)?, + Tensor::new(&[thetas[idx]], device)?, + ))); + progress_bar.inc(1); + } + progress_bar.finish(); + + let batcher = Batcher::new_r2(paths.into_iter()) + .batch_size(batch_size) + .return_last_incomplete_batch(false); + + Ok((batcher, hursts)) +} diff --git a/stochastic-rs-ai/src/fou_lstm/lstm_model_1_d.rs b/stochastic-rs-ai/src/fou_lstm/lstm_model_1_d.rs new file mode 100644 index 0000000..68d9513 --- /dev/null +++ b/stochastic-rs-ai/src/fou_lstm/lstm_model_1_d.rs @@ -0,0 +1,201 @@ +use std::{fs::File, time::Instant}; + +use candle_core::{DType, Device, Module, Result, Tensor}; +use candle_nn::{ + layer_norm, linear, loss::mse, lstm, prelu, seq, AdamW, Dropout, LSTMConfig, LayerNorm, + LayerNormConfig, Linear, Optimizer, PReLU, ParamsAdamW, Sequential, VarBuilder, VarMap, LSTM, + RNN, +}; +use polars::prelude::*; + +use super::datasets::test_vasicek_1_d; + +pub struct Model { + is_train: bool, + use_dropout: bool, + linear1: Linear, + linear2: Linear, + dropout: Dropout, + prelu: PReLU, + lstm: Vec, + layer_norm: LayerNorm, + mlp: Sequential, +} + +impl Model { + #[must_use = "new is necessary to create a new instance of Model"] + pub fn new( + vs: VarBuilder, + lstm_features: usize, + hidden_dim: usize, + out_dim: usize, + num_lstm_layers: Option, + use_dropout: Option, + droput_rate: Option, + ) -> Result { + let linear1 = linear(lstm_features, hidden_dim, vs.pp("linear-1"))?; + let linear2 = linear(hidden_dim, hidden_dim, vs.pp("linear-2"))?; + let dropout = Dropout::new(droput_rate.unwrap_or(0.25)); + let prelu = prelu(None, vs.pp("prelu"))?; + let mut lstm_layers = Vec::with_capacity(num_lstm_layers.unwrap_or(2)); + for i in 0..num_lstm_layers.unwrap_or(2) { + lstm_layers.push(lstm( + hidden_dim, + hidden_dim, + LSTMConfig { + layer_idx: i, + ..Default::default() + }, + vs.pp(&format!("lstm-{}", i)), + )?); + } + let layer_n = layer_norm(hidden_dim, LayerNormConfig::default(), vs.pp("layer-norm"))?; + let mlp = seq() + .add(linear(hidden_dim, hidden_dim, vs.pp("mpl-linear-1"))?) + .add_fn(|x| x.relu()) + .add(linear(hidden_dim, hidden_dim / 2, vs.pp("mpl-linear-2"))?) + .add_fn(|x| x.relu()) + .add(linear(hidden_dim / 2, out_dim, vs.pp("mpl-linear-3"))?); + + Ok(Self { + is_train: true, + use_dropout: use_dropout.unwrap_or(true), + linear1, + linear2, + dropout, + prelu, + lstm: lstm_layers, + layer_norm: layer_n, + mlp, + }) + } + + pub fn forward(&self, x: &Tensor) -> Result { + let mut x = x.clone().unsqueeze(1)?; + x = self.prelu.forward(&x)?; + x = self.linear1.forward(&x)?; + x = self.prelu.forward(&x)?; + x = self.linear2.forward(&x)?; + x = self.prelu.forward(&x)?; + if self.use_dropout { + x = self.dropout.forward(&x, self.is_train)?; + } + for (idx, lstm) in self.lstm.iter().enumerate() { + if idx > 0 { + x = x.unsqueeze(1)?; + } + let states = lstm.seq(&x)?; + x = lstm.states_to_tensor(&states)?; + } + x = self.layer_norm.forward(&x)?; + if self.use_dropout { + x = self.dropout.forward(&x, self.is_train)?; + } + let out = self.mlp.forward(&x)?; + Ok(out) + } + + pub fn eval(&mut self) { + self.is_train = false; + } +} + +pub fn test() -> anyhow::Result<()> { + let device = Device::cuda_if_available(0).unwrap_or(Device::Cpu); + let varmap = VarMap::new(); + let vs = VarBuilder::from_varmap(&varmap, DType::F64, &device); + + let epochs = 50_usize; + let epoch_size = 12_800_usize; + let lstm_features = 1_600_usize; + let hidden_dim = 64_usize; + let out_dim = 1_usize; + let batch_size = 64; + let mut net = Model::new( + vs, + lstm_features, + hidden_dim, + out_dim, + Some(3), + Some(false), + Some(0.25), + ) + .unwrap(); + let adamw_params = ParamsAdamW { + lr: 1e-3, + beta1: 0.9, + beta2: 0.999, + eps: 1e-8, + weight_decay: 0.01, + }; + let mut opt = AdamW::new(varmap.all_vars(), adamw_params)?; + + let n: usize = 1600_usize; + let start = Instant::now(); + + for epoch in 0..epochs { + let (batcher, _) = test_vasicek_1_d(epoch_size, batch_size, n, &device)?; + + 'inner: for (batch_idx, batch) in batcher.enumerate() { + match batch { + Ok((x, target)) => { + let inp = net.forward(&x)?; + let loss = mse(&inp, &target)?; + opt.backward_step(&loss)?; + println!( + "Epoch: {}, Batch: {}, Loss: {:?}", + epoch + 1, + batch_idx + 1, + loss.to_scalar::()? + ); + } + Err(_) => break 'inner, + } + } + + println!("Epoch {} took {:?}", epoch + 1, start.elapsed()); + } + + net.eval(); + + // test the model + let (batcher, hursts) = test_vasicek_1_d(epoch_size, batch_size, n, &device)?; + let mut theta = Vec::with_capacity(epoch_size); + let mut est_theta = Vec::with_capacity(epoch_size); + + for batch in batcher { + match batch { + Ok((x, target)) => { + let inp = net.forward(&x)?; + let inp_vec = inp + .to_vec2::()? + .into_iter() + .flatten() + .collect::>(); + let target_vec = target + .to_vec2::()? + .into_iter() + .flatten() + .collect::>(); + theta.push(target_vec); + est_theta.push(inp_vec); + } + Err(_) => break, + } + } + + let theta = theta.into_iter().flatten().collect::>(); + let est_theta = est_theta.into_iter().flatten().collect::>(); + + let mut dataframe = df!( + "alpha" => theta, + "est_alpha" => est_theta, + "hurst" => hursts + )?; + + let writer = File::create("vasicek_hurst=0.01..0.99_alpha=-0.5..10.0_init=0.0_slice=300.csv")?; + let mut csv_writer = CsvWriter::new(writer); + csv_writer.finish(&mut dataframe)?; + + Ok(()) +} diff --git a/stochastic-rs-ai/src/fou_lstm/lstm_model_2_d.rs b/stochastic-rs-ai/src/fou_lstm/lstm_model_2_d.rs new file mode 100644 index 0000000..8a8b645 --- /dev/null +++ b/stochastic-rs-ai/src/fou_lstm/lstm_model_2_d.rs @@ -0,0 +1,190 @@ +use std::{fs::File, time::Instant}; + +use candle_core::{DType, Device, Module, Result, Tensor}; +use candle_nn::{ + layer_norm, linear, loss::mse, lstm, prelu, seq, AdamW, Dropout, LSTMConfig, LayerNorm, + LayerNormConfig, Optimizer, PReLU, ParamsAdamW, Sequential, VarBuilder, VarMap, LSTM, RNN, +}; +use polars::prelude::*; + +use super::datasets::test_vasicek_2_d; + +pub struct Model { + is_train: bool, + use_dropout: bool, + dropout: Dropout, + prelu: PReLU, + lstm: Vec, + layer_norm: LayerNorm, + mlp: Sequential, +} + +impl Model { + #[must_use = "new is necessary to create a new instance of Model"] + pub fn new( + vs: VarBuilder, + lstm_features: usize, + hidden_dim: usize, + out_dim: usize, + num_lstm_layers: Option, + use_dropout: Option, + droput_rate: Option, + ) -> Result { + let dropout = Dropout::new(droput_rate.unwrap_or(0.25)); + let prelu = prelu(None, vs.pp("prelu"))?; + let mut lstm_layers = Vec::with_capacity(num_lstm_layers.unwrap_or(2)); + for i in 0..num_lstm_layers.unwrap_or(2) { + lstm_layers.push(lstm( + if i == 0 { lstm_features } else { hidden_dim }, + hidden_dim, + LSTMConfig { + layer_idx: i, + ..Default::default() + }, + vs.pp(&format!("lstm-{}", i)), + )?); + } + let layer_n = layer_norm(hidden_dim, LayerNormConfig::default(), vs.pp("layer-norm"))?; + let mlp = seq() + .add(linear(hidden_dim, hidden_dim, vs.pp("mpl-linear-1"))?) + .add_fn(|x| x.relu()) + .add(linear(hidden_dim, hidden_dim / 2, vs.pp("mpl-linear-2"))?) + .add_fn(|x| x.relu()) + .add(linear(hidden_dim / 2, out_dim, vs.pp("mpl-linear-3"))?); + + Ok(Self { + is_train: true, + use_dropout: use_dropout.unwrap_or(true), + dropout, + prelu, + lstm: lstm_layers, + layer_norm: layer_n, + mlp, + }) + } + + pub fn forward(&self, x: &Tensor) -> Result { + let mut x = x.clone(); + x = self.prelu.forward(&x)?; + if self.use_dropout { + x = self.dropout.forward(&x, self.is_train)?; + } + for lstm in self.lstm.iter() { + let states = lstm.seq(&x)?; + x = lstm + .states_to_tensor(&states)? + .reshape(&[x.dims()[0], x.dims()[1], 64])?; + } + if self.use_dropout { + x = self.dropout.forward(&x, self.is_train)?; + } + x = self.layer_norm.forward(&x)?; + let out = self.mlp.forward(&x)?; + Ok(out) + } + + pub fn eval(&mut self) { + self.is_train = false; + } +} + +pub fn test() -> anyhow::Result<()> { + let device = Device::cuda_if_available(0).unwrap_or(Device::Cpu); + let varmap = VarMap::new(); + let vs = VarBuilder::from_varmap(&varmap, DType::F64, &device); + + let epochs = 50_usize; + let epoch_size = 12_800_usize; + let lstm_features = 2_usize; + let hidden_dim = 64_usize; + let out_dim = 1_usize; + let batch_size = 64; + let mut net = Model::new( + vs, + lstm_features, + hidden_dim, + out_dim, + Some(3), + Some(false), + Some(0.25), + ) + .unwrap(); + let adamw_params = ParamsAdamW { + lr: 1e-3, + beta1: 0.9, + beta2: 0.999, + eps: 1e-8, + weight_decay: 0.01, + }; + + let mut opt = AdamW::new(varmap.all_vars(), adamw_params)?; + + let n: usize = 1600_usize; + let start = Instant::now(); + + for epoch in 0..epochs { + let (batcher, _) = test_vasicek_2_d(epoch_size, batch_size, n, &device)?; + + 'inner: for (batch_idx, batch) in batcher.enumerate() { + match batch { + Ok((x, target)) => { + let inp = net.forward(&x)?; + let loss = mse(&inp.mean(1)?, &target)?; + opt.backward_step(&loss)?; + println!( + "Epoch: {}, Batch: {}, Loss: {:?}", + epoch + 1, + batch_idx + 1, + loss.to_scalar::()? + ); + } + Err(_) => break 'inner, + } + } + + println!("Epoch {} took {:?}", epoch + 1, start.elapsed()); + } + + net.eval(); + + // test the model + let (batcher, hursts) = test_vasicek_2_d(epoch_size, batch_size, n, &device)?; + let mut theta = Vec::with_capacity(epoch_size); + let mut est_theta = Vec::with_capacity(epoch_size); + + for batch in batcher { + match batch { + Ok((x, target)) => { + let inp = net.forward(&x)?; + let inp_vec = inp + .to_vec2::()? + .into_iter() + .flatten() + .collect::>(); + let target_vec = target + .to_vec2::()? + .into_iter() + .flatten() + .collect::>(); + theta.push(target_vec); + est_theta.push(inp_vec); + } + Err(_) => break, + } + } + + let theta = theta.into_iter().flatten().collect::>(); + let est_theta = est_theta.into_iter().flatten().collect::>(); + + let mut dataframe = df!( + "alpha" => theta, + "est_alpha" => est_theta, + "hurst" => hursts + )?; + + let writer = File::create("vasicek_hurst=0.01..0.99_alpha=-0.5..10.0_init=0.0_slice=300.csv")?; + let mut csv_writer = CsvWriter::new(writer); + csv_writer.finish(&mut dataframe)?; + + Ok(()) +} diff --git a/stochastic-rs-ai/src/lib.rs b/stochastic-rs-ai/src/lib.rs new file mode 100644 index 0000000..65b7647 --- /dev/null +++ b/stochastic-rs-ai/src/lib.rs @@ -0,0 +1 @@ +pub mod fou_lstm; diff --git a/stochastic-rs-ml/Cargo.toml b/stochastic-rs-ml/Cargo.toml deleted file mode 100644 index 8ee56da..0000000 --- a/stochastic-rs-ml/Cargo.toml +++ /dev/null @@ -1,6 +0,0 @@ -[package] -name = "stochastic-rs-ml" -version = "0.1.0" -edition = "2021" - -[dependencies] diff --git a/stochastic-rs-ml/src/lib.rs b/stochastic-rs-ml/src/lib.rs deleted file mode 100644 index 78ba4fe..0000000 --- a/stochastic-rs-ml/src/lib.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -}