diff --git a/src/chess/board.rs b/src/chess/board.rs
index 39d4210..4602a0f 100644
--- a/src/chess/board.rs
+++ b/src/chess/board.rs
@@ -104,14 +104,14 @@ impl Board {
         board.ep = ep;
         board.castling = castling;
 
-        board.hash = board.generate_zobrist_hash();
-
         for (idx, pc) in pieces.iter().enumerate() {
             for sq in *pc {
                 board.piece_at[sq] = Piece::from(idx);
             }
         }
 
+        board.hash = board.generate_zobrist_hash();
+
         board
     }
 
diff --git a/src/nets.rs b/src/nets.rs
index 3b18ed1..91e27f0 100644
--- a/src/nets.rs
+++ b/src/nets.rs
@@ -1,8 +1,28 @@
 use bytemuck::{self, Pod, Zeroable};
+use goober::activation::Activation;
 use std::fs;
 use std::io::Write;
 use std::path::Path;
 
+// Workaround for error in how goober handles an activation such as SCReLU
+#[derive(Clone, Copy)]
+pub struct SCReLU;
+
+impl Activation for SCReLU {
+    fn activate(x: f32) -> f32 {
+        let clamped = x.clamp(0.0, 1.0);
+        clamped * clamped
+    }
+
+    fn derivative(x: f32) -> f32 {
+        if 0.0 < x && x < 1.0 {
+            2.0 * x.sqrt()
+        } else {
+            0.0
+        }
+    }
+}
+
 #[derive(Clone, Copy, Debug, Zeroable)]
 #[repr(C)]
 pub struct Accumulator {
@@ -33,6 +53,11 @@ pub fn relu(x: i16) -> i32 {
     i32::from(x).max(0)
 }
 
+pub fn screlu(x: i16, q: i32) -> i32 {
+    let clamped = i32::from(x).clamp(0, q);
+    clamped * clamped
+}
+
 pub fn q_i16(x: f32, q: i32) -> i16 {
     let quantized = x * q as f32;
     assert!(f32::from(i16::MIN) < quantized && quantized < f32::from(i16::MAX),);
diff --git a/src/nets/value.bin b/src/nets/value.bin
index 61a181f..5efbb36 100644
Binary files a/src/nets/value.bin and b/src/nets/value.bin differ
diff --git a/src/value.rs b/src/value.rs
index c43f34a..ae8e232 100644
--- a/src/value.rs
+++ b/src/value.rs
@@ -1,5 +1,5 @@
 use bytemuck::{allocation, Pod, Zeroable};
-use goober::activation::{ReLU, Tanh};
+use goober::activation::Tanh;
 use goober::layer::{DenseConnected, SparseConnected};
 use goober::{FeedForwardNetwork, OutputLayer, SparseVector, Vector};
 use std::boxed::Box;
@@ -9,7 +9,7 @@ use std::path::Path;
 
 use crate::math::{randomize_dense, randomize_sparse, Rng};
 use crate::mem::Align64;
-use crate::nets::{q_i16, q_i32, relu, save_to_bin, Accumulator};
+use crate::nets::{q_i16, q_i32, save_to_bin, screlu, Accumulator, SCReLU};
 use crate::state::{self, State};
 
 const INPUT_SIZE: usize = state::VALUE_NUMBER_FEATURES;
@@ -20,7 +20,7 @@ const QA: i32 = 256;
 const QB: i32 = 256;
 const QAB: i32 = QA * QB;
 
-type Feature = SparseConnected<ReLU, INPUT_SIZE, HIDDEN_SIZE>;
+type Feature = SparseConnected<SCReLU, INPUT_SIZE, HIDDEN_SIZE>;
 type Output = DenseConnected<Tanh, { HIDDEN_SIZE * 2 }, OUTPUT_SIZE>;
 
 type QuantizedFeatureWeights = [Align64<Accumulator>; INPUT_SIZE];
@@ -287,16 +287,18 @@ impl QuantizedValueNetwork {
             }
         });
 
-        let mut result: i32 = self.output_bias;
+        let mut result: i32 = 0;
 
         for (&x, w) in stm.vals.iter().zip(self.output_weights[0].vals) {
-            result += relu(x) * i32::from(w);
+            result += screlu(x, QA) * i32::from(w);
        }
 
         for (&x, w) in nstm.vals.iter().zip(self.output_weights[1].vals) {
-            result += relu(x) * i32::from(w);
+            result += screlu(x, QA) * i32::from(w);
         }
 
+        result = result / QA + self.output_bias;
+
         // Fifty move rule dampening
         // Constants are chosen to make the max effect more significant at higher levels and max 50%
         let hmc = state.halfmove_clock();
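
A minimal standalone sketch of the quantization bookkeeping behind the new `result = result / QA + self.output_bias` line, assuming only the QA and QB constants visible in src/value.rs: screlu(x, QA) squares a value already scaled by QA, so each product with an output weight sits at QA * QA * QB rather than the QA * QB scale of output_bias, and dividing the accumulated sum by QA restores the expected scale before the bias is added. The input and weight values below are illustrative only, not taken from the patch.

// Standalone sketch (not part of the patch) of the SCReLU scale bookkeeping.
const QA: i32 = 256;
const QB: i32 = 256;

// Same integer SCReLU as the one added to src/nets.rs.
fn screlu(x: i16, q: i32) -> i32 {
    let clamped = i32::from(x).clamp(0, q);
    clamped * clamped
}

fn main() {
    let x: i16 = 128; // hidden activation 0.5, quantized at QA scale
    let w: i16 = 192; // output weight 0.75, quantized at QB scale

    // screlu(x, QA) is at QA^2 scale, so this product is at QA^2 * QB.
    let term = screlu(x, QA) * i32::from(w);

    // Dividing by QA brings the term back to QA * QB, the scale of
    // output_bias, which is why the bias is added only after the division.
    let result = term / QA;

    // Float reference: clamp(0.5, 0, 1)^2 * 0.75 = 0.1875, at QA * QB scale.
    let expected = (0.1875 * (QA * QB) as f32) as i32;
    assert_eq!(result, expected);
}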