Skip to content

Commit

Permalink
initial
Browse files Browse the repository at this point in the history
  • Loading branch information
robbie01 committed Nov 9, 2024
0 parents commit 41c8e3f
Show file tree
Hide file tree
Showing 8 changed files with 809 additions and 0 deletions.
137 changes: 137 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "groestl-gfni"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rand = "0.8"
groestl = { git = "https://github.com/RustCrypto/hashes.git" } # for comparison
digest = { version = "0.11.0-pre.8", features = ["mac"] }
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# AVX512/GFNI implementation of Grøstl

Grøstl is a SHA-3 finalist that was meant to run well on hardware with AES instructions. Turns out, it runs even better with GFNI instructions thanks to the lower overhead.

This also runs faster than the RustCrypto implementation, which uses a lookup table instead of specialized instructions.

Unfortunately, nobody uses it because Keccak was selected by NIST, and BLAKE ended up being popular as well thanks to its simplicity. Additionally, because this requires nightly Rust and a fairly rare breed of CPU (seeing as how Intel wants people to forget about AVX512), I don't really see the utility here at the moment.

Regardless, if the implementation is correct, this should be a reasonably secure 256-bit hash, on par with BLAKE2s.
2 changes: 2 additions & 0 deletions rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[toolchain]
channel = "nightly"
76 changes: 76 additions & 0 deletions src/bin/bruh.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use std::{sync::{atomic::{compiler_fence, Ordering}, OnceLock}, arch::x86_64::*};

use groestl_gfni::digest::{Digest, core_api::CoreWrapper};
use rand::{thread_rng, RngCore};

/// Returns `x` multiplied by itself.
///
/// Works for any `Copy` type that implements `Mul` with itself; the result type is whatever
/// that multiplication produces.
#[inline(always)]
fn square<T>(x: T) -> <T as std::ops::Mul<T>>::Output
where
    T: std::ops::Mul<T> + Copy,
{
    let (lhs, rhs) = (x, x);
    lhs * rhs
}

/// Benchmarks a digest implementation and prints per-byte timing statistics.
///
/// For each of 40 runs, a clone of `gen` hashes a shared 128 MiB buffer (every byte is `11`)
/// while the run is bracketed by time-stamp-counter reads. The TSC delta divided by the input
/// length is recorded in `cpb`, and the mean and population standard deviation of those
/// samples are printed to stdout.
///
/// # Panics
///
/// Panics if any run produces a digest that differs from the first run's digest.
fn profile(gen: impl Digest + Clone) {
// The input buffer is created once and reused by every call to `profile`.
static DATA: OnceLock<Vec<u8>> = OnceLock::new();
// Digest of the first run; later runs are compared against it.
let mut hsh = None;

let data = DATA.get_or_init(|| vec![11; 1024*1024*128]);
// One sample (TSC ticks per input byte) per run.
let mut cpb = Vec::new();

for _ in 0..40 {
// The compiler fences keep the compiler from reordering memory accesses across the timing
// instructions.
// NOTE(review): `cpuid` is presumably issued as a serializing instruction so earlier work
// completes before the start timestamp is taken — confirm against the intended methodology.
compiler_fence(Ordering::SeqCst);
unsafe { __cpuid(0) };
compiler_fence(Ordering::SeqCst);
// Start timestamp.
let t0 = unsafe { _rdtsc() };
compiler_fence(Ordering::SeqCst);
// Timed region: clone the hasher state, absorb the whole buffer, finalize.
let mut h = gen.clone();
h.update(data);
let hash = h.finalize();
compiler_fence(Ordering::SeqCst);
// End timestamp; the auxiliary value `rdtscp` writes is discarded into a temporary.
let t1 = unsafe { __rdtscp(&mut 0) };
compiler_fence(Ordering::SeqCst);
unsafe { __cpuid(0) };
compiler_fence(Ordering::SeqCst);
// Every run hashes the same data, so every digest must equal the first one.
match hsh {
None => hsh = Some(hash),
Some(ref b) => assert_eq!(*b, hash)
}
cpb.push((t1-t0) as f32 / data.len() as f32);
}

let mean = cpb.iter().copied().sum::<f32>() / cpb.len() as f32;
// Population standard deviation (divides by the sample count, not count - 1).
let stdev = (cpb.iter().map(|&z| square(z-mean)).sum::<f32>() / cpb.len() as f32).sqrt();

println!("mean = {mean}, stdev = {stdev}");
}

/// Hashes one large random message with both Grøstl-256 implementations and compares them.
///
/// Prints the wall-clock time in seconds of the `groestl` crate's hasher first and of this
/// crate's hasher second, then asserts that the two digests are byte-for-byte equal.
fn main() {
    use std::time::Instant;

    // i32::MAX bytes (2^31 - 1, the world's most famous Mersenne prime) of random input.
    let mut input = vec![0; i32::MAX as usize];
    thread_rng().fill_bytes(&mut input);

    // Reference implementation.
    let started = Instant::now();
    let mut reference_hasher = groestl::Groestl256::new();
    reference_hasher.update(&input);
    let expected = reference_hasher.finalize();
    println!("{}", started.elapsed().as_secs_f32());

    // Implementation under test.
    let started = Instant::now();
    let mut gfni_hasher = groestl_gfni::Groestl256::new();
    gfni_hasher.update(&input);
    let actual = gfni_hasher.finalize();
    println!("{}", started.elapsed().as_secs_f32());

    assert_eq!(expected[..], actual[..]);

    // For a cycle-level benchmark instead, pass `groestl::Groestl256::new()` and
    // `groestl_gfni::Groestl256::new()` to `profile`.
}
42 changes: 42 additions & 0 deletions src/bin/drake.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#![feature(stdarch_x86_avx512)]
#![feature(avx512_target_feature)]

use std::arch::x86_64::*;

use rand::RngCore;

/// Doubles each of the 16 bytes of `x` in GF(2^8), using `0x1b` as the reduction constant.
///
/// Each byte is shifted left by one (via `x + x`); bytes whose top bit was set are then
/// XORed with `0x1b`.
///
/// # Safety
///
/// The CPU must support SSE2.
#[target_feature(enable = "sse2")]
unsafe fn mul2(x: __m128i) -> __m128i {
    // Bytes with the top bit set are negative as signed values, so the compare yields 0xff
    // for exactly the lanes that overflow when doubled.
    let overflow_mask = _mm_cmplt_epi8(x, _mm_setzero_si128());
    let reduction = _mm_and_si128(_mm_set1_epi8(0x1b), overflow_mask);
    let doubled = _mm_add_epi8(x, x);
    _mm_xor_si128(doubled, reduction)
}

/// Doubles each of the 16 bytes of `x` in GF(2^8), using `0x1b` as the reduction constant.
///
/// Computes both the plain doubled value and the doubled value XORed with `0x1b`, then
/// picks between them per byte with a blend keyed on the top bit of the input byte.
///
/// # Safety
///
/// The CPU must support SSE2 and SSE4.1.
#[target_feature(enable = "sse2,sse4.1")]
unsafe fn mul2_sse4(x: __m128i) -> __m128i {
    let reduction_constant = _mm_set1_epi8(0x1b);
    let doubled = _mm_add_epi8(x, x);
    let doubled_and_reduced = _mm_xor_si128(doubled, reduction_constant);
    // `blendv` takes the second operand wherever the mask byte's top bit is set; using `x`
    // itself as the mask selects the reduced value exactly for bytes that overflowed.
    _mm_blendv_epi8(doubled, doubled_and_reduced, x)
}

/// Doubles each of the 16 bytes of `x` in GF(2^8) with a single GFNI multiply by the
/// constant `2`.
///
/// # Safety
///
/// The CPU must support GFNI.
#[target_feature(enable = "gfni")]
unsafe fn mul2_gfni(x: __m128i) -> __m128i {
    let two = _mm_set1_epi8(2);
    _mm_gf2p8mul_epi8(x, two)
}

fn main() { unsafe {
let mut x = [0; 16];
rand::thread_rng().fill_bytes(&mut x);
let r0 = _mm_loadu_si128(x.as_ptr() as *const __m128i);
let r1 = mul2(r0);
let r2 = mul2_gfni(r0);
let mut y = [0u8; 16];
let mut z = [0u8; 16];
_mm_storeu_si128(y.as_mut_ptr().cast(), r1);
_mm_storeu_si128(z.as_mut_ptr().cast(), r2);
assert_eq!(y, z)
}}
Loading

0 comments on commit 41c8e3f

Please sign in to comment.