-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 41c8e3f
Showing
8 changed files
with
809 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
[package] | ||
name = "groestl-gfni" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
rand = "0.8" | ||
groestl = { git = "https://github.com/RustCrypto/hashes.git" } # for comparison | ||
digest = { version = "0.11.0-pre.8", features = ["mac"] } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# AVX512/GFNI implementation of Grøstl | ||
|
||
Grøstl is a SHA-3 finalist that was meant to run well on hardware with AES instructions. Turns out, it runs even better with GFNI instructions thanks to the lower overhead. | ||
|
||
This also runs faster than the RustCrypto implementation, which uses a lookup table instead of specialized instructions. | ||
|
||
Unfortunately, nobody uses it because Keccak was selected by NIST, and BLAKE ended up being popular as well thanks to its simplicity. Additionally, because this requires nightly Rust and a fairly rare breed of CPU (seeing as how Intel wants people to forget about AVX512), I don't really see the utility here at the moment. | ||
|
||
Regardless, if the implementation is correct, this should be a reasonably secure 256-bit hash, on par with BLAKE2s. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[toolchain] | ||
channel = "nightly" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
use std::{sync::{atomic::{compiler_fence, Ordering}, OnceLock}, arch::x86_64::*}; | ||
|
||
use groestl_gfni::digest::{Digest, core_api::CoreWrapper}; | ||
use rand::{thread_rng, RngCore}; | ||
|
||
/// Returns `x * x`.
///
/// Works for any `Copy` type that can be multiplied by itself; the return
/// type is whatever that multiplication yields. Being generic, the function
/// is already available for inlining wherever it is instantiated, so no
/// forced-inline attribute is needed.
fn square<T>(x: T) -> <T as std::ops::Mul<T>>::Output
where
    T: std::ops::Mul<T> + Copy,
{
    x * x
}
|
||
fn profile(gen: impl Digest + Clone) { | ||
static DATA: OnceLock<Vec<u8>> = OnceLock::new(); | ||
let mut hsh = None; | ||
|
||
let data = DATA.get_or_init(|| vec![11; 1024*1024*128]); | ||
let mut cpb = Vec::new(); | ||
|
||
for _ in 0..40 { | ||
compiler_fence(Ordering::SeqCst); | ||
unsafe { __cpuid(0) }; | ||
compiler_fence(Ordering::SeqCst); | ||
let t0 = unsafe { _rdtsc() }; | ||
compiler_fence(Ordering::SeqCst); | ||
let mut h = gen.clone(); | ||
h.update(data); | ||
let hash = h.finalize(); | ||
compiler_fence(Ordering::SeqCst); | ||
let t1 = unsafe { __rdtscp(&mut 0) }; | ||
compiler_fence(Ordering::SeqCst); | ||
unsafe { __cpuid(0) }; | ||
compiler_fence(Ordering::SeqCst); | ||
match hsh { | ||
None => hsh = Some(hash), | ||
Some(ref b) => assert_eq!(*b, hash) | ||
} | ||
cpb.push((t1-t0) as f32 / data.len() as f32); | ||
} | ||
|
||
let mean = cpb.iter().copied().sum::<f32>() / cpb.len() as f32; | ||
let stdev = (cpb.iter().map(|&z| square(z-mean)).sum::<f32>() / cpb.len() as f32).sqrt(); | ||
|
||
println!("mean = {mean}, stdev = {stdev}"); | ||
} | ||
|
||
fn main() { | ||
let mut msg = vec![0; i32::MAX as usize]; // world's most famous mersenne prime | ||
|
||
thread_rng().fill_bytes(&mut msg); | ||
|
||
use std::time::Instant; | ||
|
||
let t = Instant::now(); | ||
let reff = { | ||
let mut h = groestl::Groestl256::new(); | ||
h.update(&msg); | ||
h.finalize() | ||
}; | ||
println!("{}", (Instant::now()-t).as_secs_f32()); | ||
|
||
|
||
let t = Instant::now(); | ||
let test = { | ||
let mut h = groestl_gfni::Groestl256::new(); | ||
h.update(&msg); | ||
h.finalize() | ||
}; | ||
println!("{}", (Instant::now()-t).as_secs_f32()); | ||
|
||
assert_eq!(reff[..], test[..]); | ||
|
||
// let orig = groestl::Groestl256::new(); | ||
// let test = groestl_gfni::Groestl256::new(); | ||
|
||
// profile(orig); | ||
// profile(test); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#![feature(stdarch_x86_avx512)] | ||
#![feature(avx512_target_feature)] | ||
|
||
use std::arch::x86_64::*; | ||
|
||
use rand::RngCore; | ||
|
||
/// Doubles each of the 16 bytes of `x` in GF(2^8): every byte is shifted left
/// by one bit (computed as `x + x`), and bytes whose top bit was set are then
/// XORed with the reduction constant `0x1b`.
///
/// # Safety
///
/// The executing CPU must support SSE2.
#[target_feature(enable = "sse2")]
unsafe fn mul2(x: __m128i) -> __m128i {
    let shifted = _mm_add_epi8(x, x);
    // A byte is negative as a signed value exactly when its top bit is set,
    // so this comparison yields 0xff in those lanes and 0x00 elsewhere.
    let top_bit_mask = _mm_cmplt_epi8(x, _mm_setzero_si128());
    let reduction = _mm_and_si128(_mm_set1_epi8(0x1b), top_bit_mask);
    _mm_xor_si128(shifted, reduction)
}
|
||
/// SSE4.1 variant of the per-byte GF(2^8) doubling: computes both the plain
/// doubled bytes and the doubled bytes XORed with `0x1b`, then uses a byte
/// blend keyed on the top bit of each input byte to pick the reduced value
/// where that bit was set.
///
/// # Safety
///
/// The executing CPU must support SSE2 and SSE4.1.
#[target_feature(enable = "sse2,sse4.1")]
unsafe fn mul2_sse4(x: __m128i) -> __m128i {
    let doubled = _mm_add_epi8(x, x);
    let reduced = _mm_xor_si128(doubled, _mm_set1_epi8(0x1b));
    // `x` itself is the selector: the blend looks only at each byte's top bit.
    _mm_blendv_epi8(doubled, reduced, x)
}
|
||
#[target_feature(enable = "gfni")] | ||
unsafe fn mul2_gfni(x: __m128i) -> __m128i { | ||
_mm_gf2p8mul_epi8(x, _mm_set1_epi8(2)) | ||
} | ||
|
||
fn main() { unsafe { | ||
let mut x = [0; 16]; | ||
rand::thread_rng().fill_bytes(&mut x); | ||
let r0 = _mm_loadu_si128(x.as_ptr() as *const __m128i); | ||
let r1 = mul2(r0); | ||
let r2 = mul2_gfni(r0); | ||
let mut y = [0u8; 16]; | ||
let mut z = [0u8; 16]; | ||
_mm_storeu_si128(y.as_mut_ptr().cast(), r1); | ||
_mm_storeu_si128(z.as_mut_ptr().cast(), r2); | ||
assert_eq!(y, z) | ||
}} |
Oops, something went wrong.