diff --git a/Cargo.lock b/Cargo.lock index c168272..414b097 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2146,6 +2146,14 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "fstools" +version = "0.0.0" +dependencies = [ + "format", + "souls_vfs", +] + [[package]] name = "futures-core" version = "0.3.30" diff --git a/Cargo.toml b/Cargo.toml index 2af5df8..bc5caec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,11 @@ +[package] +name = "fstools" +edition = "2021" + +[dependencies] +format = { path = "format" } +souls_vfs = { path = "vfs" } + [workspace] resolver = "2" default-members = ["cli", "viewer"] diff --git a/cli/src/bin/dcx-extract.rs b/cli/src/bin/dcx-extract.rs index 20e7539..61ad607 100644 --- a/cli/src/bin/dcx-extract.rs +++ b/cli/src/bin/dcx-extract.rs @@ -1,8 +1,4 @@ -use std::io::{Read, Write}; - use clap::Parser; -use format::{bnd4::BND4, dcx::Dcx}; -use memmap2::MmapOptions; #[derive(Parser, Debug)] #[command(version, about, long_about = None)] @@ -12,44 +8,45 @@ struct Args { } fn main() -> Result<(), std::io::Error> { - let args = Args::parse(); - let path = std::path::PathBuf::from(args.file); - - let dcx_file = std::fs::File::open(&path)?; - let data = unsafe { - MmapOptions::new() - .populate() - .map_copy_read_only(&dcx_file)? 
- }; - - let dcx = Dcx::parse(&data).unwrap(); - - let mut decoder = dcx.create_decoder().expect("Could not create decoder"); - - let mut decompressed = Vec::with_capacity(decoder.hint_size()); - decoder.read_to_end(&mut decompressed)?; - - let mut cursor = std::io::Cursor::new(decompressed); - let bnd4 = BND4::from_reader(&mut cursor)?; - - let folder = format!( - "{}/{}/", - path.parent().unwrap().to_str().unwrap(), - path.file_stem().unwrap().to_str().unwrap(), - ); - - for entry in bnd4.files.iter() { - let trimmed_path = entry.path.replace("N:\\", "").replace('\\', "/"); - let output_path = std::path::PathBuf::from(folder.clone()).join(trimmed_path.as_str()); - - let parent = output_path.parent().unwrap(); - std::fs::create_dir_all(parent)?; - - let bytes = entry.bytes(&mut cursor)?; - - let mut file = std::fs::File::create(&output_path)?; - file.write_all(&bytes)?; - } - - Ok(()) + // let args = Args::parse(); + // let path = std::path::PathBuf::from(args.file); + // + // let dcx_file = std::fs::File::open(&path)?; + // let data = unsafe { + // MmapOptions::new() + // .populate() + // .map_copy_read_only(&dcx_file)? 
+ // }; + // + // // let dcx = DcxHeader::parse_no_verify(&data).unwrap(); + // + // let mut decoder = dcx.create_decoder().expect("Could not create decoder"); + // + // let mut decompressed = Vec::with_capacity(decoder.hint_size()); + // decoder.read_to_end(&mut decompressed)?; + // + // let mut cursor = std::io::Cursor::new(decompressed); + // let bnd4 = BND4::from_reader(&mut cursor)?; + // + // let folder = format!( + // "{}/{}/", + // path.parent().unwrap().to_str().unwrap(), + // path.file_stem().unwrap().to_str().unwrap(), + // ); + // + // for entry in bnd4.files.iter() { + // let trimmed_path = entry.path.replace("N:\\", "").replace('\\', "/"); + // let output_path = std::path::PathBuf::from(folder.clone()).join(trimmed_path.as_str()); + // + // let parent = output_path.parent().unwrap(); + // std::fs::create_dir_all(parent)?; + // + // let bytes = entry.bytes(&mut cursor)?; + // + // let mut file = std::fs::File::create(&output_path)?; + // file.write_all(&bytes)?; + // } + // + // Ok(()) + todo!("FIXME") } diff --git a/format/src/dcx/deflate.rs b/format/src/dcx/deflate.rs index 435a72b..e7d995e 100644 --- a/format/src/dcx/deflate.rs +++ b/format/src/dcx/deflate.rs @@ -2,15 +2,15 @@ use std::io::{self, Read}; use flate2::read::ZlibDecoder; -pub struct DcxDecoderDeflate<'a>(ZlibDecoder<&'a [u8]>); +pub struct DcxDecoderDeflate(ZlibDecoder); -impl<'a> DcxDecoderDeflate<'a> { - pub fn from_buffer(buf: &'a [u8]) -> Self { - Self(ZlibDecoder::new(buf)) +impl DcxDecoderDeflate { + pub fn new(reader: R) -> Self { + Self(ZlibDecoder::new(reader)) } } -impl<'a> Read for DcxDecoderDeflate<'a> { +impl Read for DcxDecoderDeflate { fn read(&mut self, buf: &mut [u8]) -> io::Result { self.0.read(buf) } diff --git a/format/src/dcx/kraken.rs b/format/src/dcx/kraken.rs index be8bbe8..77638c4 100644 --- a/format/src/dcx/kraken.rs +++ b/format/src/dcx/kraken.rs @@ -1,65 +1,146 @@ -use std::io::{Cursor, Error, ErrorKind, Read, Result}; +use std::{ + cmp::min, + io::{BufRead, 
BufReader, Error, Read, Result, Take}, + ptr::null_mut, +}; -use byteorder::BE; use oodle_sys::{ - OodleLZ_Decode_ThreadPhase_OodleLZ_Decode_ThreadPhaseAll, OodleLZ_Decompress, OODLELZ_FAILED, + OodleLZDecoder, OodleLZDecoder_Create, OodleLZDecoder_DecodeSome, + OodleLZ_CheckCRC_OodleLZ_CheckCRC_Yes, OodleLZ_Compressor_OodleLZ_Compressor_Kraken, + OodleLZ_DecodeSome_Out, OodleLZ_Decode_ThreadPhase_OodleLZ_Decode_Unthreaded, + OodleLZ_FuzzSafe_OodleLZ_FuzzSafe_Yes, OodleLZ_Verbosity_OodleLZ_Verbosity_Lots, + OODLELZ_BLOCK_LEN, }; -use zerocopy::U32; -pub struct DcxDecoderKraken<'a> { - compressed: &'a [u8], - uncompressed_size: U32, - inner_cursor: Option>>, +pub struct DcxDecoderKraken { + reader: BufReader>, + + /// The total size of the raw data expected to be read from the underlying stream. + uncompressed_size: u32, + + /// The Oodle decoder instance created for this buffer. + decoder: *mut OodleLZDecoder, + + /// A sliding window of bytes decoded by the compressor, large enough to keep the past block in + /// memory while the next block is decoded. + sliding_window: Box<[u8]>, + + /// The decoder's position into the sliding window. + sliding_window_pos: usize, + + /// The number of bytes that the consuming reader is lagging behind the decoder. 
+ sliding_window_lag: usize, } -impl<'a> DcxDecoderKraken<'a> { - pub fn from_buffer(buf: &'a [u8], uncompressed_size: U32) -> Self { +impl DcxDecoderKraken { + // TODO: fix vfs reader so it isn't producing padding + pub fn new(reader: Take, uncompressed_size: u32) -> Self { + let compressor = OodleLZ_Compressor_OodleLZ_Compressor_Kraken; + let decoder = unsafe { + OodleLZDecoder_Create(compressor, uncompressed_size as i64, null_mut(), 0isize) + }; + + if decoder.is_null() { + panic!("return error here: failed to create decoder, check oodle error"); + } + + let sliding_window = Box::new([0u8; (OODLELZ_BLOCK_LEN * 2) as usize]); + Self { - compressed: buf, + decoder, + reader: BufReader::with_capacity(OODLELZ_BLOCK_LEN as usize, reader), + sliding_window, + sliding_window_pos: 0, + sliding_window_lag: 0, uncompressed_size, - inner_cursor: None, } } } -impl<'a> Read for DcxDecoderKraken<'a> { - // TODO: implement somewhat incremental reading by working with oodle's - // blocks per example in docs. - // It currently just decompresses the entire input one go and then - // operates a Cursor wrapping the decompressed bytes. 
+ +impl Read for DcxDecoderKraken { fn read(&mut self, buf: &mut [u8]) -> Result { - if self.inner_cursor.is_none() { - let mut inner_buffer = vec![0u8; self.uncompressed_size.get() as usize]; - let compressed_len = - isize::try_from(inner_buffer.len()).map_err(|e| Error::new(ErrorKind::Other, e))?; - let inner_buffer_len = - isize::try_from(inner_buffer.len()).map_err(|e| Error::new(ErrorKind::Other, e))?; + if buf.is_empty() { + return Ok(0); + } + + let mut total_written = 0usize; + while total_written < buf.len() { + let wpos = self.sliding_window_pos; + + // Check if there's data to be written from the sliding window first + if self.sliding_window_lag > 0 { + let bytes_to_copy = min(self.sliding_window_lag, buf.len() - total_written); + let start = self.sliding_window_pos - self.sliding_window_lag; + let end = start + bytes_to_copy; + let src = &self.sliding_window[start..end]; + let dest = &mut buf[total_written..total_written + bytes_to_copy]; + + dest.copy_from_slice(src); + + self.sliding_window_lag -= bytes_to_copy; + total_written += bytes_to_copy; + + continue; + } + + // Read and decode new data + let input_data = self.reader.fill_buf()?; + if input_data.is_empty() { + break; // EOF reached + } + + let mut out: OodleLZ_DecodeSome_Out = unsafe { std::mem::zeroed() }; let result = unsafe { - OodleLZ_Decompress( - self.compressed.as_ptr() as *const _, - compressed_len, - inner_buffer.as_mut_ptr() as *mut _, - inner_buffer_len, - oodle_sys::OodleLZ_FuzzSafe_OodleLZ_FuzzSafe_Yes, - 0, - 0, - std::ptr::null_mut(), - 0, - None, - std::ptr::null_mut(), - std::ptr::null_mut(), - 0, - OodleLZ_Decode_ThreadPhase_OodleLZ_Decode_ThreadPhaseAll, - ) as usize + // EXTREMELY unlikely, however unsound otherwise. + let input_data_len = isize::try_from(input_data.len()).unwrap_or(isize::MAX); + + // SAFETY: + // - Signedness conversions of offsets are all valid, given that + // `sliding_window.len() <= i32::MAX` and `self.uncompressed_size < isize::MAX`. 
+ // - Consumed `input_data_len` is capped at i32::MAX + OodleLZDecoder_DecodeSome( + self.decoder, + &mut out as *mut _, + self.sliding_window.as_mut_ptr() as *mut _, + wpos as isize, + self.uncompressed_size as _, + (self.sliding_window.len() - wpos) as isize, + input_data.as_ptr() as *const _, + input_data_len, + OodleLZ_FuzzSafe_OodleLZ_FuzzSafe_Yes, + OodleLZ_CheckCRC_OodleLZ_CheckCRC_Yes, + OodleLZ_Verbosity_OodleLZ_Verbosity_Lots, + OodleLZ_Decode_ThreadPhase_OodleLZ_Decode_Unthreaded, + ) }; - if result == OODLELZ_FAILED as usize { - return Err(Error::from(ErrorKind::Other)); + if result == 0 { + return Err(Error::other("Oodle decoder failed")); } - self.inner_cursor = Some(Cursor::new(inner_buffer)); + let decoded_bytes = out.decodedCount as usize; + let consumed_bytes = out.compBufUsed as usize; + + self.reader.consume(consumed_bytes); + + let bytes_to_copy = min(decoded_bytes, buf.len() - total_written); + let dest = &mut buf[total_written..total_written + bytes_to_copy]; + let src = &self.sliding_window[wpos..wpos + bytes_to_copy]; + + dest.copy_from_slice(src); + + self.sliding_window_pos += decoded_bytes; + self.sliding_window_lag = decoded_bytes - bytes_to_copy; + total_written += bytes_to_copy; + + // Manage sliding window + if self.sliding_window_pos >= self.sliding_window.len() { + self.sliding_window.rotate_left(self.sliding_window_pos); + self.sliding_window_pos = 0; + } } - self.inner_cursor.as_mut().unwrap().read(buf) + Ok(total_written) } } diff --git a/format/src/dcx/mod.rs b/format/src/dcx/mod.rs index 18b6f9e..07b9a80 100644 --- a/format/src/dcx/mod.rs +++ b/format/src/dcx/mod.rs @@ -1,8 +1,11 @@ -use std::{io, io::Read}; +use std::{ + io::{Error, Read}, + mem::size_of, +}; use byteorder::BE; use thiserror::Error; -use zerocopy::{FromBytes, FromZeroes, Ref, U32}; +use zerocopy::{FromBytes, FromZeroes, U32}; use self::{deflate::DcxDecoderDeflate, kraken::DcxDecoderKraken}; @@ -16,7 +19,7 @@ const MAGIC_ALGORITHM_DEFLATE: &[u8; 4] = b"DFLT"; 
#[derive(Debug, Error)] pub enum DcxError { #[error("Could not copy bytes {0}")] - Io(#[from] io::Error), + Io(#[from] std::io::Error), #[error("Unrecognized DCX compression algorithm: {0:x?}")] UnknownAlgorithm([u8; 4]), @@ -34,48 +37,34 @@ pub enum DecompressionError { Zlib, } -#[derive(FromZeroes, FromBytes)] -#[repr(C)] -#[allow(unused)] -pub struct Dcx<'a> { - bytes: &'a [u8], - - header: &'a Header, - sizes: &'a Sizes, - compression_parameters: &'a CompressionParameters, - additional: &'a Additional, - compressed: &'a [u8], +#[derive(FromBytes, FromZeroes)] +#[repr(packed)] +pub struct DcxHeader { + metadata: Metadata, + sizes: Sizes, + compression_parameters: CompressionParameters, + _additional: Additional, } -impl<'a> Dcx<'a> { - // TODO: add magic validation - pub fn parse(bytes: &'a [u8]) -> Option { - let (header, next) = Ref::<_, Header>::new_from_prefix(bytes)?; - let (sizes, next) = Ref::<_, Sizes>::new_from_prefix(next)?; - let (compression_parameters, next) = - Ref::<_, CompressionParameters>::new_from_prefix(next)?; - let (additional, rest) = Ref::<_, Additional>::new_from_prefix(next)?; - - Some(Self { - bytes, - header: header.into_ref(), - sizes: sizes.into_ref(), - compression_parameters: compression_parameters.into_ref(), - additional: additional.into_ref(), - compressed: rest, - }) +impl DcxHeader { + pub fn read(mut reader: R) -> Result<(DcxHeader, DcxContentDecoder), DcxError> { + let mut header_data = [0u8; size_of::()]; + reader.read_exact(&mut header_data)?; + + let dcx = DcxHeader::read_from(&header_data).ok_or(Error::other("unaligned DCX header"))?; + let decoder = dcx.create_decoder(reader)?; + + Ok((dcx, decoder)) } - pub fn create_decoder(&self) -> Result { + pub fn create_decoder(&self, reader: R) -> Result, DcxError> { let algorithm = &self.compression_parameters.algorithm; let decoder = match algorithm { - MAGIC_ALGORITHM_KRAKEN => Decoder::Kraken(DcxDecoderKraken::from_buffer( - self.compressed, - self.sizes.uncompressed_size, 
+ MAGIC_ALGORITHM_KRAKEN => Decoder::Kraken(DcxDecoderKraken::new( + reader.take(self.sizes.compressed_size.get() as u64), + self.sizes.uncompressed_size.get(), )), - MAGIC_ALGORITHM_DEFLATE => { - Decoder::Deflate(DcxDecoderDeflate::from_buffer(self.compressed)) - } + MAGIC_ALGORITHM_DEFLATE => Decoder::Deflate(DcxDecoderDeflate::new(reader)), _ => return Err(DcxError::UnknownAlgorithm(algorithm.to_owned())), }; @@ -93,36 +82,36 @@ impl<'a> Dcx<'a> { } } -impl<'a> std::fmt::Debug for Dcx<'a> { +impl std::fmt::Debug for DcxHeader { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("DCX") - .field("header", self.header) - .field("sizes", self.sizes) - .field("compression_parameters", self.compression_parameters) + .field("header", &self.metadata) + .field("sizes", &self.sizes) + .field("compression_parameters", &self.compression_parameters) .finish() } } -pub enum Decoder<'a> { - Kraken(DcxDecoderKraken<'a>), - Deflate(DcxDecoderDeflate<'a>), +pub enum Decoder { + Kraken(DcxDecoderKraken), + Deflate(DcxDecoderDeflate), } -pub struct DcxContentDecoder<'a> { +pub struct DcxContentDecoder { /// Size of the contents once decompressed. uncompressed_size: U32, - decoder: Decoder<'a>, + decoder: Decoder, } -impl<'a> DcxContentDecoder<'a> { +impl DcxContentDecoder { pub fn hint_size(&self) -> usize { self.uncompressed_size.get() as usize } } -impl<'a> Read for DcxContentDecoder<'a> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { +impl Read for DcxContentDecoder { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { match &mut self.decoder { Decoder::Kraken(d) => d.read(buf), Decoder::Deflate(d) => d.read(buf), @@ -134,7 +123,7 @@ impl<'a> Read for DcxContentDecoder<'a> { #[repr(C)] #[allow(unused)] /// The DCX chunk. Describes the layout of the container. 
-struct Header { +struct Metadata { chunk_magic: [u8; 4], /// Overal Dcx file version diff --git a/format/src/lib.rs b/format/src/lib.rs index a31f735..2f7ebc4 100644 --- a/format/src/lib.rs +++ b/format/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(c_variadic)] #![feature(trait_alias)] #![feature(ptr_metadata)] pub mod bhd; diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..352b11e --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,11 @@ +pub mod formats { + pub use format::*; +} + +pub mod vfs { + pub use souls_vfs::*; +} + +pub mod prelude { + pub use super::{formats::*, vfs::*}; +} diff --git a/tests/dcx.rs b/tests/dcx.rs new file mode 100644 index 0000000..f38cd78 --- /dev/null +++ b/tests/dcx.rs @@ -0,0 +1,25 @@ +use std::{error::Error, path::PathBuf}; + +use format::dcx::DcxHeader; +pub use fstools::prelude::*; + +#[test] +pub fn decode_kraken_dcx() -> Result<(), Box> { + let er_path = PathBuf::from(std::env::var("ER_PATH").expect("no elden ring path provided")); + let keys = FileKeyProvider::new("keys"); + let archives = [ + er_path.join("Data0"), + er_path.join("Data1"), + er_path.join("Data2"), + er_path.join("Data3"), + er_path.join("sd/sd"), + ]; + + let vfs = Vfs::create(archives.clone(), &keys).expect("unable to create vfs"); + let file = vfs.open("/map/m60/m60_44_58_00/m60_44_58_00_445800.mapbnd.dcx")?; + let (_, mut reader) = DcxHeader::read(file)?; + + std::io::copy(&mut reader, &mut std::io::sink())?; + + Ok(()) +} diff --git a/vfs/src/bnd.rs b/vfs/src/bnd.rs index 0fde0c7..4062231 100644 --- a/vfs/src/bnd.rs +++ b/vfs/src/bnd.rs @@ -1,12 +1,9 @@ use std::{ collections::HashMap, - io::{self, Cursor, Read}, + io::{self, Cursor}, }; -use format::{ - bnd4::BND4, - dcx::{Dcx, DcxError}, -}; +use format::{bnd4::BND4, dcx::DcxError}; use thiserror::Error; use crate::{Name, VfsOpenError}; @@ -100,16 +97,18 @@ pub struct BndFileEntry { // Optionally undoes any Dcx compression when detected. 
Unfortunately there is // no guarantee that any file will be Dcx compressed but they usually are // meaning that the hot path will generally involve a copy. -pub fn undo_container_compression(buf: &[u8]) -> Result, DcxError> { - if Dcx::has_magic(buf) { - let dcx = Dcx::parse(buf).ok_or(DcxError::ParserError)?; - - let mut decoder = dcx.create_decoder()?; - let mut decompressed = Vec::with_capacity(decoder.hint_size()); - decoder.read_to_end(&mut decompressed)?; - - Ok(decompressed) - } else { - Ok(buf.to_vec()) - } +pub fn undo_container_compression(_buf: &[u8]) -> Result, DcxError> { + // if DcxHeader::has_magic(buf) { + // let dcx = DcxHeader::ref_from_prefix(buf).ok_or(DcxError::ParserError)?; + // + // let mut decoder = dcx.create_decoder()?; + // let mut decompressed = Vec::with_capacity(decoder.hint_size()); + // decoder.read_to_end(&mut decompressed)?; + // + // Ok(decompressed) + // } else { + // Ok(buf.to_vec()) + // } + + todo!("FIXME") } diff --git a/vfs/src/lib.rs b/vfs/src/lib.rs index 2a13206..e42b20d 100644 --- a/vfs/src/lib.rs +++ b/vfs/src/lib.rs @@ -112,6 +112,7 @@ impl Vfs { let offset = entry.file_offset as usize; let size = entry.file_size_with_padding as usize; + println!("{}", size); // Since its an optimization we don't really care about the // result. let _ = mmap.advise_range(Advice::Sequential, offset, size);