From fff1eef93423c782bd0af91eee8feeb40b7d2fc1 Mon Sep 17 00:00:00 2001 From: moe Date: Sat, 23 Nov 2019 17:13:22 +0100 Subject: [PATCH 1/7] implement restoring accurate mtimes for directories and consistent overwrite behaviour when extracting symlinks --- src/archive.rs | 35 +++++++++- src/entry.rs | 170 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 183 insertions(+), 22 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 5299b7fd..55c431c7 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -22,6 +22,9 @@ pub struct ArchiveInner { unpack_xattrs: bool, preserve_permissions: bool, preserve_mtime: bool, + #[cfg(unix)] + preserve_ownership: bool, + overwrite: bool, ignore_zeros: bool, obj: RefCell, } @@ -47,6 +50,8 @@ impl Archive { unpack_xattrs: false, preserve_permissions: false, preserve_mtime: true, + preserve_ownership: false, + overwrite: false, ignore_zeros: false, obj: RefCell::new(obj), pos: Cell::new(0), @@ -117,6 +122,21 @@ impl Archive { self.inner.preserve_permissions = preserve; } + /// Indicate whether ownership information is preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is only present on + /// Unix. + #[cfg(unix)] + pub fn set_preserve_ownership(&mut self, preserve: bool) { + self.inner.preserve_ownership = preserve; + } + + /// Indicate whether files and symlinks should be overwritten on extraction. + pub fn set_overwrite(&mut self, overwrite: bool) { + self.inner.overwrite = overwrite; + } + /// Indicate whether access time information is preserved when unpacking /// this entry. /// @@ -151,9 +171,20 @@ impl<'a> Archive { } fn _unpack(&mut self, dst: &Path) -> io::Result<()> { + let mut deferred_times = vec![]; + for entry in self._entries()? { let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?; - file.unpack_in(dst)?; + file.unpack_in(&mut deferred_times, dst)?; + } + { + // set times on directories + for (dst, time) in deferred_times { + //eprintln!("actually set mtime {} on dir: {:?}", time, dst); + filetime::set_file_times(&dst, time, time).map_err(|e| { + TarError::new(&format!("failed to set mtime for `{}`", dst.display()), e) + })?; + } } Ok(()) } @@ -254,7 +285,9 @@ impl<'a> EntriesFields<'a> { pax_extensions: None, unpack_xattrs: self.archive.inner.unpack_xattrs, preserve_permissions: self.archive.inner.preserve_permissions, + preserve_ownership: self.archive.inner.preserve_ownership, preserve_mtime: self.archive.inner.preserve_mtime, + overwrite: self.archive.inner.overwrite, }; // Store where the next entry is, rounding up by 512 bytes (the size of diff --git a/src/entry.rs b/src/entry.rs index 2f083ef1..b91bc8d3 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -15,6 +15,7 @@ use crate::header::bytes2path; use crate::other; use crate::pax::pax_extensions; use crate::{Archive, Header, PaxExtensions}; +use std::ffi::CString; /// A read-only view into an entry of an archive. /// @@ -39,7 +40,9 @@ pub struct EntryFields<'a> { pub data: Vec>, pub unpack_xattrs: bool, pub preserve_permissions: bool, + pub preserve_ownership: bool, pub preserve_mtime: bool, + pub overwrite: bool, } pub enum EntryIo<'a> { @@ -185,13 +188,19 @@ impl<'a, R: Read> Entry<'a, R> { /// /// let mut ar = Archive::new(File::open("foo.tar").unwrap()); /// + /// let mut deferred_times= vec![]; + /// /// for (i, file) in ar.entries().unwrap().enumerate() { /// let mut file = file.unwrap(); - /// file.unpack(format!("file-{}", i)).unwrap(); + /// file.unpack(&mut deferred_times, format!("file-{}", i)).unwrap(); /// } /// ``` - pub fn unpack>(&mut self, dst: P) -> io::Result { - self.fields.unpack(None, dst.as_ref()) + pub fn unpack>( + &mut self, + deferred_times: &mut Vec<(PathBuf, FileTime)>, + dst: P, + ) -> io::Result { + self.fields.unpack(deferred_times, None, dst.as_ref()) } /// Extracts this file under the specified path, avoiding security issues. @@ -213,13 +222,19 @@ impl<'a, R: Read> Entry<'a, R> { /// /// let mut ar = Archive::new(File::open("foo.tar").unwrap()); /// + /// let mut deferred_times= vec![]; + /// /// for (i, file) in ar.entries().unwrap().enumerate() { /// let mut file = file.unwrap(); - /// file.unpack_in("target").unwrap(); + /// file.unpack_in(&mut deferred_times, "target").unwrap(); /// } /// ``` - pub fn unpack_in>(&mut self, dst: P) -> io::Result { - self.fields.unpack_in(dst.as_ref()) + pub fn unpack_in>( + &mut self, + deferred_times: &mut Vec<(PathBuf, FileTime)>, + dst: P, + ) -> io::Result { + self.fields.unpack_in(deferred_times, dst.as_ref()) } /// Indicate whether extended file attributes (xattrs on Unix) are preserved @@ -242,6 +257,16 @@ impl<'a, R: Read> Entry<'a, R> { self.fields.preserve_permissions = preserve; } + /// Indicate whether ownership information is preserved + /// when unpacking this entry. + /// + /// This flag is disabled by default and is only present on + /// Unix. + #[cfg(unix)] + pub fn set_preserve_ownership(&mut self, preserve: bool) { + self.fields.preserve_ownership = preserve; + } + /// Indicate whether access time information is preserved when unpacking /// this entry. /// @@ -341,7 +366,11 @@ impl<'a> EntryFields<'a> { Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap()))) } - fn unpack_in(&mut self, dst: &Path) -> io::Result { + fn unpack_in( + &mut self, + deferred_times: &mut Vec<(PathBuf, FileTime)>, + dst: &Path, + ) -> io::Result { // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: // * Leading '/'s are trimmed. For example, `///test` is treated as // `test`. @@ -401,7 +430,7 @@ impl<'a> EntryFields<'a> { let canon_target = self.validate_inside_dst(&dst, parent)?; - self.unpack(Some(&canon_target), &file_dst) + self.unpack(deferred_times, Some(&canon_target), &file_dst) .map_err(|e| TarError::new(&format!("failed to unpack `{}`", file_dst.display()), e))?; Ok(true) @@ -425,7 +454,12 @@ impl<'a> EntryFields<'a> { } /// Returns access to the header of this entry in the archive. - fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result { + fn unpack( + &mut self, + deferred_times: &mut Vec<(PathBuf, FileTime)>, + target_base: Option<&Path>, + dst: &Path, + ) -> io::Result { let kind = self.header.entry_type(); if kind.is_dir() { @@ -433,6 +467,20 @@ impl<'a> EntryFields<'a> { if let Ok(mode) = self.header.mode() { set_perms(dst, None, mode, self.preserve_permissions)?; } + #[cfg(unix)] + { + if self.preserve_ownership { + unsafe { + set_owner(&dst, self.header.uid()?, self.header.gid()?)?; + } + } + } + if self.preserve_mtime { + if let Ok(mtime) = self.header.mtime() { + let mtime_set = FileTime::from_unix_time(mtime as i64, 0); + deferred_times.push((dst.to_owned(), mtime_set)); + } + } return Ok(Unpacked::__Nonexhaustive); } else if kind.is_hard_link() || kind.is_symlink() { let src = match self.link_name()? { @@ -484,17 +532,41 @@ impl<'a> EntryFields<'a> { ) })?; } else { - symlink(&src, dst).map_err(|err| { - Error::new( - err.kind(), - format!( - "{} when symlinking {} to {}", - err, - src.display(), - dst.display() - ), - ) - })?; + symlink(&src, dst) + .or_else(|err_io| { + if err_io.kind() == io::ErrorKind::AlreadyExists && self.overwrite { + // remove dest and try once more + std::fs::remove_file(dst).and_then(|()| symlink(&src, dst)) + } else { + Err(err_io) + } + }) + .map_err(|err| { + Error::new( + err.kind(), + format!( + "{} when symlinking {} to {}", + err, + src.display(), + dst.display() + ), + ) + }) + .and_then(|_| { + if self.preserve_mtime { + if let Ok(mtime) = self.header.mtime() { + let mtime_set = FileTime::from_unix_time(mtime as i64, 0); + filetime::set_symlink_file_times(&dst, mtime_set, mtime_set) + .map_err(|e| { + TarError::new( + &format!("failed to set mtime for `{}`", dst.display()), + e, + ) + })?; + } + } + Ok(()) + })?; }; return Ok(Unpacked::__Nonexhaustive); @@ -529,6 +601,14 @@ impl<'a> EntryFields<'a> { if let Ok(mode) = self.header.mode() { set_perms(dst, None, mode, self.preserve_permissions)?; } + #[cfg(unix)] + { + if self.preserve_ownership { + unsafe { + set_owner(&dst, self.header.uid()?, self.header.gid()?)?; + } + } + } return Ok(Unpacked::__Nonexhaustive); } @@ -550,12 +630,14 @@ impl<'a> EntryFields<'a> { let mut f = open(dst).or_else(|err| { if err.kind() != ErrorKind::AlreadyExists { Err(err) - } else { + } else if self.overwrite { match fs::remove_file(dst) { Ok(()) => open(dst), Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst), Err(e) => Err(e), } + } else { + Err(err) } })?; for io in self.data.drain(..) { @@ -596,6 +678,15 @@ impl<'a> EntryFields<'a> { })?; } } + #[cfg(unix)] + { + if self.preserve_ownership { + unsafe { + set_owner(&dst, self.header.uid()?, self.header.gid()?)?; + } + } + } + if let Ok(mode) = self.header.mode() { set_perms(dst, Some(&mut f), mode, self.preserve_permissions)?; } @@ -779,3 +870,40 @@ impl<'a> Read for EntryIo<'a> { } } } + +/// Attempts setting ownership information on provided path. +/// +/// # Safety +/// This is hacky and racy since no file descriptors are used. Might need retrofitting the +/// crate to use `nix` low-level API. +#[cfg(any(unix, target_os = "redox"))] +unsafe fn set_owner>(path: P, uid: u64, gid: u64) -> io::Result<()> { + use std::os::unix::prelude::*; + + //let fd = f.as_raw_fd(); + if uid > libc::uid_t::max_value() as u64 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("uid ({}) of entry would overflow system `uid_t` type", uid), + )); + } + if gid > libc::gid_t::max_value() as u64 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("gid ({}) of entry would overflow system `gid_t` type", gid), + )); + } + let cstr_path = + CString::new(Vec::from(path.as_ref().as_os_str().as_bytes())).map_err(|err_nul| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("unable to create CString from path: {}", err_nul), + ) + })?; + let res_chown = + /*unsafe*/ { libc::chown(cstr_path.as_ptr(), uid as libc::uid_t, gid as libc::gid_t) }; + if res_chown != 0 { + return Err(io::Error::from_raw_os_error(*libc::__errno_location())); + } + Ok(()) +} From 6f84d3d1bd8a5e3777efbc90c4bf42584b3667ec Mon Sep 17 00:00:00 2001 From: moe Date: Sat, 23 Nov 2019 17:35:17 +0100 Subject: [PATCH 2/7] change overwrite default to true --- src/archive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/archive.rs b/src/archive.rs index 55c431c7..cd78e336 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -51,7 +51,7 @@ impl Archive { preserve_permissions: false, preserve_mtime: true, preserve_ownership: false, - overwrite: false, + overwrite: true, ignore_zeros: false, obj: RefCell::new(obj), pos: Cell::new(0), From e3e197838ffbb12d0d773b0fe68cb664d422bc61 Mon Sep 17 00:00:00 2001 From: moe Date: Sat, 23 Nov 2019 17:47:18 +0100 Subject: [PATCH 3/7] improve macos compatibility by abstracting over errno --- src/entry.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/entry.rs b/src/entry.rs index b91bc8d3..0f37eae0 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -903,7 +903,17 @@ unsafe fn set_owner>(path: P, uid: u64, gid: u64) -> io::Result<( let res_chown = /*unsafe*/ { libc::chown(cstr_path.as_ptr(), uid as libc::uid_t, gid as libc::gid_t) }; if res_chown != 0 { - return Err(io::Error::from_raw_os_error(*libc::__errno_location())); + return Err(io::Error::from_raw_os_error(libc_errno())); } Ok(()) } + +#[cfg(all(unix, target_os = "linux"))] +fn libc_errno() -> libc::c_int { + unsafe { *libc::__errno_location() } +} + +#[cfg(all(unix, not(target_os = "linux")))] +fn errno() -> i32 { + unsafe { *libc::__errno() } +} From 2381040529e0a3cdd172aeb47e1ccdb2787930cf Mon Sep 17 00:00:00 2001 From: moe Date: Sat, 23 Nov 2019 17:48:33 +0100 Subject: [PATCH 4/7] remove copy-paste mishap flagging function as `cfg(target_os = "redox")` --- src/entry.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/entry.rs b/src/entry.rs index 0f37eae0..db4bb8da 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -876,7 +876,7 @@ impl<'a> Read for EntryIo<'a> { /// # Safety /// This is hacky and racy since no file descriptors are used. Might need retrofitting the /// crate to use `nix` low-level API. -#[cfg(any(unix, target_os = "redox"))] +#[cfg(unix)] unsafe fn set_owner>(path: P, uid: u64, gid: u64) -> io::Result<()> { use std::os::unix::prelude::*; From 4f5a960f0941a63662c67fd08121a5514acab5b4 Mon Sep 17 00:00:00 2001 From: moe Date: Sat, 23 Nov 2019 17:52:13 +0100 Subject: [PATCH 5/7] improve macos compat, fix mistypes --- src/entry.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/entry.rs b/src/entry.rs index db4bb8da..5cfe198b 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -914,6 +914,6 @@ fn libc_errno() -> libc::c_int { } #[cfg(all(unix, not(target_os = "linux")))] -fn errno() -> i32 { - unsafe { *libc::__errno() } +fn libc_errno() -> i32 { + unsafe { *libc::__error() } } From c86f8846d3a75ae3123130630e500a4eb9eeca2f Mon Sep 17 00:00:00 2001 From: moe Date: Sat, 23 Nov 2019 17:57:25 +0100 Subject: [PATCH 6/7] add cfg attributes for windows compat --- src/archive.rs | 2 ++ src/entry.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/src/archive.rs b/src/archive.rs index cd78e336..0cb9a333 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -50,6 +50,7 @@ impl Archive { unpack_xattrs: false, preserve_permissions: false, preserve_mtime: true, + #[cfg(unix)] preserve_ownership: false, overwrite: true, ignore_zeros: false, @@ -284,6 +285,7 @@ impl<'a> EntriesFields<'a> { long_linkname: None, pax_extensions: None, unpack_xattrs: self.archive.inner.unpack_xattrs, + #[cfg(unix)] preserve_permissions: self.archive.inner.preserve_permissions, preserve_ownership: self.archive.inner.preserve_ownership, preserve_mtime: self.archive.inner.preserve_mtime, diff --git a/src/entry.rs b/src/entry.rs index 5cfe198b..35bd4789 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -15,6 +15,7 @@ use crate::header::bytes2path; use crate::other; use crate::pax::pax_extensions; use crate::{Archive, Header, PaxExtensions}; +#[cfg(unix)] use std::ffi::CString; /// A read-only view into an entry of an archive. From e80a3f8fc5ec4d6eeff087b2f245e379604c8945 Mon Sep 17 00:00:00 2001 From: moe Date: Sat, 23 Nov 2019 18:06:25 +0100 Subject: [PATCH 7/7] fix cfg attributes for windows compat --- src/archive.rs | 2 +- src/entry.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/archive.rs b/src/archive.rs index 0cb9a333..0ae7a7c8 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -285,8 +285,8 @@ impl<'a> EntriesFields<'a> { long_linkname: None, pax_extensions: None, unpack_xattrs: self.archive.inner.unpack_xattrs, - #[cfg(unix)] preserve_permissions: self.archive.inner.preserve_permissions, + #[cfg(unix)] preserve_ownership: self.archive.inner.preserve_ownership, preserve_mtime: self.archive.inner.preserve_mtime, overwrite: self.archive.inner.overwrite, diff --git a/src/entry.rs b/src/entry.rs index 35bd4789..bdc530da 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -41,6 +41,7 @@ pub struct EntryFields<'a> { pub data: Vec>, pub unpack_xattrs: bool, pub preserve_permissions: bool, + #[cfg(unix)] pub preserve_ownership: bool, pub preserve_mtime: bool, pub overwrite: bool,