Skip to content

Commit

Permalink
safe tar extraction (#45)
Browse files Browse the repository at this point in the history
* safe tar extraction

* run pre-commit hooks
  • Loading branch information
nmichlo authored Mar 21, 2023
1 parent 4890b62 commit 4b65202
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 4 deletions.
9 changes: 5 additions & 4 deletions disent/dataset/data/_episodes__custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from disent.dataset.data import BaseEpisodesData
from disent.util.inout.files import download_file
from disent.util.inout.paths import filename_from_url
from disent.util.inout.tar import tar_safe_extract_all

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -147,11 +148,11 @@ def _download_and_extract_if_needed(self, download_url: str, required_file: str,
# unzip data
if (save_path != required_file) and not os.path.exists(required_file):
if save_path.endswith(".tar.xz"):
import tarfile

log.info(f"Extracting: {save_path=} to {required_file=}")
with tarfile.open(save_path) as f:
f.extractall(os.path.dirname(required_file))
tar_safe_extract_all(
save_path,
os.path.dirname(required_file),
)
log.info(f"Extracted!")
else:
raise IOError(f"Unsupported extension for: {save_path}")
Expand Down
49 changes: 49 additions & 0 deletions disent/util/inout/tar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
# MIT License
#
# Copyright (c) 2023 Nathan Juraj Michlo
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~

import os
from pathlib import Path
from typing import Union


def tar_safe_extract_all(in_file: Union[Path, str], out_dir: Union[Path, str]) -> None:
    """
    Extract every member of the tar archive `in_file` into `out_dir`, refusing
    to extract anything if a member would be written outside of `out_dir`.

    The whole archive is validated before a single file is written, so a
    rejected archive leaves `out_dir` untouched.

    :param in_file: path to the tar archive (any compression `tarfile.open` can detect)
    :param out_dir: directory that all extracted members must stay inside of
    :raises Exception: if a member name, or the target of a symlink / hardlink
                       member, resolves to a location outside of `out_dir`
    """
    import tarfile

    in_file = str(in_file)
    out_dir = str(out_dir)

    # resolved once, it does not change between members
    abs_dir = os.path.abspath(out_dir)

    def _is_inside_out_dir(path: str) -> bool:
        # `os.path.commonpath` compares whole path components. The character-wise
        # `os.path.commonprefix` would wrongly accept a sibling directory that
        # merely shares a name prefix, e.g. "/data/out_evil" for "/data/out".
        try:
            return os.path.commonpath([abs_dir, os.path.abspath(path)]) == abs_dir
        except ValueError:
            # paths that cannot be compared (e.g. different drives on Windows)
            # can never be inside the output directory
            return False

    def _check_safe_to_extract(tar):
        for member in tar.getmembers():
            # the member itself must land inside the output directory
            if not _is_inside_out_dir(os.path.join(abs_dir, member.name)):
                raise Exception(f"Attempted path traversal in tar file: {member.name!r}")
            # links must not point outside the output directory either, otherwise
            # later members could be written through them
            if member.issym():
                # symlink targets are relative to the directory containing the link
                link_target = os.path.join(abs_dir, os.path.dirname(member.name), member.linkname)
            elif member.islnk():
                # hardlink targets are relative to the root of the archive
                link_target = os.path.join(abs_dir, member.linkname)
            else:
                continue
            if not _is_inside_out_dir(link_target):
                raise Exception(f"Attempted path traversal in tar file: {member.name!r}")

    # validate all members first, only then extract
    with tarfile.open(in_file) as f:
        _check_safe_to_extract(f)
        f.extractall(out_dir)

0 comments on commit 4b65202

Please sign in to comment.