From 5bd59b07926cb1ba0b96e1ca697682aecd2994d1 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Thu, 15 Feb 2024 21:56:19 +0100 Subject: [PATCH 1/5] fix(tar) properly extract relative symlinks symlinks like sbin/halt -> ../bin/busybox were mistakenly dropped before. Co-authored-by: Andrew Fasano --- unblob/handlers/archive/_safe_tarfile.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/unblob/handlers/archive/_safe_tarfile.py b/unblob/handlers/archive/_safe_tarfile.py index 0ecc2e081e..5c6e143883 100644 --- a/unblob/handlers/archive/_safe_tarfile.py +++ b/unblob/handlers/archive/_safe_tarfile.py @@ -83,10 +83,9 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901 "Converted to extraction relative path.", ) tarinfo.linkname = f"./{tarinfo.linkname}" - if not is_safe_path( - basedir=extract_root, - path=extract_root / tarinfo.linkname, - ): + + resolved_path = (extract_root / tarinfo.name).parent / tarinfo.linkname + if not is_safe_path(basedir=extract_root, path=resolved_path): self.record_problem( tarinfo, "Traversal attempt through link path.", From e313904b697441bbf02c59101e2f1d9b44b11a52 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Thu, 15 Feb 2024 22:18:25 +0100 Subject: [PATCH 2/5] fix(tar) absolute symlink extraction Absolute targets were converted before, but it was too simple to be right other than when the link target was in the same directory. With this change absolute symlinks like /sbin/ls -> /bin/busybox are converted to /sbin/ls -> ../bin/busybox Co-authored-by: Andrew Fasano --- unblob/handlers/archive/_safe_tarfile.py | 25 ++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/unblob/handlers/archive/_safe_tarfile.py b/unblob/handlers/archive/_safe_tarfile.py index 5c6e143883..86203f454b 100644 --- a/unblob/handlers/archive/_safe_tarfile.py +++ b/unblob/handlers/archive/_safe_tarfile.py @@ -63,7 +63,7 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901 "Absolute path.", "Converted to extraction relative path.", ) - tarinfo.name = f"./{tarinfo.name}" + tarinfo.name = str(Path(tarinfo.name).relative_to("/")) # prevent traversal attempts through file name if not is_safe_path(basedir=extract_root, path=extract_root / tarinfo.name): @@ -77,12 +77,33 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901 # prevent traversal attempts through links if tarinfo.islnk() or tarinfo.issym(): if Path(tarinfo.linkname).is_absolute(): + + def calculate_linkname(): + root = extract_root.resolve() + path = (extract_root / tarinfo.name).resolve() + + if path.parts[: len(root.parts)] != root.parts: + return None + + depth = max(0, len(path.parts) - len(root.parts) - 1) + return ("/".join([".."] * depth) or ".") + tarinfo.linkname + + relative_linkname = calculate_linkname() + if relative_linkname is None: + self.record_problem( + tarinfo, + "Absolute path conversion to extraction relative failed - would escape root.", + "Skipped.", + ) + return + + assert not Path(relative_linkname).is_absolute() self.record_problem( tarinfo, "Absolute path as link target.", "Converted to extraction relative path.", ) - tarinfo.linkname = f"./{tarinfo.linkname}" + tarinfo.linkname = relative_linkname resolved_path = (extract_root / tarinfo.name).parent / tarinfo.linkname if not is_safe_path(basedir=extract_root, path=resolved_path): From c2b50dbdfcb28f7cc73007529c4d53d527da262d Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Thu, 15 Feb 2024 22:19:44 +0100 Subject: [PATCH 3/5] tests(tar) add and update symlink test files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tests/integration/archive/tar/__input__/symlinks.tar: test/ |-- bin | `-- busybox `-- sbin |-- cp -> /test/bin/busybox |-- find -> ../bin/../bin/busybox |-- ln -> /test/bin/../bin/busybox |-- ls -> ../bin/busybox |-- mv -> ../../bin/busybox `-- rm -> ../../../bin/busybox is extracted as tests/integration/archive/tar/__output__/symlinks.tar_extract/ └── test ├── bin │ └── busybox └── sbin ├── cp -> ../bin/busybox ├── find -> ../bin/busybox ├── ln -> ../bin/busybox ├── ls -> ../bin/busybox └── mv -> ../../bin/busybox Please note, that `rm` is missing, as that is a path traversal attempt. ---- The symlinks.tar test file is created with this script in a docker container: rm -rf /test symlinks.tar mkdir -p /test/bin echo binary > /test/bin/busybox mkdir -p /test/sbin ln -s ../bin/busybox /test/sbin/ls ln -s ../bin/../bin/busybox /test/sbin/find ln -s ../../bin/busybox /test/sbin/mv ln -s ../../../bin/busybox /test/sbin/rm ln -s /test/bin/busybox /test/sbin/cp ln -s /test/bin/../bin/busybox /test/sbin/ln tar cPf /symlinks.tar /test/ tar tvPf /symlinks.tar --- tests/integration/archive/tar/__input__/symlinks.tar | 3 +++ .../tar/__output__/symlinks.tar_extract/test/bin/busybox | 3 +++ .../archive/tar/__output__/symlinks.tar_extract/test/sbin/cp | 1 + .../archive/tar/__output__/symlinks.tar_extract/test/sbin/find | 1 + .../archive/tar/__output__/symlinks.tar_extract/test/sbin/ln | 1 + .../archive/tar/__output__/symlinks.tar_extract/test/sbin/ls | 1 + .../archive/tar/__output__/symlinks.tar_extract/test/sbin/mv | 1 + 7 files changed, 11 insertions(+) create mode 100644 tests/integration/archive/tar/__input__/symlinks.tar create mode 100644 tests/integration/archive/tar/__output__/symlinks.tar_extract/test/bin/busybox create mode 120000 tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/cp create mode 120000 tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/find create mode 120000 tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ln create mode 120000 tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ls create mode 120000 tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/mv diff --git a/tests/integration/archive/tar/__input__/symlinks.tar b/tests/integration/archive/tar/__input__/symlinks.tar new file mode 100644 index 0000000000..8ef485d3ab --- /dev/null +++ b/tests/integration/archive/tar/__input__/symlinks.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:152c7a03b3b3276f8a7527bfde01f26b0c4f2ae3f705a6deac0183d8bac2ba1c +size 10240 diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/bin/busybox b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/bin/busybox new file mode 100644 index 0000000000..6dfcf97655 --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/bin/busybox @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58eaf5a78d580f5dbd49d31a5b733094169b31bfdf49055b74bcac2877d8f58c +size 7 diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/cp b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/cp new file mode 120000 index 0000000000..71259713ee --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/cp @@ -0,0 +1 @@ +../bin/busybox \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/find b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/find new file mode 120000 index 0000000000..71259713ee --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/find @@ -0,0 +1 @@ +../bin/busybox \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ln b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ln new file mode 120000 index 0000000000..71259713ee --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ln @@ -0,0 +1 @@ +../bin/busybox \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ls b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ls new file mode 120000 index 0000000000..71259713ee --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ls @@ -0,0 +1 @@ +../bin/busybox \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/mv b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/mv new file mode 120000 index 0000000000..f948f1a72d --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/mv @@ -0,0 +1 @@ +../../bin/busybox \ No newline at end of file From 839abd6edc8f4e28ae031d19aa7da072c1aa2349 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Thu, 15 Feb 2024 22:45:23 +0100 Subject: [PATCH 4/5] test(tar) update existing expectation on extraction of absolute links Due to the previous absolute-symlink extraction fix, the following updates are made: tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink - -> absolute/absolute-file + -> absolute-file tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow - -> etc/shadow + -> ../etc/shadow --- .../__output__/absolute.tar_extract/absolute/absolute-symlink | 2 +- .../__output__/absolute.tar_extract/absolute/link_to_etc_shadow | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink b/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink index 5b0423aa0e..23dcdb8ce7 120000 --- a/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink +++ b/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink @@ -1 +1 @@ -absolute/absolute-file \ No newline at end of file +absolute-file \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow b/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow index 5d50cf53f5..c3342eb2fa 120000 --- a/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow +++ b/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow @@ -1 +1 @@ -etc/shadow \ No newline at end of file +../etc/shadow \ No newline at end of file From 600f1663de4d7bcf85418e195e2c67e01cad5111 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:00:00 +0100 Subject: [PATCH 5/5] test(FileSystem) new create_symlink tests --- tests/test_file_utils.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py index 9a20da7420..1fc94138c2 100644 --- a/tests/test_file_utils.py +++ b/tests/test_file_utils.py @@ -25,7 +25,7 @@ round_down, round_up, ) -from unblob.report import PathTraversalProblem +from unblob.report import LinkExtractionProblem, PathTraversalProblem @pytest.mark.parametrize( @@ -503,6 +503,30 @@ def test_create_symlink(self, sandbox: FileSystem): assert os.readlink(output_path) == "target file" assert sandbox.problems == [] + def test_create_symlink_target_inside_sandbox(self, sandbox: FileSystem): + # ./sbin/shell -> ../bin/sh + sandbox.mkdir(Path("bin")) + sandbox.write_bytes(Path("bin/sh"), b"posix shell") + sandbox.mkdir(Path("sbin")) + sandbox.create_symlink(Path("../bin/sh"), Path("sbin/shell")) + + output_path = sandbox.root / "sbin/shell" + assert output_path.read_bytes() == b"posix shell" + assert output_path.exists() + assert os.readlink(output_path) == "../bin/sh" + assert sandbox.problems == [] + + def test_create_symlink_target_outside_sandbox(self, sandbox: FileSystem): + # /shell -> ../bin/sh + sandbox.mkdir(Path("bin")) + sandbox.write_bytes(Path("bin/sh"), b"posix shell") + sandbox.create_symlink(Path("../bin/sh"), Path("/shell")) + + assert any(p for p in sandbox.problems if isinstance(p, LinkExtractionProblem)) + output_path = sandbox.root / "shell" + assert not output_path.exists() + assert not output_path.is_symlink() + def test_create_symlink_absolute_paths(self, sandbox: FileSystem): sandbox.write_bytes(Path("target file"), b"test content") sandbox.create_symlink(Path("/target file"), Path("/symlink"))