From a09e83e10502fa2ad6ba4d310afd3ada5dca9077 Mon Sep 17 00:00:00 2001 From: John Sirois Date: Wed, 28 Feb 2024 21:07:49 -0800 Subject: [PATCH] Handle `.tar.bz2` & `.tgz` sdists when locking. (#2380) More generally, investigate what is out there (on PyPI) for sdists and explicitly admit `.zip`, `.tar.gz`, `.tar.bz2` and `.tgz` as covering 99.999% of all known cases. Fixes #2379 --------- Co-authored-by: Huon Wilson --- CHANGES.md | 8 ++++ pex/pip/tool.py | 21 ++++++---- pex/resolve/locked_resolve.py | 47 +++++++++++++++++++++- pex/resolve/lockfile/create.py | 4 +- pex/version.py | 2 +- tests/integration/test_issue_2739.py | 60 ++++++++++++++++++++++++++++ tests/test_pip.py | 23 ++++++++--- 7 files changed, 147 insertions(+), 18 deletions(-) create mode 100644 tests/integration/test_issue_2739.py diff --git a/CHANGES.md b/CHANGES.md index 30d8c8a86..cfe0e830b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,13 @@ # Release Notes +## 2.2.2 + +This release fixes `pex3 lock create` to handle `.tar.bz2` and `.tgz` +sdists in addition to the officially sanctioned `.tar.gz` and (less +officially so) `.zip` sdists. + +* Handle `.tar.bz2` & `.tgz` sdists when locking. (#2380) + ## 2.2.1 This release trims down the size of the Pex wheel on PyPI and the diff --git a/pex/pip/tool.py b/pex/pip/tool.py index c7466706b..08604bfde 100644 --- a/pex/pip/tool.py +++ b/pex/pip/tool.py @@ -247,7 +247,7 @@ class Pip(object): _PATCHES_PACKAGE_NAME = "_pex_pip_patches" _pip = attr.ib() # type: PipVenv - _version = attr.ib() # type: PipVersionValue + version = attr.ib() # type: PipVersionValue _pip_cache = attr.ib() # type: str @staticmethod @@ -259,22 +259,27 @@ def _calculate_resolver_version(package_index_configuration=None): else ResolverVersion.default() ) - @classmethod def _calculate_resolver_version_args( - cls, + self, interpreter, # type: PythonInterpreter package_index_configuration=None, # type: Optional[PackageIndexConfiguration] ): # type: (...) -> Iterator[str] - resolver_version = cls._calculate_resolver_version( + resolver_version = self._calculate_resolver_version( package_index_configuration=package_index_configuration ) # N.B.: The pip default resolver depends on the python it is invoked with. For Python 2.7 # Pip defaults to the legacy resolver and for Python 3 Pip defaults to the 2020 resolver. # Further, Pip warns when you do not use the default resolver version for the interpreter # in play. To both avoid warnings and set the correct resolver version, we need - # to only set the resolver version when it's not the default for the interpreter in play: - if resolver_version == ResolverVersion.PIP_2020 and interpreter.version[0] == 2: + # to only set the resolver version when it's not the default for the interpreter in play. + # As an added constraint, the 2020-resolver feature was removed and made default in the + # Pip 22.3 release. + if ( + resolver_version == ResolverVersion.PIP_2020 + and interpreter.version[0] == 2 + and self.version.version < PipVersion.v22_3.version + ): yield "--use-feature" yield "2020-resolver" elif resolver_version == ResolverVersion.PIP_LEGACY and interpreter.version[0] == 3: @@ -599,7 +604,7 @@ def _ensure_wheel_installed(self, package_index_configuration=None): if not atomic_dir.is_finalized(): self.spawn_download_distributions( download_dir=atomic_dir.work_dir, - requirements=[self._version.wheel_requirement], + requirements=[self.version.wheel_requirement], package_index_configuration=package_index_configuration, build_configuration=BuildConfiguration.create(allow_builds=False), ).wait() @@ -617,7 +622,7 @@ def spawn_build_wheels( ): # type: (...) -> Job - if self._version is PipVersion.VENDORED: + if self.version is PipVersion.VENDORED: self._ensure_wheel_installed(package_index_configuration=package_index_configuration) wheel_cmd = ["wheel", "--no-deps", "--wheel-dir", wheel_dir] diff --git a/pex/resolve/locked_resolve.py b/pex/resolve/locked_resolve.py index b6db419ca..ad346569c 100644 --- a/pex/resolve/locked_resolve.py +++ b/pex/resolve/locked_resolve.py @@ -159,12 +159,57 @@ def __lt__(self, other): @attr.s(frozen=True, order=False) class FileArtifact(Artifact): + @staticmethod + def is_zip_sdist(path): + # type: (str) -> bool + + # N.B.: Windows sdists traditionally were released in zip format. + return path.endswith(".zip") + + @staticmethod + def is_tar_sdist(path): + # type: (str) -> bool + + # N.B.: PEP-625 (https://peps.python.org/pep-0625/) says sdists must use .tar.gz, but we + # have a known example of tar.bz2 in the wild in python-constraint 1.4.0 on PyPI: + # https://pypi.org/project/python-constraint/1.4.0/#files + # This probably all stems from the legacy `python setup.py sdist` as last described here: + # https://docs.python.org/3.11/distutils/sourcedist.html + # There was a move to reject exotic formats in PEP-527 in 2016 and the historical sdist + # formats appear to be listed here: https://peps.python.org/pep-0527/#file-extensions + # A query on the PyPI dataset shows: + # + # SELECT + # REGEXP_EXTRACT(path, r'\.([^.]+|tar\.[^.]+|tar)$') as extension, + # count(*) as count + # FROM `bigquery-public-data.pypi.distribution_metadata` + # group by extension + # order by count desc + # + # | extension | count | + # |-----------|---------| + # | whl | 6332494 | + # * | tar.gz | 5283102 | + # | egg | 135940 | + # * | zip | 108532 | + # | exe | 18452 | + # * | tar.bz2 | 3857 | + # | msi | 625 | + # | rpm | 603 | + # * | tgz | 226 | + # | dmg | 47 | + # | deb | 36 | + # * | tar.zip | 2 | + # * | ZIP | 1 | + # + return path.endswith((".tar.gz", ".tgz", ".tar.bz2")) + filename = attr.ib() # type: str @property def is_source(self): # type: () -> bool - return self.filename.endswith((".sdist", ".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".zip")) + return self.is_tar_sdist(self.filename) or self.is_zip_sdist(self.filename) def parse_tags(self): # type: () -> Iterator[tags.Tag] diff --git a/pex/resolve/lockfile/create.py b/pex/resolve/lockfile/create.py index 7ecc4b694..20dc4952c 100644 --- a/pex/resolve/lockfile/create.py +++ b/pex/resolve/lockfile/create.py @@ -154,10 +154,10 @@ def _prepare_project_directory(build_request): return target, project extract_dir = os.path.join(safe_mkdtemp(), "project") - if project.endswith(".zip"): + if FileArtifact.is_zip_sdist(project): with open_zip(project) as zf: zf.extractall(extract_dir) - elif project.endswith(".tar.gz"): + elif FileArtifact.is_tar_sdist(project): with tarfile.open(project) as tf: tf.extractall(extract_dir) else: diff --git a/pex/version.py b/pex/version.py index f22599f25..bc50cbc38 100644 --- a/pex/version.py +++ b/pex/version.py @@ -1,4 +1,4 @@ # Copyright 2015 Pex project contributors. # Licensed under the Apache License, Version 2.0 (see LICENSE). -__version__ = "2.2.1" +__version__ = "2.2.2" diff --git a/tests/integration/test_issue_2739.py b/tests/integration/test_issue_2739.py new file mode 100644 index 000000000..8913e6290 --- /dev/null +++ b/tests/integration/test_issue_2739.py @@ -0,0 +1,60 @@ +# Copyright 2024 Pex project contributors. +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +import os.path +import subprocess + +from pex.pep_440 import Version +from pex.pep_503 import ProjectName +from pex.resolve.locked_resolve import FileArtifact +from pex.resolve.lockfile import json_codec +from pex.resolve.resolved_requirement import Pin +from pex.typing import TYPE_CHECKING +from testing import run_pex_command +from testing.cli import run_pex3 + +if TYPE_CHECKING: + from typing import Any + + +def test_tar_bz2(tmpdir): + # type: (Any) -> None + + lock = os.path.join(str(tmpdir), "lock.json") + pex_root = os.path.join(str(tmpdir), "pex_root") + run_pex3( + "lock", + "create", + "--pex-root", + pex_root, + "python-constraint==1.4.0", + "-o", + lock, + "--indent", + "2", + ).assert_success() + + lock_file = json_codec.load(lock) + assert len(lock_file.locked_resolves) == 1 + + locked_resolve = lock_file.locked_resolves[0] + assert len(locked_resolve.locked_requirements) == 1 + + locked_requirement = locked_resolve.locked_requirements[0] + assert Pin(ProjectName("python-constraint"), Version("1.4.0")) == locked_requirement.pin + assert isinstance(locked_requirement.artifact, FileArtifact) + assert locked_requirement.artifact.is_source + assert locked_requirement.artifact.filename.endswith(".tar.bz2") + assert not locked_requirement.additional_artifacts + + pex = os.path.join(str(tmpdir), "pex") + run_pex_command( + args=["--pex-root", pex_root, "--runtime-pex-root", pex_root, "--lock", lock, "-o", pex] + ).assert_success() + + assert ( + b"1.4.0" + == subprocess.check_output( + args=[pex, "-c", "from constraint.version import __version__; print(__version__)"] + ).strip() + ) diff --git a/tests/test_pip.py b/tests/test_pip.py index ed0367f18..d4ff502ac 100644 --- a/tests/test_pip.py +++ b/tests/test_pip.py @@ -19,6 +19,7 @@ from pex.pip.version import PipVersion, PipVersionValue from pex.platforms import Platform from pex.resolve.configured_resolver import ConfiguredResolver +from pex.resolve.resolver_configuration import ResolverVersion from pex.targets import AbbreviatedPlatform, LocalInterpreter, Target from pex.typing import TYPE_CHECKING from pex.variables import ENV @@ -105,6 +106,17 @@ def test_no_duplicate_constraints_pex_warnings( ) +def package_index_configuration(pip_version): + # type: (PipVersionValue) -> Optional[PackageIndexConfiguration] + if pip_version is PipVersion.v23_2: + # N.B.: Pip 23.2 has a bug handling PEP-658 metadata with the legacy resolver; so we use the + # 2020 resolver to work around. See: https://github.com/pypa/pip/issues/12156 + return PackageIndexConfiguration.create( + pip_version, resolver_version=ResolverVersion.PIP_2020 + ) + return None + + @pytest.mark.skipif( not IS_LINUX or not any( @@ -126,18 +138,15 @@ def test_download_platform_issues_1355( pip = create_pip(py38, version=version) download_dir = os.path.join(str(tmpdir), "downloads") - def download_pyarrow( - target=None, # type: Optional[Target] - package_index_configuration=None, # type: Optional[PackageIndexConfiguration] - ): - # type: (...) -> Job + def download_pyarrow(target=None): + # type: (Optional[Target]) -> Job safe_rmtree(download_dir) return pip.spawn_download_distributions( download_dir=download_dir, requirements=["pyarrow==4.0.1"], transitive=False, target=target, - package_index_configuration=package_index_configuration, + package_index_configuration=package_index_configuration(pip.version), ) def assert_pyarrow_downloaded( @@ -175,6 +184,7 @@ def assert_download_platform_markers_issue_1366( requirements=["typing_extensions==3.7.4.2; python_version < '3.8'"], download_dir=download_dir, transitive=False, + package_index_configuration=package_index_configuration(pip.version), ).wait() assert ["typing_extensions-3.7.4.2-py2-none-any.whl"] == os.listdir(download_dir) @@ -256,6 +266,7 @@ def test_download_platform_markers_issue_1488( constraint_files=[constraints_file], download_dir=download_dir, transitive=True, + package_index_configuration=package_index_configuration(version), ).wait() assert (