From 71088d1dc545d9819cd785e94f2d2f552c62aa67 Mon Sep 17 00:00:00 2001 From: Donald Stufft Date: Fri, 31 May 2024 08:37:48 -0400 Subject: [PATCH] Move request sanitization and upload disallowed out of file_upload --- warehouse/forklift/legacy.py | 180 +++++++++++++++++++++-------------- 1 file changed, 108 insertions(+), 72 deletions(-) diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index e925db39489c..d156d13ea38c 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -38,6 +38,7 @@ HTTPPermanentRedirect, HTTPTooManyRequests, ) +from pyramid.request import Request from pyramid.view import view_config from sqlalchemy import and_, exists, func, orm from sqlalchemy.exc import MultipleResultsFound, NoResultFound @@ -378,87 +379,30 @@ def file_upload(request): # This is a list of warnings that we'll emit *IF* the request is successful. warnings = [] - # If we're in read-only mode, let upload clients know - if request.flags.enabled(AdminFlagValue.READ_ONLY): - raise _exc_with_message( - HTTPForbidden, "Read-only mode: Uploads are temporarily disabled." - ) - - if request.flags.enabled(AdminFlagValue.DISALLOW_NEW_UPLOAD): - raise _exc_with_message( - HTTPForbidden, - "New uploads are temporarily disabled. " - "See {projecthelp} for more information.".format( - projecthelp=request.help_url(_anchor="admin-intervention") - ), - ) - # Log an attempt to upload metrics = request.find_service(IMetricsService, context=None) metrics.increment("warehouse.upload.attempt") - # Before we do anything, if there isn't an authenticated identity with - # this request, then we'll go ahead and bomb out. - if request.identity is None: - raise _exc_with_message( - HTTPForbidden, - "Invalid or non-existent authentication information. " - "See {projecthelp} for more information.".format( - projecthelp=request.help_url(_anchor="invalid-auth") - ), - ) - - # These checks only make sense when our authenticated identity is a user, - # not a project identity (like OIDC-minted tokens.) - if request.user: - # Ensure that user has a verified, primary email address. This should both - # reduce the ease of spam account creation and activity, as well as act as - # a forcing function for https://github.com/pypa/warehouse/issues/3632. - # TODO: Once https://github.com/pypa/warehouse/issues/3632 has been solved, - # we might consider a different condition, possibly looking at - # User.is_active instead. - if not (request.user.primary_email and request.user.primary_email.verified): - raise _exc_with_message( - HTTPBadRequest, - ( - "User {!r} does not have a verified primary email address. " - "Please add a verified primary email before attempting to " - "upload to PyPI. See {project_help} for more information." - ).format( - request.user.username, - project_help=request.help_url(_anchor="verified-email"), - ), - ) from None - - # Do some cleanup of the various form fields - for key in list(request.POST): - value = request.POST.get(key) - if isinstance(value, str): - # distutils "helpfully" substitutes unknown, but "required" values - # with the string "UNKNOWN". This is basically never what anyone - # actually wants so we'll just go ahead and delete anything whose - # value is UNKNOWN. - if value.strip() == "UNKNOWN": - del request.POST[key] - - # Escape NUL characters, which psycopg doesn't like - if "\x00" in value: - request.POST[key] = value.replace("\x00", "\\x00") - # We require protocol_version 1, it's the only supported version however # passing a different version should raise an error. if request.POST.get("protocol_version", "1") != "1": raise _exc_with_message(HTTPBadRequest, "Unknown protocol version.") - # Check if any fields were supplied as a tuple and have become a - # FieldStorage. The 'content' field _should_ be a FieldStorage, however, - # and we don't care about the legacy gpg_signature field. - # ref: https://github.com/pypi/warehouse/issues/2185 - # ref: https://github.com/pypi/warehouse/issues/2491 - for field in set(request.POST) - {"content", "gpg_signature"}: - values = request.POST.getall(field) - if any(isinstance(value, FieldStorage) for value in values): - raise _exc_with_message(HTTPBadRequest, f"{field}: Should not be a tuple.") + # Sanitize the incoming request. There's a lot of garbage that gets sent to + # this view, which we'll sanitize to clean that up and/or fail early rather + # than getting failures deeper in the stack. + # + # NOTE: This method mutates the current request to do it's cleanup, but it + # can also return an error message if it could not sanitize. + if (reason := _sanitize_request(request)) is not None: + raise _exc_with_message(HTTPBadRequest, reason) + + # Do some basic check to make sure that we're allowing uploads, either + # generally or for the current identity. Wo do this first, before doing + # anything else so that we can bail out early if there's no chance we're + # going to accept the upload anyways. + if (reason := _upload_disallowed(request)) is not None: + raise _exc_with_message(HTTPForbidden, reason) # Validate and process the incoming file data. form = UploadForm(request.POST) @@ -1232,6 +1176,98 @@ def file_upload(request): return HTTPOk(body="\n".join(warnings)) +def _sanitize_request(request: Request) -> str | None: + """ + Sanitize a Pyramid request. + + There's a lot of garbage that gets sent to the upload view, which we'll + sanitize to clean that up and/or fail early rather than getting failures + deeper in the stack. + + This method will mutate the passed in request to sanitize it. + + :return: An error message if the request could not be sanitized, otherwise + None. + """ + # Do some cleanup of the various form fields, there's a lot of garbage that + # gets sent to this view, and this helps prevent issues later on. + for key in list(request.POST): + value = request.POST.get(key) + if isinstance(value, str): + # distutils "helpfully" substitutes unknown, but "required" values + # with the string "UNKNOWN". This is basically never what anyone + # actually wants so we'll just go ahead and delete anything whose + # value is UNKNOWN. + if value.strip() == "UNKNOWN": + del request.POST[key] + + # Escape NUL characters, which psycopg doesn't like + if "\x00" in value: + request.POST[key] = value.replace("\x00", "\\x00") + + # Check if any fields were supplied as a tuple and have become a + # FieldStorage. The 'content' field _should_ be a FieldStorage, however, + # and we don't care about the legacy gpg_signature field. + # ref: https://github.com/pypi/warehouse/issues/2185 + # ref: https://github.com/pypi/warehouse/issues/2491 + for field in set(request.POST) - {"content", "gpg_signature"}: + values = request.POST.getall(field) + if any(isinstance(value, FieldStorage) for value in values): + return f"{field}: Should not be a tuple." + + +def _upload_disallowed(request: Request) -> str | None: + """ + Ensures that we're currently allowing uploads, either generally or for the + current request.identity. + + :return: An error message if uploading is to be disallowed, otherwise None. + """ + # The very first thing we want to check, is whether we're currently in + # read only mode, because if we're in read only mode nothing else matters. + if request.flags.enabled(AdminFlagValue.READ_ONLY): + return "Read-only mode: Uploads are temporarily disabled." + + # After that, we want to check if we're disallowing new uploads, which is + # functionally the same as read only mode, but only for the upload endpoint. + if request.flags.enabled(AdminFlagValue.DISALLOW_NEW_UPLOAD): + return ( + "New uploads are temporarily disabled. " + "See {projecthelp} for more information.".format( + projecthelp=request.help_url(_anchor="admin-intervention") + ) + ) + + # Before we do anything else, if there isn't an authenticated identity with + # this request, then we'll go ahead and bomb out. + if request.identity is None: + return ( + "Invalid or non-existent authentication information. " + "See {projecthelp} for more information.".format( + projecthelp=request.help_url(_anchor="invalid-auth") + ), + ) + + # These checks only make sense when our authenticated identity is a user, + # not a project identity (like OIDC-minted tokens.) + if request.user: + # Ensure that user has a verified, primary email address. This should both + # reduce the ease of spam account creation and activity, as well as act as + # a forcing function for https://github.com/pypa/warehouse/issues/3632. + # TODO: Once https://github.com/pypa/warehouse/issues/3632 has been solved, + # we might consider a different condition, possibly looking at + # User.is_active instead. + if not (request.user.primary_email and request.user.primary_email.verified): + return ( + "User {!r} does not have a verified primary email address. " + "Please add a verified primary email before attempting to " + "upload to PyPI. See {project_help} for more information." + ).format( + request.user.username, + project_help=request.help_url(_anchor="verified-email"), + ) + + @view_config( route_name="forklift.legacy.submit", require_csrf=False, require_methods=["POST"] )