Skip to content

Commit

Permalink
Limit the maximum retry time for query streaming
Browse files Browse the repository at this point in the history
If, after a couple of minutes, the server is still unable to handle our query, we should just bail out so that a Butler server failure doesn't cascade indefinitely into other services that are trying to use the Butler.
  • Loading branch information
dhirving committed Dec 5, 2024
1 parent 3bef2a9 commit 8ea635f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
8 changes: 6 additions & 2 deletions python/lsst/daf/butler/remote_butler/_http_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,17 @@ def _send_request_with_stream_response(self, request: _Request) -> Iterator[http
raise ButlerServerError(request.request_id) from e

def _send_with_retries(self, request: _Request, stream: bool) -> httpx.Response:
max_retry_time_seconds = 120
start_time = time.time()
while True:
response = self._client.send(request.request, stream=stream)
retry = _needs_retry(response)
if retry.retry:
time_remaining = max_retry_time_seconds - (time.time() - start_time)
if retry.retry and time_remaining > 0:
if stream:
response.close()
time.sleep(retry.delay_seconds)
sleep_time = min(time_remaining, retry.delay_seconds)
time.sleep(sleep_time)
else:
return response

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ async def execute_streaming_query(query: StreamingQuery) -> StreamingResponse:
await _block_retry_for_unit_test()
raise HTTPException(
status_code=503, # service temporarily unavailable
detail="The Butler Server is currently overloaded with requests."
f" Try again in {_QUERY_RETRY_SECONDS} seconds.",
detail="The Butler Server is currently overloaded with requests.",
headers={"retry-after": str(_QUERY_RETRY_SECONDS)},
)

Expand Down

0 comments on commit 8ea635f

Please sign in to comment.