From 417ddf4d924c6e46092aded344a844e578420cea Mon Sep 17 00:00:00 2001 From: Naman Nandan Date: Fri, 26 Jul 2024 12:27:52 -0700 Subject: [PATCH 1/3] Add Sequence Status to describe model API --- .../http/messages/DescribeModelResponse.java | 21 +++++++++++++++++++ .../java/org/pytorch/serve/util/ApiUtils.java | 5 +++++ .../java/org/pytorch/serve/wlm/Model.java | 8 +++++++ 3 files changed, 34 insertions(+) diff --git a/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java b/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java index 00b5a62142..3ee6e55475 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java +++ b/frontend/server/src/main/java/org/pytorch/serve/http/messages/DescribeModelResponse.java @@ -42,6 +42,7 @@ public class DescribeModelResponse { private List workers; private Metrics metrics; private JobQueueStatus jobQueueStatus; + private SequenceStatus sequenceStatus; private JsonObject customizedMetadata; public DescribeModelResponse() { @@ -299,6 +300,14 @@ public void setJobQueueStatus(JobQueueStatus jobQueueStatus) { this.jobQueueStatus = jobQueueStatus; } + public SequenceStatus getSequenceStatus() { + return sequenceStatus; + } + + public void setSequenceStatus(SequenceStatus sequenceStatus) { + this.sequenceStatus = sequenceStatus; + } + public void setCustomizedMetadata(byte[] customizedMetadata) { String stringMetadata = new String(customizedMetadata, Charset.forName("UTF-8")); try { @@ -434,4 +443,16 @@ public void setPendingRequests(int pendingRequests) { this.pendingRequests = pendingRequests; } } + + public static final class SequenceStatus { + private int sequenceCount; + + public int getSequenceCount() { + return sequenceCount; + } + + public void setSequenceCount(int sequenceCount) { + this.sequenceCount = sequenceCount; + } + } } diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java b/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java index 30ce8b156d..2ac0cb6b3c 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java @@ -437,6 +437,11 @@ private static DescribeModelResponse createModelResponse( jobQueueStatus.setPendingRequests(model.getPendingRequestsInJobQueue()); resp.setJobQueueStatus(jobQueueStatus); + DescribeModelResponse.SequenceStatus sequenceStatus = + new DescribeModelResponse.SequenceStatus(); + sequenceStatus.setSequenceCount(model.getJobGroupCount()); + resp.setSequenceStatus(sequenceStatus); + return resp; } diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java index 5debd238d0..40a5a3b864 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java +++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/Model.java @@ -638,6 +638,14 @@ public LinkedBlockingDeque getPendingJobGroups() { return pendingJobGroups; } + public int getJobGroupCount() { + if (jobGroups != null) { + return jobGroups.size(); + } + + return 0; + } + public JobGroup getJobGroup(String groupId) { return jobGroups.get(groupId); } From c72d5c31b53283f34d8a72fb0a53df4a9419ec9d Mon Sep 17 00:00:00 2001 From: Naman Nandan Date: Fri, 26 Jul 2024 15:19:22 -0700 Subject: [PATCH 2/3] Enable cleaning up job group irrespective of prediction status --- .../org/pytorch/serve/wlm/SequenceContinuousBatching.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/SequenceContinuousBatching.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/SequenceContinuousBatching.java index 3284e8606a..52dc2b5173 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/wlm/SequenceContinuousBatching.java +++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/SequenceContinuousBatching.java @@ -113,7 +113,6 @@ public boolean sendResponse(ModelWorkerResponse message) { } else { job.getPayload().setCachedInBackend(true); } - setJobGroupFinished(prediction); } } else { for (Map.Entry j : jobs.entrySet()) { @@ -134,6 +133,12 @@ public boolean sendResponse(ModelWorkerResponse message) { cleanJobs(); } + if (!message.getPredictions().isEmpty()) { + for (Predictions prediction : message.getPredictions()) { + setJobGroupFinished(prediction); + } + } + resetCurrentJobGroupIds(); return true; From b4df09a183f3868c49d8a751452cc5bc0fe26a27 Mon Sep 17 00:00:00 2001 From: Naman Nandan Date: Fri, 26 Jul 2024 16:16:53 -0700 Subject: [PATCH 3/3] Revert "Enable cleaning up job group irrespective of prediction status" This reverts commit c72d5c31b53283f34d8a72fb0a53df4a9419ec9d. --- .../org/pytorch/serve/wlm/SequenceContinuousBatching.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/SequenceContinuousBatching.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/SequenceContinuousBatching.java index 52dc2b5173..3284e8606a 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/wlm/SequenceContinuousBatching.java +++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/SequenceContinuousBatching.java @@ -113,6 +113,7 @@ public boolean sendResponse(ModelWorkerResponse message) { } else { job.getPayload().setCachedInBackend(true); } + setJobGroupFinished(prediction); } } else { for (Map.Entry j : jobs.entrySet()) { @@ -133,12 +134,6 @@ public boolean sendResponse(ModelWorkerResponse message) { cleanJobs(); } - if (!message.getPredictions().isEmpty()) { - for (Predictions prediction : message.getPredictions()) { - setJobGroupFinished(prediction); - } - } - resetCurrentJobGroupIds(); return true;