Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Inference API] Default eis endpoint #119694

Draft
wants to merge 39 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
0fb460a
Starting completion model
jonathan-buttner Dec 6, 2024
8ef932a
Adding model
jonathan-buttner Dec 6, 2024
bb97600
initial implementation of request and response handling, manager, and…
maxhniebergall Dec 9, 2024
31f3f2c
Working response from openai
jonathan-buttner Dec 9, 2024
7213932
Update docs/changelog/118301.yaml
jonathan-buttner Dec 9, 2024
ae9dbf7
Fixing comment
jonathan-buttner Dec 10, 2024
147ba77
Adding some initial tests
jonathan-buttner Dec 11, 2024
9d4e02e
Moving tests around
jonathan-buttner Dec 11, 2024
9f78b40
Address some TODOs
jaybcee Dec 18, 2024
b09b3f5
Remove a TODO
jaybcee Dec 18, 2024
b92724b
[CI] Auto commit changes from spotless
elasticsearchmachine Dec 18, 2024
25ab348
Delete docs/changelog/118301.yaml
jaybcee Dec 19, 2024
7168dc6
Rename EISUnifiedChatCompletionResponseHandler
jaybcee Dec 19, 2024
91daa01
Renames to ElasticInferenceServiceUnifiedCompletionRequestManager
jaybcee Dec 19, 2024
7842b6c
Renames EISUnifiedChatCompletionRequest
jaybcee Dec 19, 2024
7f44d04
Renames and comments
jaybcee Dec 19, 2024
afc8ebc
propagateTraceContext extraction
jaybcee Dec 19, 2024
346ceba
Clean up trace
jaybcee Dec 19, 2024
a206fab
Address comments
jaybcee Dec 23, 2024
cef606d
Update x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/…
jaybcee Jan 7, 2025
a5e91d9
Update x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/…
jaybcee Jan 7, 2025
6933c49
[CI] Auto commit changes from spotless
elasticsearchmachine Jan 7, 2025
31fe29c
Address comments
jaybcee Jan 7, 2025
8ebe833
add default endpoint for EIS completion
maxhniebergall Jan 7, 2025
1be3aca
Avoid using immutable map for constructing EISCompletionModel
maxhniebergall Jan 7, 2025
0278776
Update docs/changelog/119694.yaml
maxhniebergall Jan 7, 2025
eeb12b3
actually include service settings
maxhniebergall Jan 7, 2025
a619561
[CI] Auto commit changes from spotless
elasticsearchmachine Jan 7, 2025
f9e6b7c
Update changemessage
maxhniebergall Jan 7, 2025
59c8791
match ElasticsearchInternalService implementation of defaults
maxhniebergall Jan 8, 2025
5d430eb
Update tests
maxhniebergall Jan 8, 2025
ad8c7ab
[CI] Auto commit changes from spotless
elasticsearchmachine Jan 8, 2025
84b654c
[CI] Auto commit changes from spotless
elasticsearchmachine Jan 8, 2025
1687bba
update model name constant
maxhniebergall Jan 9, 2025
ef802b0
[CI] Auto commit changes from spotless
elasticsearchmachine Jan 9, 2025
986654b
fix merge conflicts
maxhniebergall Jan 9, 2025
1d79df7
remove unnecessary comment
maxhniebergall Jan 9, 2025
fa74489
remove todo
maxhniebergall Jan 9, 2025
5b1a509
Replace local constant with class variable
maxhniebergall Jan 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/119694.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 119694
summary: "[Inference API] Add default endpoint for completion in elastic inference service"
area: Machine Learning
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import java.util.stream.Stream;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.equalToIgnoringCase;
import static org.hamcrest.Matchers.hasSize;
Expand All @@ -58,7 +57,7 @@ public void testCRUD() throws IOException {
}

var getAllModels = getAllModels();
int numModels = 12;
int numModels = 13;
assertThat(getAllModels, hasSize(numModels));

var getSparseModels = getModels("_all", TaskType.SPARSE_EMBEDDING);
Expand Down Expand Up @@ -543,8 +542,8 @@ private static String expectedResult(String input) {
}
}

public void testGetZeroModels() throws IOException {
public void testGetCompletionModels() throws IOException {
var models = getModels("_all", TaskType.COMPLETION);
assertThat(models, empty());
assertEquals(models.size(), 1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ public class ElasticInferenceService extends SenderService {
private static final EnumSet<TaskType> supportedTaskTypes = EnumSet.of(TaskType.SPARSE_EMBEDDING, TaskType.COMPLETION);
private static final String SERVICE_NAME = "Elastic";

public static final String DEFAULT_EIS_COMPLETION_ENDPOINT_ID_V1 = ".eis-alpha-1";

public static final List<String> DEFAULT_EIS_ENDPOINT_IDS = List.of(DEFAULT_EIS_COMPLETION_ENDPOINT_ID_V1);

public ElasticInferenceService(
HttpRequestSender.Factory factory,
ServiceComponents serviceComponents,
Expand Down Expand Up @@ -175,6 +179,17 @@ public void parseRequestConfig(
Map<String, Object> config,
ActionListener<Model> parsedModelListener
) {
if (DEFAULT_EIS_ENDPOINT_IDS.contains(inferenceEntityId)) {
parsedModelListener.onFailure(
new ElasticsearchStatusException(
"[{}] is a reserved inference Id. Cannot create a new inference endpoint with a reserved Id",
RestStatus.BAD_REQUEST,
inferenceEntityId
)
);
return;
}

try {
Map<String, Object> serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS);
Map<String, Object> taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS);
Expand Down Expand Up @@ -210,6 +225,32 @@ public EnumSet<TaskType> supportedTaskTypes() {
return supportedTaskTypes;
}

/**
 * Lists the identifiers of the default inference endpoints this service provides.
 *
 * @return a single-element list holding the default completion endpoint id
 *         ({@code DEFAULT_EIS_COMPLETION_ENDPOINT_ID_V1}), tagged with
 *         {@code TaskType.COMPLETION} and this service instance
 */
@Override
public List<DefaultConfigId> defaultConfigIds() {
    var completionDefault = new DefaultConfigId(DEFAULT_EIS_COMPLETION_ENDPOINT_ID_V1, TaskType.COMPLETION, this);
    return List.of(completionDefault);
}

/**
 * Supplies the default models of this service to the given listener.
 * The listener is completed synchronously with a one-element list containing
 * the model built by {@code firstDefaultCompletionModel()}.
 *
 * @param defaultsListener receives the list of default models via {@code onResponse}
 */
@Override
public void defaultConfigs(ActionListener<List<Model>> defaultsListener) {
    List<Model> defaults = List.of(firstDefaultCompletionModel());
    defaultsListener.onResponse(defaults);
}

/**
 * Builds the model backing the default completion endpoint
 * ({@code DEFAULT_EIS_COMPLETION_ENDPOINT_ID_V1}).
 *
 * @return a new completion model whose service settings contain only the model id
 */
private ElasticInferenceServiceCompletionModel firstDefaultCompletionModel() {
    // A mutable map is used deliberately rather than an immutable Map.of(...).
    // NOTE(review): presumably the model constructor consumes/removes entries while parsing - confirm.
    HashMap<String, Object> modelSettings = new HashMap<>(1);
    // TODO: "elastic-model" is a placeholder model id carried over from the original code.
    modelSettings.put(MODEL_ID, "elastic-model");

    // The two null arguments follow the service settings map.
    // NOTE(review): presumably task settings and secret settings - confirm against the constructor.
    return new ElasticInferenceServiceCompletionModel(
        DEFAULT_EIS_COMPLETION_ENDPOINT_ID_V1,
        TaskType.COMPLETION,
        NAME,
        modelSettings,
        null,
        null,
        elasticInferenceServiceComponents,
        ConfigurationParseContext.PERSISTENT
    );
}

private static ElasticInferenceServiceModel createModel(
String inferenceEntityId,
TaskType taskType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ public class ElasticInferenceServiceSettings {
/**
 * Reads the Elastic Inference Service URLs from the supplied node settings.
 *
 * @param settings the settings from which {@code EIS_GATEWAY_URL} and
 *                 {@code ELASTIC_INFERENCE_SERVICE_URL} are resolved
 */
public ElasticInferenceServiceSettings(Settings settings) {
    this.eisGatewayUrl = EIS_GATEWAY_URL.get(settings);
    this.elasticInferenceServiceUrl = ELASTIC_INFERENCE_SERVICE_URL.get(settings);
}

public static List<Setting<?>> getSettingsDefinitions() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public ElasticInferenceServiceCompletionModel(

}

ElasticInferenceServiceCompletionModel(
public ElasticInferenceServiceCompletionModel(
String inferenceEntityId,
TaskType taskType,
String service,
Expand Down
Loading