Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: azure openai provider #3

Merged
merged 2 commits into from
Nov 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions config-example.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
providers:
- name: openai
- key: azure-openai
type: azure
api_key: "<your-azure-api-key>"
resource_name: "<your-resource-name>"
api_version: "<your-api-version>"
- key: openai
type: openai
api_key: "<your-openai-api-key>"

models:
- name: gpt-4o-openai
- key: gpt-4o-openai
type: gpt-4o
provider: openai
- key: gpt-4o-azure
type: gpt-4o
provider: azure-openai
deployment: "<your-deployment>"

pipelines:
- name: default
- key: default
type: chat
plugins:
- logging:
Expand All @@ -21,4 +30,5 @@ pipelines:
api_key: "<your-traceloop-api-key>"
- model-router:
models:
- gpt-4o-azure
- gpt-4o-openai
12 changes: 9 additions & 3 deletions src/ai_models/instance.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::config::models::ModelConfig;
use crate::models::chat::{ChatCompletionRequest, ChatCompletionResponse};
use crate::models::completion::{CompletionRequest, CompletionResponse};
use crate::models::embeddings::{EmbeddingsRequest, EmbeddingsResponse};
Expand All @@ -10,6 +11,7 @@ pub struct ModelInstance {
pub name: String,
pub model_type: String,
pub provider: Arc<dyn Provider>,
pub config: ModelConfig,
}

impl ModelInstance {
Expand All @@ -19,7 +21,9 @@ impl ModelInstance {
mut payload: ChatCompletionRequest,
) -> Result<ChatCompletionResponse, StatusCode> {
payload.model = self.model_type.clone();
self.provider.chat_completions(state, payload).await
self.provider
.chat_completions(state, payload, &self.config)
.await
}

pub async fn completions(
Expand All @@ -29,7 +33,9 @@ impl ModelInstance {
) -> Result<CompletionResponse, StatusCode> {
payload.model = self.model_type.clone();

self.provider.completions(state, payload).await
self.provider
.completions(state, payload, &self.config)
.await
}

pub async fn embeddings(
Expand All @@ -38,6 +44,6 @@ impl ModelInstance {
mut payload: EmbeddingsRequest,
) -> Result<EmbeddingsResponse, StatusCode> {
payload.model = self.model_type.clone();
self.provider.embeddings(state, payload).await
self.provider.embeddings(state, payload, &self.config).await
}
}
7 changes: 4 additions & 3 deletions src/ai_models/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::collections::HashMap;
use std::sync::Arc;

use super::instance::ModelInstance;
use crate::config::models::Model as ModelConfig;
use crate::config::models::ModelConfig;
use crate::providers::registry::ProviderRegistry;

pub struct ModelRegistry {
Expand All @@ -20,12 +20,13 @@ impl ModelRegistry {
for config in model_configs {
if let Some(provider) = provider_registry.get(&config.provider) {
let model = Arc::new(ModelInstance {
name: config.name.clone(),
name: config.key.clone(),
model_type: config.r#type.clone(),
provider,
config: config.clone(),
});

models.insert(config.name.clone(), model);
models.insert(config.key.clone(), model);
}
}

Expand Down
15 changes: 9 additions & 6 deletions src/config/models.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use serde::{Deserialize, Serialize};

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Config {
pub providers: Vec<Provider>,
pub models: Vec<Model>,
pub models: Vec<ModelConfig>,
pub pipelines: Vec<Pipeline>,
}

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Provider {
pub name: String,
pub key: String,
pub r#type: String,
pub api_key: String,
#[serde(flatten)]
pub additional_config: HashMap<String, String>,
pub params: HashMap<String, String>,
}

#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Model {
pub name: String,
pub struct ModelConfig {
pub key: String,
pub r#type: String,
pub provider: String,
#[serde(flatten)]
pub params: HashMap<String, String>,
}

#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
Expand Down
2 changes: 1 addition & 1 deletion src/handlers/chat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub async fn completions(
Json(payload): Json<ChatCompletionRequest>,
) -> Result<Json<ChatCompletionResponse>, StatusCode> {
for model in state.config.models.iter() {
if let Some(model) = state.model_registry.get(&model.name) {
if let Some(model) = state.model_registry.get(&model.key) {
let response = model
.chat_completions(state.clone(), payload.clone())
.await?;
Expand Down
2 changes: 1 addition & 1 deletion src/handlers/completion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub async fn completions(
Json(payload): Json<CompletionRequest>,
) -> Result<Json<CompletionResponse>, StatusCode> {
for model in state.config.models.iter() {
if let Some(model) = state.model_registry.get(&model.name) {
if let Some(model) = state.model_registry.get(&model.key) {
let response = model.completions(state.clone(), payload.clone()).await?;
return Ok(Json(response));
}
Expand Down
2 changes: 1 addition & 1 deletion src/handlers/embeddings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub async fn embeddings(
Json(payload): Json<EmbeddingsRequest>,
) -> Result<Json<EmbeddingsResponse>, StatusCode> {
for model in state.config.models.iter() {
if let Some(model) = state.model_registry.get(&model.name) {
if let Some(model) = state.model_registry.get(&model.key) {
let response = model.embeddings(state.clone(), payload.clone()).await?;
return Ok(Json(response));
}
Expand Down
9 changes: 5 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ async fn main() -> Result<(), anyhow::Error> {

info!("Starting the application...");

let config =
load_config("config.yaml").map_err(|_| anyhow::anyhow!("Failed to load configuration"))?;
let state =
Arc::new(AppState::new(config).map_err(|_| anyhow::anyhow!("Failed to create app state"))?);
let config = load_config("config.yaml")
.map_err(|e| anyhow::anyhow!("Failed to load configuration: {}", e))?;
let state = Arc::new(
AppState::new(config).map_err(|e| anyhow::anyhow!("Failed to create app state: {}", e))?,
);
let app = routes::create_router(state);
let port: String = std::env::var("PORT").unwrap_or("3000".to_string());
let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{}", port))
Expand Down
9 changes: 6 additions & 3 deletions src/providers/anthropic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use axum::http::StatusCode;
use std::sync::Arc;

use super::provider::Provider;
use crate::config::models::Provider as ProviderConfig;
use crate::config::models::{ModelConfig, Provider as ProviderConfig};
use crate::models::chat::{ChatCompletionRequest, ChatCompletionResponse};
use crate::models::common::Usage;
use crate::models::completion::{CompletionChoice, CompletionRequest, CompletionResponse};
Expand All @@ -26,8 +26,8 @@ impl Provider for AnthropicProvider {
}
}

fn name(&self) -> String {
self.config.name.clone()
fn key(&self) -> String {
self.config.key.clone()
}

fn r#type(&self) -> String {
Expand All @@ -38,6 +38,7 @@ impl Provider for AnthropicProvider {
&self,
state: Arc<AppState>,
payload: ChatCompletionRequest,
_model_config: &ModelConfig,
) -> Result<ChatCompletionResponse, StatusCode> {
let response = state
.http_client
Expand All @@ -64,6 +65,7 @@ impl Provider for AnthropicProvider {
&self,
state: Arc<AppState>,
payload: CompletionRequest,
_model_config: &ModelConfig,
) -> Result<CompletionResponse, StatusCode> {
let anthropic_payload = serde_json::json!({
"model": payload.model,
Expand Down Expand Up @@ -125,6 +127,7 @@ impl Provider for AnthropicProvider {
&self,
state: Arc<AppState>,
payload: EmbeddingsRequest,
_model_config: &ModelConfig,
) -> Result<EmbeddingsResponse, StatusCode> {
let anthropic_payload = match &payload.input {
EmbeddingsInput::Single(text) => serde_json::json!({
Expand Down
149 changes: 149 additions & 0 deletions src/providers/azure.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
use axum::async_trait;
use axum::http::StatusCode;
use std::sync::Arc;

use super::provider::Provider;
use crate::config::models::{ModelConfig, Provider as ProviderConfig};
use crate::models::chat::{ChatCompletionRequest, ChatCompletionResponse};
use crate::models::completion::{CompletionRequest, CompletionResponse};
use crate::models::embeddings::{EmbeddingsRequest, EmbeddingsResponse};
use crate::state::AppState;

/// Provider backed by the Azure OpenAI service.
///
/// Expects the provider config `params` to contain `resource_name` and
/// `api_version`; each model routed here must carry a `deployment` param.
pub struct AzureProvider {
    // Raw provider entry from the YAML config (key, api_key, params map).
    config: ProviderConfig,
}

impl AzureProvider {
    /// Base URL covering every deployment under the configured Azure resource,
    /// e.g. `https://<resource>.openai.azure.com/openai/deployments`.
    /// Callers append `/<deployment>/<operation>?api-version=...`.
    fn endpoint(&self) -> String {
        format!(
            "https://{}.openai.azure.com/openai/deployments",
            self.config
                .params
                .get("resource_name")
                // Registry only constructs providers from validated config;
                // a missing key is a startup-time configuration bug.
                .expect("azure provider config requires a `resource_name` param"),
        )
    }

    /// The mandatory `api-version` query-string value for Azure OpenAI calls.
    fn api_version(&self) -> String {
        self.config
            .params
            .get("api_version")
            .expect("azure provider config requires an `api_version` param")
            .clone()
    }
}

#[async_trait]
impl Provider for AzureProvider {
    fn new(config: &ProviderConfig) -> Self {
        Self {
            config: config.clone(),
        }
    }

    fn key(&self) -> String {
        self.config.key.clone()
    }

    fn r#type(&self) -> String {
        "azure".to_string()
    }

    /// Forwards a chat-completions request to the Azure deployment named in
    /// `model_config.params["deployment"]`.
    ///
    /// # Errors
    /// Returns `INTERNAL_SERVER_ERROR` when the deployment param is missing,
    /// the HTTP call fails, or the body cannot be decoded; otherwise mirrors
    /// the upstream status code.
    async fn chat_completions(
        &self,
        state: Arc<AppState>,
        payload: ChatCompletionRequest,
        model_config: &ModelConfig,
    ) -> Result<ChatCompletionResponse, StatusCode> {
        // A model missing its `deployment` param is a config error — surface
        // it as a 500 instead of panicking the server.
        let deployment = model_config
            .params
            .get("deployment")
            .ok_or(StatusCode::INTERNAL_SERVER_ERROR)?;
        // endpoint() already ends with `/openai/deployments`.
        let url = format!(
            "{}/{}/chat/completions?api-version={}",
            self.endpoint(),
            deployment,
            self.api_version()
        );

        let response = state
            .http_client
            .post(&url)
            .header("api-key", &self.config.api_key)
            .json(&payload)
            .send()
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

        let status = response.status();
        if status.is_success() {
            response
                .json()
                .await
                .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)
        } else {
            Err(StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR))
        }
    }

    /// Forwards a legacy completions request to the Azure deployment named in
    /// `model_config.params["deployment"]`.
    ///
    /// # Errors
    /// Same error mapping as [`Self::chat_completions`].
    async fn completions(
        &self,
        state: Arc<AppState>,
        payload: CompletionRequest,
        model_config: &ModelConfig,
    ) -> Result<CompletionResponse, StatusCode> {
        let deployment = model_config
            .params
            .get("deployment")
            .ok_or(StatusCode::INTERNAL_SERVER_ERROR)?;
        // BUG FIX: the original prepended `/openai/deployments/` again even
        // though endpoint() already includes it, yielding
        // `.../openai/deployments/openai/deployments/<dep>/completions`.
        let url = format!(
            "{}/{}/completions?api-version={}",
            self.endpoint(),
            deployment,
            self.api_version()
        );

        let response = state
            .http_client
            .post(&url)
            .header("api-key", &self.config.api_key)
            .json(&payload)
            .send()
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

        let status = response.status();
        if status.is_success() {
            response
                .json()
                .await
                .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)
        } else {
            Err(StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR))
        }
    }

    /// Forwards an embeddings request to the Azure deployment named in
    /// `model_config.params["deployment"]`.
    ///
    /// # Errors
    /// Same error mapping as [`Self::chat_completions`].
    async fn embeddings(
        &self,
        state: Arc<AppState>,
        payload: EmbeddingsRequest,
        model_config: &ModelConfig,
    ) -> Result<EmbeddingsResponse, StatusCode> {
        let deployment = model_config
            .params
            .get("deployment")
            .ok_or(StatusCode::INTERNAL_SERVER_ERROR)?;
        // BUG FIX: same doubled `/openai/deployments` path as completions().
        let url = format!(
            "{}/{}/embeddings?api-version={}",
            self.endpoint(),
            deployment,
            self.api_version()
        );

        let response = state
            .http_client
            .post(&url)
            .header("api-key", &self.config.api_key)
            .json(&payload)
            .send()
            .await
            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;

        let status = response.status();
        if status.is_success() {
            response
                .json()
                .await
                .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)
        } else {
            Err(StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR))
        }
    }
}
1 change: 1 addition & 0 deletions src/providers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod anthropic;
pub mod azure;
pub mod openai;
pub mod provider;
pub mod registry;
Loading