From ed495a7a91584a95ec8336f0390d653e5af9a4f6 Mon Sep 17 00:00:00 2001 From: nullswan Date: Tue, 29 Oct 2024 07:41:18 +0100 Subject: [PATCH] feat(openaiprovider): implement text-to-speech provider --- .../providers/openaiprovider/texttospeech.go | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 internal/providers/openaiprovider/texttospeech.go diff --git a/internal/providers/openaiprovider/texttospeech.go b/internal/providers/openaiprovider/texttospeech.go new file mode 100644 index 0000000..2798896 --- /dev/null +++ b/internal/providers/openaiprovider/texttospeech.go @@ -0,0 +1,60 @@ +package openaiprovider + +import ( + "context" + "fmt" + "io" + + baseprovider "github.com/nullswan/nomi/internal/providers/base" + "github.com/sashabaranov/go-openai" +) + +const ( + OpenAITextToSpeechDefaultModel = openai.TTSModel1 +) + +type TextToSpeechProvider struct { + client *openai.Client +} + +func NewTextToSpeechProvider( + config oaiProviderConfig, +) (baseprovider.TextToSpeechProvider, error) { + p := &TextToSpeechProvider{ + client: openai.NewClient(config.apiKey), + } + + return p, nil +} + +func (p TextToSpeechProvider) Close() error { + return nil +} + +func (p TextToSpeechProvider) GenerateSpeech( + ctx context.Context, + message string, +) ([]byte, error) { + resp, err := p.client.CreateSpeech(ctx, openai.CreateSpeechRequest{ + Model: OpenAITextToSpeechDefaultModel, + Voice: openai.VoiceAlloy, + Input: message, + }) + if err != nil { + return nil, fmt.Errorf("error creating speech: %w", err) + } + + defer resp.Close() + + buf, err := io.ReadAll(resp) + if err != nil { + return nil, fmt.Errorf("error reading speech response: %w", err) + } + + return buf, nil +} + +// For now, we are always using the default model +func (p TextToSpeechProvider) GetModel() string { + return string(OpenAITextToSpeechDefaultModel) +}