From c3b31a37c526f7a0e2c771aff5691e795c2a7880 Mon Sep 17 00:00:00 2001 From: Philipp Zagar Date: Mon, 8 Jan 2024 07:48:11 -0800 Subject: [PATCH] SpeziLLM Remote OpenAI integration (#41) # SpeziLLM Remote OpenAI integration ## :recycle: Current situation & Problem Currently, the module provides basic OpenAI integration; however, it is not integrated into the SpeziLLM ecosystem. ## :gear: Release Notes - Add the `SpeziLLMOpenAI` target that provides an OpenAI integration on the basis of the SpeziLLM ecosystem. ## :books: Documentation Added in-line docs + DocC articles + README ## :white_check_mark: Testing Wrote basic UI test cases, manual testing ## :pencil: Code of Conduct & Contributing Guidelines By creating this pull request, you agree to follow our [Code of Conduct](https://github.com/StanfordSpezi/.github/blob/main/CODE_OF_CONDUCT.md) and [Contributing Guidelines](https://github.com/StanfordSpezi/.github/blob/main/CONTRIBUTING.md): - [X] I agree to follow the [Code of Conduct](https://github.com/StanfordSpezi/.github/blob/main/CODE_OF_CONDUCT.md) and [Contributing Guidelines](https://github.com/StanfordSpezi/.github/blob/main/CONTRIBUTING.md). --- CITATION.cff | 6 +- Package.swift | 6 +- README.md | 96 ++++---- Sources/SpeziLLM/Helpers/Chat+Append.swift | 47 ++++ Sources/SpeziLLM/LLM.swift | 47 ++-- Sources/SpeziLLM/LLMError.swift | 40 ++-- Sources/SpeziLLM/LLMHostingType.swift | 2 + Sources/SpeziLLM/LLMRunner.swift | 66 ++++-- Sources/SpeziLLM/LLMState.swift | 15 +- Sources/SpeziLLM/Mock/LLMMock.swift | 9 +- .../SpeziLLM/Resources/Localizable.xcstrings | 83 +------ Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md | 35 +-- .../SpeziLLM/Tasks/LLMGenerationTask.swift | 47 ++-- .../SpeziLLM/Tasks/LLMRunnerSetupTask.swift | 3 +- .../Tasks/LLMRunnerSetupTaskBuilder.swift | 46 +--- Sources/SpeziLLM/Views/LLMChatView.swift | 53 ++--- ....swift => LLMLocalContextParameters.swift} | 6 +- ...ameters.swift => LLMLocalParameters.swift} | 12 +- ...swift => LLMLocalSamplingParameters.swift} | 6 +- .../SpeziLLMLocal/LLMLlama+Generation.swift | 61 ++--- Sources/SpeziLLMLocal/LLMLlama+Helpers.swift | 106 +++++---- Sources/SpeziLLMLocal/LLMLlama.swift | 53 +++-- Sources/SpeziLLMLocal/LLMLlamaError.swift | 71 ++++++ .../Resources/Localizable.xcstrings | 150 ++++++++++++ .../SpeziLLMLocal.docc/SpeziLLMLocal.md | 23 +- .../LLMOpenAIModelParameters.swift | 76 ++++++ .../Configuration/LLMOpenAIParameters.swift | 45 ++++ .../SpeziLLMOpenAI/Helpers/Chat+OpenAI.swift | 23 ++ .../LLMOpenAIConstants.swift} | 4 +- .../LLMOpenAI+Configuration.swift | 43 ++++ Sources/SpeziLLMOpenAI/LLMOpenAI.swift | 189 +++++++++++++++ Sources/SpeziLLMOpenAI/LLMOpenAIError.swift | 108 +++++++++ .../SpeziLLMOpenAI/LLMOpenAIRunnerSetup.swift | 119 ++++++++++ .../SpeziLLMOpenAI/LLMOpenAITokenSaver.swift | 106 +++++++++ .../LLMOpenAIAPITokenOnboardingStep.swift} | 26 +-- .../LLMOpenAIModelOnboardingStep.swift} | 46 ++-- .../OpenAIChatStreamResult+Sendable.swift | 9 - Sources/SpeziLLMOpenAI/OpenAIError.swift | 14 -- Sources/SpeziLLMOpenAI/OpenAIModel.swift | 129 ---------- Sources/SpeziLLMOpenAI/OpenAIModule.swift | 44 ---- .../Resources/Localizable.xcstrings | 220 ++++++++++++++++++ ...ng => LLMOpenAIAPITokenOnboardingStep.png} | Bin ...MOpenAIAPITokenOnboardingStep.png.license} | 0 ... LLMOpenAIAPITokenOnboardingStep~dark.png} | Bin ...AIAPITokenOnboardingStep~dark.png.license} | 0 ...p.png => LLMOpenAIModelOnboardingStep.png} | Bin ... LLMOpenAIModelOnboardingStep.png.license} | 0 ... 
=> LLMOpenAIModelOnboardingStep~dark.png} | Bin ...penAIModelOnboardingStep~dark.png.license} | 0 .../SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md | 166 ++++++------- Tests/UITests/TestApp/FeatureFlags.swift | 4 +- .../LLMLocal/LLMLocalChatTestView.swift | 12 +- .../Onboarding/LLMLocalOnboardingFlow.swift | 2 +- .../LLMOpenAI/LLMOpenAIChatTestView.swift | 29 ++- .../LLMOpenAI/LLMOpenAIOnboardingView.swift | 25 +- .../Onboarding/LLMOpenAIModelOnboarding.swift | 23 ++ .../Onboarding/LLMOpenAITokenOnboarding.swift | 23 ++ Tests/UITests/TestApp/TestAppDelegate.swift | 2 +- .../TestAppLLMLocalUITests.swift | 6 +- .../TestAppLLMOpenAIUITests.swift | 22 +- .../UITests/UITests.xcodeproj/project.pbxproj | 16 ++ .../xcshareddata/xcschemes/TestApp.xcscheme | 2 +- 62 files changed, 1835 insertions(+), 787 deletions(-) create mode 100644 Sources/SpeziLLM/Helpers/Chat+Append.swift rename Sources/SpeziLLMLocal/Configuration/{LLMContextParameters.swift => LLMLocalContextParameters.swift} (95%) rename Sources/SpeziLLMLocal/Configuration/{LLMParameters.swift => LLMLocalParameters.swift} (94%) rename Sources/SpeziLLMLocal/Configuration/{LLMSamplingParameters.swift => LLMLocalSamplingParameters.swift} (97%) create mode 100644 Sources/SpeziLLMLocal/LLMLlamaError.swift create mode 100644 Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIModelParameters.swift create mode 100644 Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIParameters.swift create mode 100644 Sources/SpeziLLMOpenAI/Helpers/Chat+OpenAI.swift rename Sources/SpeziLLMOpenAI/{OpenAIConstants.swift => Helpers/LLMOpenAIConstants.swift} (78%) create mode 100644 Sources/SpeziLLMOpenAI/LLMOpenAI+Configuration.swift create mode 100644 Sources/SpeziLLMOpenAI/LLMOpenAI.swift create mode 100644 Sources/SpeziLLMOpenAI/LLMOpenAIError.swift create mode 100644 Sources/SpeziLLMOpenAI/LLMOpenAIRunnerSetup.swift create mode 100644 Sources/SpeziLLMOpenAI/LLMOpenAITokenSaver.swift rename Sources/SpeziLLMOpenAI/{OpenAIAPIKeyOnboardingStep.swift => Onboarding/LLMOpenAIAPITokenOnboardingStep.swift} (82%) rename Sources/SpeziLLMOpenAI/{OpenAIModelSelectionOnboardingStep.swift => Onboarding/LLMOpenAIModelOnboardingStep.swift} (69%) delete mode 100644 Sources/SpeziLLMOpenAI/OpenAIChatStreamResult+Sendable.swift delete mode 100644 Sources/SpeziLLMOpenAI/OpenAIError.swift delete mode 100644 Sources/SpeziLLMOpenAI/OpenAIModel.swift delete mode 100644 Sources/SpeziLLMOpenAI/OpenAIModule.swift rename Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/{OpenAIAPIKeyOnboardingStep.png => LLMOpenAIAPITokenOnboardingStep.png} (100%) rename Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/{OpenAIAPIKeyOnboardingStep.png.license => LLMOpenAIAPITokenOnboardingStep.png.license} (100%) rename Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/{OpenAIAPIKeyOnboardingStep~dark.png => LLMOpenAIAPITokenOnboardingStep~dark.png} (100%) rename Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/{OpenAIAPIKeyOnboardingStep~dark.png.license => LLMOpenAIAPITokenOnboardingStep~dark.png.license} (100%) rename Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/{OpenAIModelSelectionOnboardingStep.png => LLMOpenAIModelOnboardingStep.png} (100%) rename Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/{OpenAIModelSelectionOnboardingStep.png.license => LLMOpenAIModelOnboardingStep.png.license} (100%) rename Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/{OpenAIModelSelectionOnboardingStep~dark.png => LLMOpenAIModelOnboardingStep~dark.png} (100%) rename 
Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/{OpenAIModelSelectionOnboardingStep~dark.png.license => LLMOpenAIModelOnboardingStep~dark.png.license} (100%) create mode 100644 Tests/UITests/TestApp/LLMOpenAI/Onboarding/LLMOpenAIModelOnboarding.swift create mode 100644 Tests/UITests/TestApp/LLMOpenAI/Onboarding/LLMOpenAITokenOnboarding.swift diff --git a/CITATION.cff b/CITATION.cff index 8e684abd..00c6074f 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -12,9 +12,9 @@ authors: - family-names: "Schmiedmayer" given-names: "Paul" orcid: "https://orcid.org/0000-0002-8607-9148" -- family-names: "Ravi" - given-names: "Vishnu" - orcid: "https://orcid.org/0000-0003-0359-1275" +- family-names: "Zagar" + given-names: "Philipp" + orcid: "https://orcid.org/0009-0001-5934-2078" title: "SpeziLLM" doi: 10.5281/zenodo.7538165 url: "https://github.com/StanfordSpezi/SpeziLLM" diff --git a/Package.swift b/Package.swift index 0f08fbfb..b12303d0 100644 --- a/Package.swift +++ b/Package.swift @@ -26,11 +26,11 @@ let package = Package( dependencies: [ .package(url: "https://github.com/MacPaw/OpenAI", .upToNextMinor(from: "0.2.4")), .package(url: "https://github.com/StanfordBDHG/llama.cpp", .upToNextMinor(from: "0.1.6")), - .package(url: "https://github.com/StanfordSpezi/Spezi", .upToNextMinor(from: "0.8.0")), + .package(url: "https://github.com/StanfordSpezi/Spezi", .upToNextMinor(from: "0.8.2")), .package(url: "https://github.com/StanfordSpezi/SpeziStorage", .upToNextMinor(from: "0.5.0")), .package(url: "https://github.com/StanfordSpezi/SpeziOnboarding", .upToNextMinor(from: "0.7.0")), .package(url: "https://github.com/StanfordSpezi/SpeziSpeech", .upToNextMinor(from: "0.1.1")), - .package(url: "https://github.com/StanfordSpezi/SpeziChat", .upToNextMinor(from: "0.1.1")), + .package(url: "https://github.com/StanfordSpezi/SpeziChat", .upToNextMinor(from: "0.1.2")), .package(url: "https://github.com/StanfordSpezi/SpeziViews", .upToNextMinor(from: "0.6.3")) ], targets: [ @@ -63,10 +63,10 @@ let package = Package( .target( name: "SpeziLLMOpenAI", dependencies: [ + .target(name: "SpeziLLM"), .product(name: "OpenAI", package: "OpenAI"), .product(name: "Spezi", package: "Spezi"), .product(name: "SpeziChat", package: "SpeziChat"), - .product(name: "SpeziLocalStorage", package: "SpeziStorage"), .product(name: "SpeziSecureStorage", package: "SpeziStorage"), .product(name: "SpeziSpeechRecognizer", package: "SpeziSpeech"), .product(name: "SpeziOnboarding", package: "SpeziOnboarding") diff --git a/README.md b/README.md index 2f40a450..120629d1 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ The section below highlights the setup and basic use of the [SpeziLLMLocal](http ### Spezi LLM Local -The target enables developers to easily execute medium-size Language Models (LLMs) locally on-device via the [llama.cpp framework](https://github.com/ggerganov/llama.cpp). The module allows you to interact with the locally run LLM via purely Swift-based APIs, no interaction with low-level C or C++ code is necessary. +The target enables developers to easily execute medium-size Language Models (LLMs) locally on-device via the [llama.cpp framework](https://github.com/ggerganov/llama.cpp). The module allows you to interact with the locally run LLM via purely Swift-based APIs, no interaction with low-level C or C++ code is necessary, building on top of the infrastructure of the [SpeziLLM target](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm). 
#### Setup @@ -80,25 +80,29 @@ class TestAppDelegate: SpeziAppDelegate { } ``` -Spezi will then automatically inject the `LLMRunner` in the SwiftUI environment to make it accessible throughout your application. -The example below also showcases how to use the `LLMRunner` to execute a SpeziLLM-based [`LLM`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llm). +#### Usage -```swift -class ExampleView: View { - @Environment(LLMRunner.self) var runner - @State var model: LLM = LLMLlama( - modelPath: URL(string: "...") // The locally stored Language Model File in the ".gguf" format - ) +The code example below showcases the interaction with the `LLMLlama` through the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. +Based on a `String` prompt, the `LLMGenerationTask/generate(prompt:)` method returns an `AsyncThrowingStream` which yields the inferred characters until the generation has completed. - var body: some View { - EmptyView() - .task { - // Returns an `AsyncThrowingStream` which yields the produced output of the LLM. - let stream = try await runner(with: model).generate(prompt: "Some example prompt") - - // ... - } - } +```swift +struct LocalLLMChatView: View { + @Environment(LLMRunner.self) var runner: LLMRunner + + // The locally executed LLM + @State var model: LLMLlama = .init( + modelPath: ... + ) + @State var responseText: String + + func executePrompt(prompt: String) { + // Execute the query on the runner, returning a stream of outputs + let stream = try await runner(with: model).generate(prompt: "Hello LLM!") + + for try await token in stream { + responseText.append(token) + } + } } ``` @@ -107,43 +111,51 @@ class ExampleView: View { ### Spezi LLM Open AI -A module that allows you to interact with GPT-based large language models (LLMs) from OpenAI within your Spezi application. +A module that allows you to interact with GPT-based Large Language Models (LLMs) from OpenAI within your Spezi application. +`SpeziLLMOpenAI` provides a pure Swift-based API for interacting with the OpenAI GPT API, building on top of the infrastructure of the [SpeziLLM target](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm). #### Setup -You can configure the `OpenAIModule` in the `SpeziAppDelegate` as follows. -In the example, we configure the `OpenAIModule` to use the GPT-4 model with a default API key. +In order to use `LLMOpenAI`, the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) needs to be initialized in the Spezi `Configuration`. Only then can the `LLMRunner` be used to execute the ``LLMOpenAI``. +See the [SpeziLLM documentation](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) for more details. ```swift -import Spezi -import SpeziLLMOpenAI - -class ExampleDelegate: SpeziAppDelegate { +class LLMOpenAIAppDelegate: SpeziAppDelegate { override var configuration: Configuration { - Configuration { - OpenAIModule(apiToken: "API_KEY", openAIModel: .gpt4) + Configuration { + LLMRunner { + LLMOpenAIRunnerSetupTask() + } } } } ``` -The OpenAIModule injects an `OpenAIModel` in the SwiftUI environment to make it accessible throughout your application. 
The model is queried via an instance of [`Chat` from the SpeziChat package](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation/spezichat/chat). +#### Usage + +The code example below showcases the interaction with the `LLMOpenAI` through the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. +Based on a `String` prompt, the `LLMGenerationTask/generate(prompt:)` method returns an `AsyncThrowingStream` which yields the inferred characters until the generation has completed. ```swift -class ExampleView: View { - @Environment(OpenAIModel.self) var model - let chat: Chat = [ - .init(role: .user, content: "Example prompt!"), - ] - - var body: some View { - EmptyView() - .task { - // Returns an `AsyncThrowingStream` which yields the produced output of the LLM. - let stream = try model.queryAPI(withChat: chat) - - // ... - } +struct LLMOpenAIChatView: View { + @Environment(LLMRunner.self) var runner: LLMRunner + + @State var model: LLMOpenAI = .init( + parameters: .init( + modelType: .gpt3_5Turbo, + systemPrompt: "You're a helpful assistant that answers questions from users.", + overwritingToken: "abc123" + ) + ) + @State var responseText: String + + func executePrompt(prompt: String) { + // Execute the query on the runner, returning a stream of outputs + let stream = try await runner(with: model).generate(prompt: "Hello LLM!") + + for try await token in stream { + responseText.append(token) + } + } } } ``` diff --git a/Sources/SpeziLLM/Helpers/Chat+Append.swift b/Sources/SpeziLLM/Helpers/Chat+Append.swift new file mode 100644 index 00000000..71ef909c --- /dev/null +++ b/Sources/SpeziLLM/Helpers/Chat+Append.swift @@ -0,0 +1,47 @@ +// +// This source file is part of the Stanford Spezi open-source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SpeziChat + + +extension Chat { + /// Append a `ChatEntity/Role/assistant` output to the `Chat`. + /// Automatically overwrites the last `ChatEntity/Role/assistant` message if there is one, otherwise creates a new one. + /// + /// - Parameters: + /// - output: The `ChatEntity/Role/assistant` output `String` (part) that should be appended. + @MainActor + public mutating func append(assistantOutput output: String) { + if self.last?.role == .assistant { + self[self.count - 1] = .init( + role: self.last?.role ?? .assistant, + content: (self.last?.content ?? "") + output + ) + } else { + self.append(.init(role: .assistant, content: output)) + } + } + + /// Append a `ChatEntity/Role/user` input to the `Chat`. + /// + /// - Parameters: + /// - input: The `ChatEntity/Role/user` input that should be appended. + @MainActor + public mutating func append(userInput input: String) { + self.append(.init(role: .user, content: input)) + } + + /// Append a `ChatEntity/Role/system` prompt to the `Chat` at the first position. + /// + /// - Parameters: + /// - systemPrompt: The `ChatEntity/Role/system` prompt of the `Chat`, inserted at the very beginning. 
+ @MainActor + public mutating func append(systemMessage systemPrompt: String) { + self.insert(.init(role: .system, content: systemPrompt), at: 0) + } +} diff --git a/Sources/SpeziLLM/LLM.swift b/Sources/SpeziLLM/LLM.swift index f8f91030..cc28027f 100644 --- a/Sources/SpeziLLM/LLM.swift +++ b/Sources/SpeziLLM/LLM.swift @@ -7,13 +7,16 @@ // import Foundation +import SpeziChat /// The ``LLM`` protocol provides an abstraction layer for the usage of Large Language Models within the Spezi ecosystem, /// regardless of the execution locality (local or remote) or the specific model type. /// Developers can use the ``LLM`` protocol to conform their LLM interface implementations to a standard which is consistent throughout the Spezi ecosystem. /// -/// It is recommended that ``LLM`` should be used in conjunction with the [Swift Actor concept](https://developer.apple.com/documentation/swift/actor), meaning one should use the `actor` keyword (not `class`) for the implementation of the model component. The Actor concept provides guarantees regarding concurrent access to shared instances from multiple threads. +/// The ``LLM`` contains the ``LLM/context`` property which holds the entire history of the model interactions. +/// This includes the system prompt, user input, but also assistant responses. +/// Ensure the property always contains all necessary information, as the ``LLM/generate(continuation:)`` function executes the inference based on the ``LLM/context``. /// /// - Important: An ``LLM`` shouldn't be executed on it's own but always used together with the ``LLMRunner``. /// Please refer to the ``LLMRunner`` documentation for a complete code example. @@ -21,22 +24,26 @@ import Foundation /// ### Usage /// /// An example conformance of the ``LLM`` looks like the code sample below (lots of details were omitted for simplicity). -/// The key point is the need to implement the ``LLM/setup(runnerConfig:)`` as well as the ``LLM/generate(prompt:continuation:)`` functions, whereas the ``LLM/setup(runnerConfig:)`` has an empty default implementation as not every ``LLMHostingType`` requires the need for a setup closure. +/// The key point is the need to implement the ``LLM/setup(runnerConfig:)`` as well as the ``LLM/generate(continuation:)`` functions, whereas the ``LLM/setup(runnerConfig:)`` has an empty default implementation as not every ``LLMHostingType`` requires the need for a setup closure. /// /// ```swift -/// actor LLMTest: LLM { -/// var type: LLMHostingType = .local -/// var state: LLMState = .uninitialized +/// @Observable +/// public class LLMTest: LLM { +/// public let type: LLMHostingType = .local +/// @MainActor public var state: LLMState = .uninitialized +/// @MainActor public var context: Chat = [] /// -/// func setup(/* */) async {} -/// func generate(/* */) async {} +/// public func setup(/* */) async throws {} +/// public func generate(/* */) async {} /// } /// ``` -public protocol LLM { +public protocol LLM: AnyObject { /// The type of the ``LLM`` as represented by the ``LLMHostingType``. - var type: LLMHostingType { get async } + var type: LLMHostingType { get } /// The state of the ``LLM`` indicated by the ``LLMState``. - @MainActor var state: LLMState { get } + @MainActor var state: LLMState { get set } + /// The current context state of the ``LLM``, includes the entire prompt history including system prompts, user input, and model responses. + @MainActor var context: Chat { get set } /// Performs any setup-related actions for the ``LLM``. 
@@ -46,11 +53,25 @@ public protocol LLM { /// - runnerConfig: The runner configuration as a ``LLMRunnerConfiguration``. func setup(runnerConfig: LLMRunnerConfiguration) async throws - /// Performs the actual text generation functionality of the ``LLM`` based on an input prompt `String`. + /// Performs the actual text generation functionality of the ``LLM`` based on the ``LLM/context``. /// The result of the text generation is streamed via a Swift `AsyncThrowingStream` that is passed as a parameter. /// /// - Parameters: - /// - prompt: The input prompt `String` used for the text generation. /// - continuation: A Swift `AsyncThrowingStream` enabling the streaming of the text generation. - func generate(prompt: String, continuation: AsyncThrowingStream.Continuation) async + func generate(continuation: AsyncThrowingStream.Continuation) async +} + + +extension LLM { + /// Finishes the continuation with an error and sets the ``LLM/state`` to the respective error (on the main actor). + /// + /// - Parameters: + /// - error: The error that occurred. + /// - continuation: The `AsyncThrowingStream` that streams the generated output. + public func finishGenerationWithError(_ error: E, on continuation: AsyncThrowingStream.Continuation) async { + continuation.finish(throwing: error) + await MainActor.run { + self.state = .error(error: error) + } + } } diff --git a/Sources/SpeziLLM/LLMError.swift b/Sources/SpeziLLM/LLMError.swift index edc41b9c..71634db0 100644 --- a/Sources/SpeziLLM/LLMError.swift +++ b/Sources/SpeziLLM/LLMError.swift @@ -9,46 +9,34 @@ import Foundation -/// The ``LLMError`` describes possible errors that occur during the execution of the ``LLM`` via the ``LLMRunner``. -public enum LLMError: LocalizedError { - /// Indicates that the local model file is not found. - case modelNotFound - /// Indicates that the ``LLM`` is not yet ready, e.g., not initialized. - case modelNotReadyYet - /// Indicates that during generation an error occurred. - case generationError +/// Defines errors that may occur during setting up the runner environment for ``LLM`` generation jobs. +public enum LLMRunnerError: LLMError { + /// Indicates an error occurred during setup of the LLM generation. + case setupError public var errorDescription: String? { switch self { - case .modelNotFound: - String(localized: LocalizedStringResource("LLM_MODEL_NOT_FOUND_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) - case .modelNotReadyYet: - String(localized: LocalizedStringResource("LLM_MODEL_NOT_READY_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) - case .generationError: - String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .setupError: + String(localized: LocalizedStringResource("LLM_SETUP_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) } } public var recoverySuggestion: String? { switch self { - case .modelNotFound: - String(localized: LocalizedStringResource("LLM_MODEL_NOT_FOUND_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) - case .modelNotReadyYet: - String(localized: LocalizedStringResource("LLM_MODEL_NOT_READY_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) - case .generationError: - String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .setupError: + String(localized: LocalizedStringResource("LLM_SETUP_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) } } public var failureReason: String? 
{ switch self { - case .modelNotFound: - String(localized: LocalizedStringResource("LLM_MODEL_NOT_FOUND_FAILURE_REASON", bundle: .atURL(from: .module))) - case .modelNotReadyYet: - String(localized: LocalizedStringResource("LLM_MODEL_NOT_READY_FAILURE_REASON", bundle: .atURL(from: .module))) - case .generationError: - String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .setupError: + String(localized: LocalizedStringResource("LLM_SETUP_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) } } } + + +/// The ``LLMError`` defines a common error protocol which should be used for defining errors within the SpeziLLM ecosystem. +public protocol LLMError: LocalizedError, Equatable {} diff --git a/Sources/SpeziLLM/LLMHostingType.swift b/Sources/SpeziLLM/LLMHostingType.swift index ff221279..ec586482 100644 --- a/Sources/SpeziLLM/LLMHostingType.swift +++ b/Sources/SpeziLLM/LLMHostingType.swift @@ -14,4 +14,6 @@ public enum LLMHostingType: String, CaseIterable { case fog /// Remote, cloud-based execution of the ``LLM``. case cloud + /// Mock execution + case mock } diff --git a/Sources/SpeziLLM/LLMRunner.swift b/Sources/SpeziLLM/LLMRunner.swift index f624a801..2beeda81 100644 --- a/Sources/SpeziLLM/LLMRunner.swift +++ b/Sources/SpeziLLM/LLMRunner.swift @@ -39,14 +39,13 @@ import Spezi /// } /// /// struct LocalLLMChatView: View { -/// // The runner responsible for executing the local LLM. -/// @Environment(LLMRunner.self) private var runner: LLMRunner +/// // The runner responsible for executing the LLM. +/// @Environment(LLMRunner.self) var runner: LLMRunner /// -/// // The locally executed LLM -/// private let model: LLMLlama = .init( +/// // The executed LLM +/// @State var model: LLMLlama = .init( /// modelPath: ... /// ) -/// /// @State var responseText: String /// /// func executePrompt(prompt: String) { @@ -65,13 +64,21 @@ public actor LLMRunner: Module, DefaultInitializable, EnvironmentAccessible { public enum State { case idle case processing + case error(LocalizedError) } /// The configuration of the runner represented by ``LLMRunnerConfiguration``. private let runnerConfiguration: LLMRunnerConfiguration - /// All to be performed ``LLMRunner``-related setup tasks. - private let runnerSetupTasks: [LLMHostingType: any LLMRunnerSetupTask] + /// Indicates if the ``LLMRunner`` should lazily perform the passed ``LLMRunnerSetupTask``'s. + private let lazyRunnerSetup: Bool + /// Holds all dependencies of the ``LLMRunner`` as expressed by all stated ``LLMRunnerSetupTask``'s in the ``init(runnerConfig:_:)``. + /// Is required to enable the injection of `Dependency`s into the ``LLMRunnerSetupTask``'s. + @Dependency private var runnerSetupTaskModules: [any Module] + /// All to be performed ``LLMRunner``-related setup tasks, mapped to the respective ``LLMHostingType``. + /// Derived from the ``LLMRunnerSetupTask``'s passed within the ``init(runnerConfig:_:)``. + private var runnerSetupTasks: [LLMHostingType: any LLMRunnerSetupTask] = [:] + /// Stores all currently available ``LLMGenerationTask``'s, one for each Spezi ``LLM``, identified by the ``LLMTaskIdentifier``. private var runnerTasks: [LLMTaskIdentifier: LLMGenerationTask] = [:] /// Indicates for which ``LLMHostingType`` the runner backend is already initialized. 
@@ -94,13 +101,15 @@ public actor LLMRunner: Module, DefaultInitializable, EnvironmentAccessible { /// /// - Parameters: /// - runnerConfig: The configuration of the ``LLMRunner`` represented by the ``LLMRunnerConfiguration``. - /// - content: A result builder that aggregates all stated ``LLMRunnerSetupTask``'s. + /// - dependencies: A result builder that aggregates all stated ``LLMRunnerSetupTask``'s as dependencies. public init( runnerConfig: LLMRunnerConfiguration = .init(), - @LLMRunnerSetupTaskBuilder _ content: @Sendable @escaping () -> _LLMRunnerSetupTaskCollection + lazyRunnerSetup: Bool = true, + @LLMRunnerSetupTaskBuilder _ dependencies: @Sendable () -> DependencyCollection ) { self.runnerConfiguration = runnerConfig - self.runnerSetupTasks = content().runnerSetupTasks + self.lazyRunnerSetup = lazyRunnerSetup + self._runnerSetupTaskModules = Dependency(using: dependencies()) for modelType in LLMHostingType.allCases { self.runnerBackendInitialized[modelType] = false @@ -112,6 +121,27 @@ public actor LLMRunner: Module, DefaultInitializable, EnvironmentAccessible { self.init(runnerConfig: .init()) {} } + public nonisolated func configure() { + Task { + await mapRunnerSetupTasks() + } + } + + private func mapRunnerSetupTasks() async { + for module in runnerSetupTaskModules { + guard let task = module as? any LLMRunnerSetupTask else { + preconditionFailure("SpeziLLM: Reached inconsistent state. \(type(of: module)) is not a \((any LLMRunnerSetupTask).self)") + } + + runnerSetupTasks[task.type] = task + + if !lazyRunnerSetup { + try? await task.setupRunner(runnerConfig: self.runnerConfiguration) + runnerBackendInitialized[task.type] = true + } + } + } + /// This call-as-a-function ``LLMRunner`` usage wraps a Spezi ``LLM`` and makes it ready for execution. /// It manages a set of all ``LLMGenerationTask``'s, guaranteeing efficient model execution. @@ -120,10 +150,10 @@ public actor LLMRunner: Module, DefaultInitializable, EnvironmentAccessible { /// - with: The ``LLM`` that should be executed. /// /// - Returns: The ready to use ``LLMGenerationTask``. - public func callAsFunction(with model: any LLM) async -> LLMGenerationTask { - let modelType = await model.type + public func callAsFunction(with model: any LLM) async throws -> LLMGenerationTask { + let modelType = model.type /// If necessary, setup of the runner backend - if runnerBackendInitialized[modelType] == false { + if runnerBackendInitialized[modelType] != true && modelType != .mock { /// Initializes the required runner backends for the respective ``LLMHostingType``. guard let task = self.runnerSetupTasks[modelType] else { preconditionFailure(""" @@ -132,7 +162,15 @@ public actor LLMRunner: Module, DefaultInitializable, EnvironmentAccessible { """) } - try? await task.setupRunner(runnerConfig: self.runnerConfiguration) + do { + try await task.setupRunner(runnerConfig: self.runnerConfiguration) + } catch { + // Adjust `LLM/state` to not initialized in order to allow for new errors to surface and trigger and alert + await MainActor.run { + model.state = .uninitialized + } + throw error + } runnerBackendInitialized[modelType] = true } diff --git a/Sources/SpeziLLM/LLMState.swift b/Sources/SpeziLLM/LLMState.swift index 1729db84..52f6e1df 100644 --- a/Sources/SpeziLLM/LLMState.swift +++ b/Sources/SpeziLLM/LLMState.swift @@ -20,7 +20,7 @@ public enum LLMState: CustomStringConvertible, Equatable { /// The Spezi ``LLM`` is currently in the process of generating an output. 
case generating /// The Spezi ``LLM`` is in an error state as described by the associated value ``LLMError``. - case error(error: LLMError) + case error(error: any LLMError) /// A textual description of the current ``LLMState``. @@ -33,4 +33,17 @@ public enum LLMState: CustomStringConvertible, Equatable { case .error: String(localized: LocalizedStringResource("LLM_STATE_ERROR", bundle: .atURL(from: .module))) } } + + + /// Necessary `Equatable` implementation + public static func == (lhs: LLMState, rhs: LLMState) -> Bool { + switch (lhs, rhs) { + case (.uninitialized, .uninitialized): true + case (.loading, .loading): true + case (.ready, .ready): true + case (.generating, .generating): true + case (.error, .error): true + default: false + } + } } diff --git a/Sources/SpeziLLM/Mock/LLMMock.swift b/Sources/SpeziLLM/Mock/LLMMock.swift index e2dcb457..b3ceb03a 100644 --- a/Sources/SpeziLLM/Mock/LLMMock.swift +++ b/Sources/SpeziLLM/Mock/LLMMock.swift @@ -7,12 +7,15 @@ // import Foundation +import SpeziChat /// A mock SpeziLLM ``LLM`` that is used for testing and preview purposes. -public actor LLMMock: LLM { - public let type: LLMHostingType = .local +@Observable +public class LLMMock: LLM { + public let type: LLMHostingType = .mock @MainActor public var state: LLMState = .uninitialized + @MainActor public var context: Chat = [] public init() {} @@ -24,7 +27,7 @@ public actor LLMMock: LLM { } } - public func generate(prompt: String, continuation: AsyncThrowingStream.Continuation) async { + public func generate(continuation: AsyncThrowingStream.Continuation) async { /// Generate mock message try? await Task.sleep(for: .seconds(1)) continuation.yield("Mock ") diff --git a/Sources/SpeziLLM/Resources/Localizable.xcstrings b/Sources/SpeziLLM/Resources/Localizable.xcstrings index 288773ca..3fddccf3 100644 --- a/Sources/SpeziLLM/Resources/Localizable.xcstrings +++ b/Sources/SpeziLLM/Resources/Localizable.xcstrings @@ -1,103 +1,32 @@ { "sourceLanguage" : "en", "strings" : { - "CHAT_VIEW_TITLE" : { - "extractionState" : "stale", + "LLM_SETUP_ERROR_DESCRIPTION" : { "localizations" : { "en" : { "stringUnit" : { "state" : "translated", - "value" : "Spezi LLM Chat" + "value" : "LLM Setup Error." } } } }, - "LLM_GENERATION_ERROR_DESCRIPTION" : { + "LLM_SETUP_ERROR_FAILURE_REASON" : { "localizations" : { "en" : { "stringUnit" : { "state" : "translated", - "value" : "Generation of LLM output failed." + "value" : "An error has occurred during the setup of the LLM environment or model." } } } }, - "LLM_GENERATION_ERROR_FAILURE_REASON" : { + "LLM_SETUP_ERROR_RECOVERY_SUGGESTION" : { "localizations" : { "en" : { "stringUnit" : { "state" : "translated", - "value" : "An unknown error has occurred during the generation of the output." - } - } - } - }, - "LLM_GENERATION_ERROR_RECOVERY_SUGGESTION" : { - "localizations" : { - "en" : { - "stringUnit" : { - "state" : "translated", - "value" : "Ensure that the device has enough free computing and memory resources. Try restarting the application." - } - } - } - }, - "LLM_MODEL_NOT_FOUND_ERROR_DESCRIPTION" : { - "localizations" : { - "en" : { - "stringUnit" : { - "state" : "translated", - "value" : "LLM file not found." - } - } - } - }, - "LLM_MODEL_NOT_FOUND_FAILURE_REASON" : { - "localizations" : { - "en" : { - "stringUnit" : { - "state" : "translated", - "value" : "The specified LLM file could not be found on the device." 
- } - } - } - }, - "LLM_MODEL_NOT_FOUND_RECOVERY_SUGGESTION" : { - "localizations" : { - "en" : { - "stringUnit" : { - "state" : "translated", - "value" : "Ensure that the LLM file is downloaded successfully and properly stored on the device. Try restarting the application." - } - } - } - }, - "LLM_MODEL_NOT_READY_ERROR_DESCRIPTION" : { - "localizations" : { - "en" : { - "stringUnit" : { - "state" : "translated", - "value" : "LLM not ready yet." - } - } - } - }, - "LLM_MODEL_NOT_READY_FAILURE_REASON" : { - "localizations" : { - "en" : { - "stringUnit" : { - "state" : "translated", - "value" : "The LLM needs to be initialized before usage." - } - } - } - }, - "LLM_MODEL_NOT_READY_RECOVERY_SUGGESTION" : { - "localizations" : { - "en" : { - "stringUnit" : { - "state" : "translated", - "value" : "Make sure that the application initialized the LLM properly. Try restarting the application." + "value" : "Please restart the application and try again." } } } diff --git a/Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md b/Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md index 20480470..9a2b3190 100644 --- a/Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md +++ b/Sources/SpeziLLM/SpeziLLM.docc/SpeziLLM.md @@ -32,25 +32,31 @@ The two main components of ``SpeziLLM`` are the ``LLM`` abstraction as well as t ### LLM abstraction -``SpeziLLM`` provides the ``LLM`` protocol which provides an abstraction of an arbitrary Language Model, regardless of the execution locality (local or remote) or the specific model type. +The ``LLM`` protocol provides an abstraction layer for the usage of Large Language Models within the Spezi ecosystem, +regardless of the execution locality (local or remote) or the specific model type. Developers can use the ``LLM`` protocol to conform their LLM interface implementations to a standard which is consistent throughout the Spezi ecosystem. -It is recommended that ``LLM`` should be used in conjunction with the [Swift Actor concept](https://developer.apple.com/documentation/swift/actor), meaning one should use the `actor` keyword (not `class`) for the implementation of the model component. The Actor concept provides guarantees regarding concurrent access to shared instances from multiple threads. + +The ``LLM`` contains the ``LLM/context`` property which holds the entire history of the model interactions. +This includes the system prompt, user input, but also assistant responses. +Ensure the property always contains all necessary information, as the ``LLM/generate(continuation:)`` function executes the inference based on the ``LLM/context``. > Important: An ``LLM`` shouldn't be executed on it's own but always used together with the ``LLMRunner``. -> Please refer to the ``LLMRunner`` documentation for a complete code example. + Please refer to the ``LLMRunner`` documentation for a complete code example. -#### Usage +### Usage An example conformance of the ``LLM`` looks like the code sample below (lots of details were omitted for simplicity). -The key point is the need to implement the ``LLM/setup(runnerConfig:)`` as well as the ``LLM/generate(prompt:continuation:)`` functions, whereas the ``LLM/setup(runnerConfig:)`` has an empty default implementation as not every ``LLMHostingType`` requires the need for a setup closure. +The key point is the need to implement the ``LLM/setup(runnerConfig:)`` as well as the ``LLM/generate(continuation:)`` functions, whereas the ``LLM/setup(runnerConfig:)`` has an empty default implementation as not every ``LLMHostingType`` requires the need for a setup closure. 
```swift -actor LLMTest: LLM { - var type: LLMHostingType = .local - var state: LLMState = .uninitialized - - func setup(/* */) async {} - func generate(/* */) async {} +@Observable +public class LLMTest: LLM { + public let type: LLMHostingType = .local + @MainActor public var state: LLMState = .uninitialized + @MainActor public var context: Chat = [] + + public func setup(/* */) async throws {} + public func generate(/* */) async {} } ``` @@ -92,13 +98,12 @@ import SpeziLLMLocal struct LocalLLMChatView: View { // The runner responsible for executing the local LLM. - @Environment(LLMRunner.self) private var runner: LLMRunner + @Environment(LLMRunner.self) var runner: LLMRunner // The locally executed LLM - private let model: LLMLlama = .init( + @State var model: LLMLlama = .init( modelPath: ... ) - @State var responseText: String func executePrompt(prompt: String) { @@ -118,7 +123,7 @@ The ``LLMChatView`` presents a basic chat view that enables users to chat with a The ``LLMChatView`` takes an ``LLM`` instance as well as initial assistant prompt as arguments to configure the chat properly. > Tip: The ``LLMChatView`` builds on top of the [SpeziChat package](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation). -> For more details, please refer to the DocC documentation of the [`ChatView`](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation/spezichat/chatview). + For more details, please refer to the DocC documentation of the [`ChatView`](https://swiftpackageindex.com/stanfordspezi/spezichat/documentation/spezichat/chatview). #### Usage diff --git a/Sources/SpeziLLM/Tasks/LLMGenerationTask.swift b/Sources/SpeziLLM/Tasks/LLMGenerationTask.swift index 9bb417aa..e61d06cb 100644 --- a/Sources/SpeziLLM/Tasks/LLMGenerationTask.swift +++ b/Sources/SpeziLLM/Tasks/LLMGenerationTask.swift @@ -7,6 +7,7 @@ // import Foundation +import SpeziChat /// The ``LLMGenerationTask`` with the specific responsibility to handle LLM generation tasks. @@ -25,9 +26,7 @@ public actor LLMGenerationTask { /// The `LLMTaskIdentifier` of the ``LLMGenerationTask``. var id: LLMTaskIdentifier { - get async { - .init(fromModel: model) - } + .init(fromModel: model) } /// Describes the state of the ``LLM`` as a ``LLMState``. @@ -49,14 +48,13 @@ public actor LLMGenerationTask { } - /// Starts the LLM output generation based on an input prompt. + /// Starts the LLM output generation based on the ``LLM/context``. /// Handles management takes like the initial setup of the ``LLM``. /// - /// - Parameters: - /// - prompt: The `String` that should be used as an input to the ``LLM`` - /// /// - Returns: An asynchronous stream of the ``LLM`` generation results. - public func generate(prompt: String) async throws -> AsyncThrowingStream { + /// + /// - Important: This function takes the state present within the ``LLM/context`` to query the ``LLM``. Ensure that the ``LLM/context`` reflects the state you want to use, especially the last (user) entry of the ``LLM/context``. + public func generate() async throws -> AsyncThrowingStream { let (stream, continuation) = AsyncThrowingStream.makeStream(of: String.self) /// Setup the model if necessary. @@ -64,17 +62,30 @@ public actor LLMGenerationTask { try await model.setup(runnerConfig: self.runnerConfig) } - /// Execute the LLM generation. 
- switch await model.state { - case .ready, .error: - self.task = Task(priority: self.runnerConfig.taskPriority) { - await model.generate(prompt: prompt, continuation: continuation) - } - - return stream - default: - throw LLMError.modelNotReadyYet + /// Execute the output generation of the LLM. + self.task = Task(priority: self.runnerConfig.taskPriority) { + await model.generate(continuation: continuation) } + + return stream + } + + + /// Starts the LLM output generation based on an input prompt. + /// Handles management tasks like the initial setup of the ``LLM``. + /// + /// - Parameters: + /// - userPrompt: The `String` that should be used as an input prompt to the ``LLM`` + /// + /// - Returns: An asynchronous stream of the ``LLM`` generation results. + /// + /// - Important: This function appends to the ``LLM/context``. Ensure that this wasn't done before by, e.g., via the ``LLMChatView``. + public func generate(prompt userPrompt: String) async throws -> AsyncThrowingStream { + await MainActor.run { + self.model.context.append(userInput: userPrompt) + } + + return try await self.generate() + } diff --git a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTask.swift b/Sources/SpeziLLM/Tasks/LLMRunnerSetupTask.swift index 69b713e5..5f56e3bd 100644 --- a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTask.swift +++ b/Sources/SpeziLLM/Tasks/LLMRunnerSetupTask.swift @@ -7,6 +7,7 @@ // import Foundation +import Spezi /// The ``LLMRunnerSetupTask`` provides an abstraction of different runner-related setup `Task`'s. @@ -23,7 +24,7 @@ import Foundation /// } /// } /// } -public protocol LLMRunnerSetupTask { +public protocol LLMRunnerSetupTask: Module { /// The ``LLMHostingType`` that the ``LLMRunnerSetupTask`` sets up. var type: LLMHostingType { get } diff --git a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskBuilder.swift b/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskBuilder.swift index bc0a1335..c3b99277 100644 --- a/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskBuilder.swift +++ b/Sources/SpeziLLM/Tasks/LLMRunnerSetupTaskBuilder.swift @@ -7,52 +7,16 @@ // import Foundation +import Spezi import SwiftUI /// A result builder used to aggregate multiple Spezi ``LLMRunnerSetupTask``s within the ``LLMRunner``. @resultBuilder @_documentation(visibility: internal) -public enum LLMRunnerSetupTaskBuilder { - /// If declared, provides contextual type information for statement expressions to translate them into partial results. - public static func buildExpression(_ expression: any LLMRunnerSetupTask) -> [any LLMRunnerSetupTask] { - [expression] - } - - /// Required by every result builder to build combined results from statement blocks. - public static func buildBlock(_ children: [any LLMRunnerSetupTask]...) -> [any LLMRunnerSetupTask] { - children.flatMap { $0 } - } - - /// Enables support for `if` statements that do not have an `else`. - public static func buildOptional(_ component: [any LLMRunnerSetupTask]?) -> [any LLMRunnerSetupTask] { - // swiftlint:disable:previous discouraged_optional_collection - // The optional collection is a requirement defined by @resultBuilder, we can not use a non-optional collection here. - component ?? [] - } - - /// With buildEither(second:), enables support for 'if-else' and 'switch' statements by folding conditional results into a single result. - public static func buildEither(first: [any LLMRunnerSetupTask]) -> [any LLMRunnerSetupTask] { - first - } - - /// With buildEither(first:), enables support for 'if-else' and 'switch' statements by folding conditional results into a single result. 
- public static func buildEither(second: [any LLMRunnerSetupTask]) -> [any LLMRunnerSetupTask] { - second - } - - /// Enables support for 'for..in' loops by combining the results of all iterations into a single result. - public static func buildArray(_ components: [[any LLMRunnerSetupTask]]) -> [any LLMRunnerSetupTask] { - components.flatMap { $0 } - } - - /// If declared, this will be called on the partial result of an 'if #available' block to allow the result builder to erase type information. - public static func buildLimitedAvailability(_ component: [any LLMRunnerSetupTask]) -> [any LLMRunnerSetupTask] { - component - } - - /// If declared, this will be called on the partial result from the outermost block statement to produce the final returned result. - public static func buildFinalResult(_ component: [any LLMRunnerSetupTask]) -> _LLMRunnerSetupTaskCollection { - _LLMRunnerSetupTaskCollection(runnerSetupTasks: component) +public enum LLMRunnerSetupTaskBuilder: DependencyCollectionBuilder { + /// An auto-closure expression, providing the default dependency value, building the ``DependencyCollection``. + public static func buildExpression(_ expression: @escaping @autoclosure () -> L) -> DependencyCollection { + DependencyCollection(singleEntry: expression) } } diff --git a/Sources/SpeziLLM/Views/LLMChatView.swift b/Sources/SpeziLLM/Views/LLMChatView.swift index 6bf98170..01954119 100644 --- a/Sources/SpeziLLM/Views/LLMChatView.swift +++ b/Sources/SpeziLLM/Views/LLMChatView.swift @@ -27,13 +27,7 @@ import SwiftUI /// struct LLMLocalChatTestView: View { /// var body: some View { /// LLMChatView( -/// model: LLMMock(), -/// initialAssistantPrompt: [ -/// .init( -/// role: .assistant, -/// content: "Hello!" -/// ) -/// ] +/// model: LLMMock() /// ) /// } /// } @@ -41,36 +35,33 @@ import SwiftUI public struct LLMChatView: View { /// A ``LLMRunner`` is responsible for executing the ``LLM``. Must be configured via the Spezi `Configuration`. @Environment(LLMRunner.self) private var runner - /// Represents the chat content that is displayed. - @State private var chat: Chat = [] - /// Indicates if the input field is disabled. - @State private var inputDisabled = false - /// Indicates the state of the view, get's derived from the ``LLM/state``. - @State private var viewState: ViewState = .idle - /// A SpeziLLM ``LLM`` that is used for the text generation within the chat view @State private var model: any LLM + /// Indicates if the input field is disabled. + @MainActor var inputDisabled: Bool { + model.state.representation == .processing + } + public var body: some View { - ChatView($chat, disableInput: $inputDisabled) - .onChange(of: chat) { oldValue, newValue in + ChatView($model.context, disableInput: inputDisabled) + .onChange(of: model.context) { oldValue, newValue in /// Once the user enters a message in the chat, send a request to the local LLM. if oldValue.count != newValue.count, let lastChat = newValue.last, lastChat.role == .user { Task { - inputDisabled = true - - /// Stream the LLMs response via an `AsyncThrowingStream` - let stream = try await runner(with: model).generate(prompt: lastChat.content) - chat.append(.init(role: .assistant, content: "")) - - for try await token in stream { - let lastMessageContent = chat.last?.content ?? 
"" - chat[chat.count - 1] = .init(role: .assistant, content: lastMessageContent + token) + do { + let stream = try await runner(with: model).generate() + + for try await token in stream { + model.context.append(assistantOutput: token) + } + } catch let error as LLMError { + model.state = .error(error: error) + } catch { + model.state = .error(error: LLMRunnerError.setupError) } - - inputDisabled = false } } } @@ -82,20 +73,16 @@ public struct LLMChatView: View { /// /// - Parameters: /// - model: The SpeziLLM ``LLM`` that should be used for the text generation. - /// - initialAssistantPrompt: The initial message of the LLM assistant. public init( - model: any LLM, - initialAssistantPrompt chat: Chat + model: any LLM ) { self._model = State(wrappedValue: model) - self._chat = State(wrappedValue: chat) } } #Preview { LLMChatView( - model: LLMMock(), - initialAssistantPrompt: [.init(role: .assistant, content: "Hello world!")] + model: LLMMock() ) } diff --git a/Sources/SpeziLLMLocal/Configuration/LLMContextParameters.swift b/Sources/SpeziLLMLocal/Configuration/LLMLocalContextParameters.swift similarity index 95% rename from Sources/SpeziLLMLocal/Configuration/LLMContextParameters.swift rename to Sources/SpeziLLMLocal/Configuration/LLMLocalContextParameters.swift index ea8c4388..f46402d0 100644 --- a/Sources/SpeziLLMLocal/Configuration/LLMContextParameters.swift +++ b/Sources/SpeziLLMLocal/Configuration/LLMLocalContextParameters.swift @@ -10,9 +10,9 @@ import Foundation import llama -/// The ``LLMContextParameters`` represents the context parameters of the LLM. +/// The ``LLMLocalContextParameters`` represents the context parameters of the LLM. /// Internally, these data points are passed as a llama.cpp `llama_context_params` C struct to the LLM. -public struct LLMContextParameters: Sendable { +public struct LLMLocalContextParameters: Sendable { /// Wrapped C struct from the llama.cpp library, later-on passed to the LLM private var wrapped: llama_context_params @@ -132,7 +132,7 @@ public struct LLMContextParameters: Sendable { } } - /// Creates the ``LLMContextParameters`` which wrap the underlying llama.cpp `llama_context_params` C struct. + /// Creates the ``LLMLocalContextParameters`` which wrap the underlying llama.cpp `llama_context_params` C struct. /// Is passed to the underlying llama.cpp model in order to configure the context of the LLM. /// /// - Parameters: diff --git a/Sources/SpeziLLMLocal/Configuration/LLMParameters.swift b/Sources/SpeziLLMLocal/Configuration/LLMLocalParameters.swift similarity index 94% rename from Sources/SpeziLLMLocal/Configuration/LLMParameters.swift rename to Sources/SpeziLLMLocal/Configuration/LLMLocalParameters.swift index cbb52d25..466720a1 100644 --- a/Sources/SpeziLLMLocal/Configuration/LLMParameters.swift +++ b/Sources/SpeziLLMLocal/Configuration/LLMLocalParameters.swift @@ -10,9 +10,9 @@ import Foundation import llama -/// The ``LLMParameters`` represents the parameters of the LLM. +/// The ``LLMLocalParameters`` represents the parameters of the LLM. /// Internally, these data points are passed as a llama.cpp `llama_model_params` C struct to the LLM. -public struct LLMParameters: Sendable { +public struct LLMLocalParameters: Sendable { /// Typealias for an internal llama.cpp progress callback function public typealias LlamaProgressCallback = (@convention(c) (Float, UnsafeMutableRawPointer?) 
-> Void) @@ -124,12 +124,12 @@ public struct LLMParameters: Sendable { } - /// Creates the ``LLMParameters`` which wrap the underlying llama.cpp `llama_model_params` C struct. + /// Creates the ``LLMLocalParameters`` which wrap the underlying llama.cpp `llama_model_params` C struct. /// Is passed to the underlying llama.cpp model in order to configure the LLM. /// /// - Parameters: - /// - systemPromot: The to-be-used system prompt of the LLM enabling fine-tuning of the LLMs behaviour. Defaults to the regular Llama2 system prompt. - /// - maxOutputLength: The maximum output length generated by the Spezi `LLM`, defaults to `1024`. + /// - systemPrompt: The to-be-used system prompt of the LLM enabling fine-tuning of the LLMs behaviour. Defaults to the regular Llama2 system prompt. + /// - maxOutputLength: The maximum output length generated by the Spezi `LLM`, defaults to `512`. /// - addBosToken: Indicates wether the BOS token is added by the Spezi `LLM`, defaults to `false`. /// - gpuLayerCount: Number of layers to store in VRAM, defaults to `1`, meaning Apple's `Metal` framework is enabled. /// - mainGpu: GPU that is used for scratch and small tensors, defaults to `0` representing the main GPU. @@ -141,7 +141,7 @@ public struct LLMParameters: Sendable { /// - useMlock: Forces the system to keep model in RAM, defaults to `false`. public init( systemPrompt: String = Defaults.defaultLlama2SystemPrompt, - maxOutputLength: Int = 1024, + maxOutputLength: Int = 512, addBosToken: Bool = false, gpuLayerCount: Int32 = 1, mainGpu: Int32 = 0, diff --git a/Sources/SpeziLLMLocal/Configuration/LLMSamplingParameters.swift b/Sources/SpeziLLMLocal/Configuration/LLMLocalSamplingParameters.swift similarity index 97% rename from Sources/SpeziLLMLocal/Configuration/LLMSamplingParameters.swift rename to Sources/SpeziLLMLocal/Configuration/LLMLocalSamplingParameters.swift index 18a75245..00b92cbf 100644 --- a/Sources/SpeziLLMLocal/Configuration/LLMSamplingParameters.swift +++ b/Sources/SpeziLLMLocal/Configuration/LLMLocalSamplingParameters.swift @@ -10,9 +10,9 @@ import Foundation import llama -/// The ``LLMSamplingParameters`` represents the sampling parameters of the LLM. +/// The ``LLMLocalSamplingParameters`` represents the sampling parameters of the LLM. /// Internally, these data points are passed as a llama.cpp `llama_sampling_params` C struct to the LLM. -public struct LLMSamplingParameters: Sendable { +public struct LLMLocalSamplingParameters: Sendable { /// Helper enum for the Mirostat sampling method public enum Mirostat { init(rawValue: Int, targetEntropy: Float = 5.0, learningRate: Float = 0.1) { @@ -241,7 +241,7 @@ public struct LLMSamplingParameters: Sendable { } - /// Creates the ``LLMContextParameters`` which wrap the underlying llama.cpp `llama_context_params` C struct. + /// Creates the ``LLMLocalContextParameters`` which wrap the underlying llama.cpp `llama_context_params` C struct. /// Is passed to the underlying llama.cpp model in order to configure the context of the LLM. /// /// - Parameters: diff --git a/Sources/SpeziLLMLocal/LLMLlama+Generation.swift b/Sources/SpeziLLMLocal/LLMLlama+Generation.swift index 0474ea2b..267be4b7 100644 --- a/Sources/SpeziLLMLocal/LLMLlama+Generation.swift +++ b/Sources/SpeziLLMLocal/LLMLlama+Generation.swift @@ -20,10 +20,8 @@ extension LLMLlama { /// Based on the input prompt, generate the output with llama.cpp /// /// - Parameters: - /// - prompt: The input `String` prompt. 
/// - continuation: A Swift `AsyncThrowingStream` that streams the generated output. - func _generate( // swiftlint:disable:this identifier_name function_body_length - prompt: String, + func _generate( // swiftlint:disable:this identifier_name function_body_length cyclomatic_complexity continuation: AsyncThrowingStream.Continuation ) async { await MainActor.run { @@ -31,39 +29,48 @@ extension LLMLlama { } // Log the most important parameters of the LLM - Self.logger.debug("n_length = \(self.parameters.maxOutputLength, privacy: .public), n_ctx = \(self.contextParameters.contextWindowSize, privacy: .public), n_batch = \(self.contextParameters.batchSize, privacy: .public), n_kv_req = \(self.parameters.maxOutputLength, privacy: .public)") + Self.logger.debug("SpeziLLMLocal: n_length = \(self.parameters.maxOutputLength, privacy: .public), n_ctx = \(self.contextParameters.contextWindowSize, privacy: .public), n_batch = \(self.contextParameters.batchSize, privacy: .public), n_kv_req = \(self.parameters.maxOutputLength, privacy: .public)") // Allocate new model context, if not already present - if self.context == nil { + if self.modelContext == nil { guard let context = llama_new_context_with_model(model, self.contextParameters.llamaCppRepresentation) else { - Self.logger.error("Failed to initialize context") - continuation.finish(throwing: LLMError.generationError) + Self.logger.error("SpeziLLMLocal: Failed to initialize context") + await finishGenerationWithError(LLMLlamaError.generationError, on: continuation) return } - self.context = context + self.modelContext = context } // Check if the maximal output generation length is smaller or equals to the context window size. guard self.parameters.maxOutputLength <= self.contextParameters.contextWindowSize else { - Self.logger.error("Error: n_kv_req \(self.parameters.maxOutputLength, privacy: .public) > n_ctx, the required KV cache size is not big enough") - continuation.finish(throwing: LLMError.generationError) + Self.logger.error("SpeziLLMLocal: Error: n_kv_req \(self.parameters.maxOutputLength, privacy: .public) > n_ctx, the required KV cache size is not big enough") + await finishGenerationWithError(LLMLlamaError.generationError, on: continuation) return } - let tokens = tokenize(prompt) + // Tokenizes the entire context of the `LLM? + guard let tokens = try? await tokenize() else { + Self.logger.error(""" + SpeziLLMLocal: Tokenization failed as illegal context exists. + Ensure the content of the context is structured in: System Prompt, User prompt, and an + arbitrary number of assistant responses and follow up user prompts. + """) + await finishGenerationWithError(LLMLlamaError.illegalContext, on: continuation) + return + } // Check if the input token count is smaller than the context window size decremented by 4 (space for end tokens). guard tokens.count <= self.contextParameters.contextWindowSize - 4 else { Self.logger.error(""" - Input prompt is too long with \(tokens.count, privacy: .public) tokens for the configured + SpeziLLMLocal: Input prompt is too long with \(tokens.count, privacy: .public) tokens for the configured context window size of \(self.contextParameters.contextWindowSize, privacy: .public) tokens. 
""") - continuation.finish(throwing: LLMError.generationError) + await finishGenerationWithError(LLMLlamaError.generationError, on: continuation) return } // Clear the KV cache in order to free up space for the incoming prompt (as we inject the entire history of the chat again) - llama_kv_cache_clear(self.context) + llama_kv_cache_clear(self.modelContext) var batch = llama_batch_init(Int32(tokens.count), 0, 1) defer { @@ -77,9 +84,8 @@ extension LLMLlama { // llama_decode will output logits only for the last token of the prompt batch.logits[Int(batch.n_tokens) - 1] = 1 - if llama_decode(self.context, batch) != 0 { - Self.logger.error("Initial decoding of the input prompt failed.") - continuation.finish(throwing: LLMError.generationError) + if llama_decode(self.modelContext, batch) != 0 { + await finishGenerationWithError(LLMLlamaError.generationError, on: continuation) return } @@ -97,7 +103,6 @@ extension LLMLlama { if nextTokenId == llama_token_eos(self.model) || decodedTokens == self.parameters.maxOutputLength || batchTokenIndex == self.contextParameters.contextWindowSize { - self.generatedText.append(Self.EOS) continuation.finish() await MainActor.run { self.state = .ready @@ -105,14 +110,17 @@ extension LLMLlama { return } - var nextStringPiece = String(llama_token_to_piece(context, nextTokenId)) + var nextStringPiece = String(llama_token_to_piece(self.modelContext, nextTokenId)) // As first character is sometimes randomly prefixed by a single space (even though prompt has an additional character) if decodedTokens == 0 && nextStringPiece.starts(with: " ") { nextStringPiece = String(nextStringPiece.dropFirst()) } + // Yield the response from the model to the Stream + Self.logger.debug(""" + SpeziLLMLocal: Yielded token: \(nextStringPiece, privacy: .public) + """) continuation.yield(nextStringPiece) - self.generatedText.append(nextStringPiece) // Prepare the next batch llama_batch_clear(&batch) @@ -124,22 +132,19 @@ extension LLMLlama { batchTokenIndex += 1 // Evaluate the current batch with the transformer model - let decodeOutput = llama_decode(self.context, batch) + let decodeOutput = llama_decode(self.modelContext, batch) if decodeOutput != 0 { // = 0 Success, > 0 Warning, < 0 Error - Self.logger.error("Decoding of generated output failed. Output: \(decodeOutput, privacy: .public)") - await MainActor.run { - self.state = .error(error: .generationError) - } - continuation.finish(throwing: LLMError.generationError) + Self.logger.error("SpeziLLMLocal: Decoding of generated output failed. 
Output: \(decodeOutput, privacy: .public)") + await finishGenerationWithError(LLMLlamaError.generationError, on: continuation) return } } let elapsedTime = Date().timeIntervalSince(startTime) - Self.logger.debug("Decoded \(decodedTokens, privacy: .public) tokens in \(String(format: "%.2f", elapsedTime), privacy: .public) s, speed: \(String(format: "%.2f", Double(decodedTokens) / elapsedTime), privacy: .public)) t/s") + Self.logger.debug("SpeziLLMLocal: Decoded \(decodedTokens, privacy: .public) tokens in \(String(format: "%.2f", elapsedTime), privacy: .public) s, speed: \(String(format: "%.2f", Double(decodedTokens) / elapsedTime), privacy: .public)) t/s") - llama_print_timings(self.context) + llama_print_timings(self.modelContext) continuation.finish() await MainActor.run { diff --git a/Sources/SpeziLLMLocal/LLMLlama+Helpers.swift b/Sources/SpeziLLMLocal/LLMLlama+Helpers.swift index f3a03089..2eeb74b9 100644 --- a/Sources/SpeziLLMLocal/LLMLlama+Helpers.swift +++ b/Sources/SpeziLLMLocal/LLMLlama+Helpers.swift @@ -14,53 +14,45 @@ import llama /// Extension of ``LLMLlama`` handling the text tokenization. extension LLMLlama { /// BOS token of the LLM, used at the start of each prompt passage. - static let BOS: String = { + private static let BOS: String = { "" }() /// EOS token of the LLM, used at the end of each prompt passage. - static let EOS: String = { + private static let EOS: String = { "" }() /// BOSYS token of the LLM, used at the start of the system prompt. - static let BOSYS: String = { + private static let BOSYS: String = { "<>" }() /// EOSYS token of the LLM, used at the end of the system prompt. - static let EOSYS: String = { + private static let EOSYS: String = { "<>" }() /// BOINST token of the LLM, used at the start of the instruction part of the prompt. - static let BOINST: String = { + private static let BOINST: String = { "[INST]" }() /// EOINST token of the LLM, used at the end of the instruction part of the prompt. - static let EOINST: String = { + private static let EOINST: String = { "[/INST]" }() - /// Converts a textual `String` to the individual `LLMLlamaToken`'s based on the model's dictionary. + /// Converts the current context of the model to the individual `LLMLlamaToken`'s based on the model's dictionary. /// This is a required tasks as LLMs internally processes tokens. /// - /// - Parameters: - /// - toBeTokenizedText: The input `String` that should be tokenized. - /// /// - Returns: The tokenized `String` as `LLMLlamaToken`'s. - func tokenize(_ toBeTokenizedText: String) -> [LLMLlamaToken] { - let formattedPrompt = buildPrompt(with: toBeTokenizedText) - if self.generatedText.isEmpty { - self.generatedText = formattedPrompt - } else { - self.generatedText.append(formattedPrompt) - } + func tokenize() async throws -> [LLMLlamaToken] { + let formattedChat = try await formatChat() var tokens: [LLMLlamaToken] = .init( - llama_tokenize_with_context(self.context, std.string(self.generatedText), self.parameters.addBosToken, true) + llama_tokenize_with_context(self.modelContext, std.string(formattedChat), self.parameters.addBosToken, true) ) // Truncate tokens if there wouldn't be enough context size for the generated output @@ -72,7 +64,7 @@ extension LLMLlama { if tokens.isEmpty { tokens.append(llama_token_bos(self.model)) Self.logger.warning(""" - The input prompt didn't map to any tokens, so the prompt was considered empty. + SpeziLLMLocal: The input prompt didn't map to any tokens, so the prompt was considered empty. 
To mediate this issue, a BOS token was added to the prompt so that the output generation doesn't run without any tokens. """) @@ -90,7 +82,7 @@ extension LLMLlama { /// - Note: Used only for debug purposes func detokenize(tokens: [LLMLlamaToken]) -> [(LLMLlamaToken, String)] { tokens.reduce(into: [(LLMLlamaToken, String)]()) { partialResult, token in - partialResult.append((token, String(llama_token_to_piece(self.context, token)))) + partialResult.append((token, String(llama_token_to_piece(self.modelContext, token)))) } } @@ -101,7 +93,7 @@ extension LLMLlama { /// - Returns: A sampled `LLMLLamaToken` func sample(batchSize: Int32) -> LLMLlamaToken { let nVocab = llama_n_vocab(model) - let logits = llama_get_logits_ith(self.context, batchSize - 1) + let logits = llama_get_logits_ith(self.modelContext, batchSize - 1) var candidates: [llama_token_data] = .init(repeating: llama_token_data(), count: Int(nVocab)) @@ -117,17 +109,17 @@ extension LLMLlama { // Sample via the temperature method let minKeep = Int(max(1, self.samplingParameters.outputProbabilities)) - llama_sample_top_k(self.context, &candidatesP, self.samplingParameters.topK, minKeep) - llama_sample_tail_free(self.context, &candidatesP, self.samplingParameters.tfs, minKeep) - llama_sample_typical(self.context, &candidatesP, self.samplingParameters.typicalP, minKeep) - llama_sample_top_p(self.context, &candidatesP, self.samplingParameters.topP, minKeep) - llama_sample_min_p(self.context, &candidatesP, self.samplingParameters.minP, minKeep) - llama_sample_temp(self.context, &candidatesP, self.samplingParameters.temperature) + llama_sample_top_k(self.modelContext, &candidatesP, self.samplingParameters.topK, minKeep) + llama_sample_tail_free(self.modelContext, &candidatesP, self.samplingParameters.tfs, minKeep) + llama_sample_typical(self.modelContext, &candidatesP, self.samplingParameters.typicalP, minKeep) + llama_sample_top_p(self.modelContext, &candidatesP, self.samplingParameters.topP, minKeep) + llama_sample_min_p(self.modelContext, &candidatesP, self.samplingParameters.minP, minKeep) + llama_sample_temp(self.modelContext, &candidatesP, self.samplingParameters.temperature) - return llama_sample_token(self.context, &candidatesP) + return llama_sample_token(self.modelContext, &candidatesP) } - /// Build a typical Llama2 prompt format out of the user's input including the system prompt and all necessary instruction tokens. + /// Builds a typical Llama2 prompt format out of the ``LLMLlama/context`` including the system prompt and all necessary instruction tokens. /// /// The typical format of an Llama2 prompt looks like: /// """ @@ -138,22 +130,48 @@ extension LLMLlama { /// {user_message_1} [/INST] {model_reply_1}[INST] {user_message_2} [/INST] /// """ /// - /// - Parameters: - /// - userInputString: String-based input prompt of the user. /// - Returns: Properly formatted Llama2 prompt including system prompt. 
- private func buildPrompt(with userInputString: String) -> String { - if self.generatedText.isEmpty { - """ - \(Self.BOS)\(Self.BOINST) \(Self.BOSYS) - \(self.parameters.systemPrompt) - \(Self.EOSYS) - - \(userInputString) \(Self.EOINST) - """ + " " // Add a spacer to the generated output from the model - } else { - """ - \(Self.BOS)\(Self.BOINST) \(userInputString) \(Self.EOINST) - """ + " " // Add a spacer to the generated output from the model + private func formatChat() async throws -> String { + // Ensure that system prompt as well as a first user prompt exist + guard let systemPrompt = await self.context.first, + systemPrompt.role == .system, + let initialUserPrompt = await self.context.indices.contains(1) ? self.context[1] : nil, + initialUserPrompt.role == .user else { + throw LLMLlamaError.illegalContext } + + /// Build the initial prompt + /// """ + /// [INST] <> + /// {your_system_message} + /// <> + /// + /// {user_message_1} [/INST] + /// """ + var prompt = """ + \(Self.BOS)\(Self.BOINST) \(Self.BOSYS) + \(systemPrompt.content) + \(Self.EOSYS) + + \(initialUserPrompt.content) \(Self.EOINST) + """ + " " // Add a spacer to the generated output from the model + + for chatEntry in await self.context.dropFirst(2) { + if chatEntry.role == .assistant { + /// Append response from assistant + /// (already existing: {user_message_1} [/INST]){model_reply_1} + prompt += """ + \(chatEntry.content)\(Self.EOS) + """ + } else if chatEntry.role == .user { + /// Append response from user + /// [INST] {user_message_2} [/INST] + prompt += """ + \(Self.BOS)\(Self.BOINST) \(chatEntry.content) \(Self.EOINST) + """ + " " // Add a spacer to the generated output from the model + } + } + + return prompt } } diff --git a/Sources/SpeziLLMLocal/LLMLlama.swift b/Sources/SpeziLLMLocal/LLMLlama.swift index 912a80ff..fec465d3 100644 --- a/Sources/SpeziLLMLocal/LLMLlama.swift +++ b/Sources/SpeziLLMLocal/LLMLlama.swift @@ -9,6 +9,7 @@ import Foundation import llama import os +import SpeziChat import SpeziLLM @@ -17,67 +18,72 @@ import SpeziLLM /// - Important: ``LLMLlama`` shouldn't be used on it's own but always wrapped by the Spezi `LLMRunner` as the runner handles /// all management overhead tasks. A code example on how to use ``LLMLlama`` in combination with the `LLMRunner` can be /// found in the documentation of the `LLMRunner`. -public actor LLMLlama: LLM { +@Observable +public class LLMLlama: LLM { /// A Swift Logger that logs important information from the ``LLMLlama``. static let logger = Logger(subsystem: "edu.stanford.spezi", category: "SpeziLLM") public let type: LLMHostingType = .local @MainActor public var state: LLMState = .uninitialized + @MainActor public var context: Chat = [] /// Parameters of the llama.cpp ``LLM``. - let parameters: LLMParameters + let parameters: LLMLocalParameters /// Context parameters of the llama.cpp ``LLM``. - let contextParameters: LLMContextParameters + let contextParameters: LLMLocalContextParameters /// Sampling parameters of the llama.cpp ``LLM``. - let samplingParameters: LLMSamplingParameters + let samplingParameters: LLMLocalSamplingParameters /// The on-device `URL` where the model is located. private let modelPath: URL /// A pointer to the allocated model via llama.cpp. - var model: OpaquePointer? + @ObservationIgnored var model: OpaquePointer? /// A pointer to the allocated model context from llama.cpp. - var context: OpaquePointer? 
- /// Keeps track of all already text being processed by the LLM, including the system prompt, instructions, and model responses. - var generatedText: String = "" + @ObservationIgnored var modelContext: OpaquePointer? /// Creates a ``LLMLlama`` instance that can then be passed to the `LLMRunner` for execution. /// /// - Parameters: /// - modelPath: A local `URL` where the LLM file is stored. The format of the LLM must be in the llama.cpp `.gguf` format. - /// - parameters: Parameterize the ``LLMLlama`` via ``LLMParameters``. - /// - contextParameters: Configure the context of the ``LLMLlama`` via ``LLMContextParameters``. - /// - samplingParameters: Parameterize the sampling methods of the ``LLMLlama`` via ``LLMSamplingParameters``. + /// - parameters: Parameterize the ``LLMLlama`` via ``LLMLocalParameters``. + /// - contextParameters: Configure the context of the ``LLMLlama`` via ``LLMLocalContextParameters``. + /// - samplingParameters: Parameterize the sampling methods of the ``LLMLlama`` via ``LLMLocalSamplingParameters``. public init( modelPath: URL, - parameters: LLMParameters = .init(), - contextParameters: LLMContextParameters = .init(), - samplingParameters: LLMSamplingParameters = .init() + parameters: LLMLocalParameters = .init(), + contextParameters: LLMLocalContextParameters = .init(), + samplingParameters: LLMLocalSamplingParameters = .init() ) { self.modelPath = modelPath self.parameters = parameters self.contextParameters = contextParameters self.samplingParameters = samplingParameters + Task { @MainActor in + self.context.append(systemMessage: parameters.systemPrompt) + } } public func setup(runnerConfig: LLMRunnerConfiguration) async throws { + Self.logger.debug("SpeziLLMLocal: Local LLM is being initialized") await MainActor.run { self.state = .loading } guard let model = llama_load_model_from_file(modelPath.path().cString(using: .utf8), parameters.llamaCppRepresentation) else { + Self.logger.error("SpeziLLMLocal: Local LLM file could not be opened, indicating that the model file doesn't exist") await MainActor.run { - self.state = .error(error: LLMError.modelNotFound) + self.state = .error(error: LLMLlamaError.modelNotFound) } - throw LLMError.modelNotFound + throw LLMLlamaError.modelNotFound } /// Check if model was trained for the configured context window size guard self.contextParameters.contextWindowSize <= llama_n_ctx_train(model) else { - Self.logger.warning("Model was trained on only \(llama_n_ctx_train(model), privacy: .public) context tokens, not the configured \(self.contextParameters.contextWindowSize, privacy: .public) context tokens") + Self.logger.error("SpeziLLMLocal: Model was trained on only \(llama_n_ctx_train(model), privacy: .public) context tokens, not the configured \(self.contextParameters.contextWindowSize, privacy: .public) context tokens") await MainActor.run { - self.state = .error(error: LLMError.generationError) + self.state = .error(error: LLMLlamaError.contextSizeMismatch) } - throw LLMError.modelNotFound + throw LLMLlamaError.contextSizeMismatch } self.model = model @@ -85,16 +91,19 @@ public actor LLMLlama: LLM { await MainActor.run { self.state = .ready } + Self.logger.debug("SpeziLLMLocal: Local LLM finished initializing, now ready to use") } - public func generate(prompt: String, continuation: AsyncThrowingStream.Continuation) async { - await _generate(prompt: prompt, continuation: continuation) + public func generate(continuation: AsyncThrowingStream.Continuation) async { + Self.logger.debug("SpeziLLMLocal: Local LLM started a new 
inference") + await _generate(continuation: continuation) + Self.logger.debug("SpeziLLMLocal: Local LLM completed an inference") } /// Upon deinit, free the context and the model via llama.cpp deinit { - llama_free(context) + llama_free(self.modelContext) llama_free_model(self.model) } } diff --git a/Sources/SpeziLLMLocal/LLMLlamaError.swift b/Sources/SpeziLLMLocal/LLMLlamaError.swift new file mode 100644 index 00000000..16e3ba68 --- /dev/null +++ b/Sources/SpeziLLMLocal/LLMLlamaError.swift @@ -0,0 +1,71 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import SpeziLLM + + +/// The ``LLMLlamaError`` describes possible errors that occur during the execution of ``LLMLlama`` via the SpeziLLM `LLMRunner`. +public enum LLMLlamaError: LLMError { + /// Indicates that the local model file is not found. + case modelNotFound + /// Indicates that the ``LLMLlama`` is not yet ready, e.g., not initialized. + case modelNotReadyYet + /// Indicates that during generation an error occurred. + case generationError + /// Indicates error occurring during tokenizing the user input + case illegalContext + /// Indicates a mismatch between training context tokens and configured tokens + case contextSizeMismatch + + + public var errorDescription: String? { + switch self { + case .modelNotFound: + String(localized: LocalizedStringResource("LLM_MODEL_NOT_FOUND_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .modelNotReadyYet: + String(localized: LocalizedStringResource("LLM_MODEL_NOT_READY_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .generationError: + String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .illegalContext: + String(localized: LocalizedStringResource("LLM_ILLEGAL_CONTEXT_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .contextSizeMismatch: + String(localized: LocalizedStringResource("LLM_CONTEXT_SIZE_MISMATCH_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + } + } + + public var recoverySuggestion: String? { + switch self { + case .modelNotFound: + String(localized: LocalizedStringResource("LLM_MODEL_NOT_FOUND_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .modelNotReadyYet: + String(localized: LocalizedStringResource("LLM_MODEL_NOT_READY_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .generationError: + String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .illegalContext: + String(localized: LocalizedStringResource("LLM_ILLEGAL_CONTEXT_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .contextSizeMismatch: + String(localized: LocalizedStringResource("LLM_CONTEXT_SIZE_MISMATCH_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + } + } + + public var failureReason: String? 
{ + switch self { + case .modelNotFound: + String(localized: LocalizedStringResource("LLM_MODEL_NOT_FOUND_FAILURE_REASON", bundle: .atURL(from: .module))) + case .modelNotReadyYet: + String(localized: LocalizedStringResource("LLM_MODEL_NOT_READY_FAILURE_REASON", bundle: .atURL(from: .module))) + case .generationError: + String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .illegalContext: + String(localized: LocalizedStringResource("LLM_ILLEGAL_CONTEXT_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .contextSizeMismatch: + String(localized: LocalizedStringResource("LLM_CONTEXT_SIZE_MISMATCH_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + } + } +} diff --git a/Sources/SpeziLLMLocal/Resources/Localizable.xcstrings b/Sources/SpeziLLMLocal/Resources/Localizable.xcstrings index d2597c78..533cedcb 100644 --- a/Sources/SpeziLLMLocal/Resources/Localizable.xcstrings +++ b/Sources/SpeziLLMLocal/Resources/Localizable.xcstrings @@ -1,6 +1,156 @@ { "sourceLanguage" : "en", "strings" : { + "LLM_CONTEXT_SIZE_MISMATCH_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The configured LLM context size does not match the model." + } + } + } + }, + "LLM_CONTEXT_SIZE_MISMATCH_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The LLM was trained on a context size that is smaller than the configured LLM context size." + } + } + } + }, + "LLM_CONTEXT_SIZE_MISMATCH_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please ensure that the configured LLM context size is smaller than or equal to the context the model was trained upon." + } + } + } + }, + "LLM_GENERATION_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Generation of LLM output failed." + } + } + } + }, + "LLM_GENERATION_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "An unknown error has occurred during the generation of the output." + } + } + } + }, + "LLM_GENERATION_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Ensure that the device has enough free computing and memory resources. Try restarting the application." + } + } + } + }, + "LLM_ILLEGAL_CONTEXT_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The LLM context is in an illegal state." + } + } + } + }, + "LLM_ILLEGAL_CONTEXT_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The LLM context doesn't contain the required messages such as the system prompt and a user message. These messages are required before triggering an inference." + } + } + } + }, + "LLM_ILLEGAL_CONTEXT_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please retry the query or restart the application." + } + } + } + }, + "LLM_MODEL_NOT_FOUND_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "LLM file not found."
+ } + } + } + }, + "LLM_MODEL_NOT_FOUND_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The specified LLM file could not be found on the device." + } + } + } + }, + "LLM_MODEL_NOT_FOUND_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Ensure that the LLM file is downloaded successfully and properly stored on the device. Try restarting the application." + } + } + } + }, + "LLM_MODEL_NOT_READY_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "LLM not ready yet." + } + } + } + }, + "LLM_MODEL_NOT_READY_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The LLM needs to be initialized before usage." + } + } + } + }, + "LLM_MODEL_NOT_READY_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Make sure that the application initialized the LLM properly. Try restarting the application." + } + } + } + }, "SPEZI_LLM_LOCAL_LLAMA_SYSTEM_PROMPT" : { "localizations" : { "en" : { diff --git a/Sources/SpeziLLMLocal/SpeziLLMLocal.docc/SpeziLLMLocal.md b/Sources/SpeziLLMLocal/SpeziLLMLocal.docc/SpeziLLMLocal.md index 7956ce54..c3521bdc 100644 --- a/Sources/SpeziLLMLocal/SpeziLLMLocal.docc/SpeziLLMLocal.md +++ b/Sources/SpeziLLMLocal/SpeziLLMLocal.docc/SpeziLLMLocal.md @@ -28,14 +28,14 @@ You need to add the SpeziLLM Swift package to ## Spezi LLM Local Components -The core component of the ``SpeziLLMLocal`` target is the ``LLMLlama`` [Actor](https://developer.apple.com/documentation/swift/actor) which conforms to the [`LLM` protocol of SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llm). ``LLMLlama`` heavily utilizes the [llama.cpp library](https://github.com/ggerganov/llama.cpp) to perform the inference of the Language Model. +The core component of the ``SpeziLLMLocal`` target is the ``LLMLlama`` class which conforms to the [`LLM` protocol of SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llm). ``LLMLlama`` heavily utilizes the [llama.cpp library](https://github.com/ggerganov/llama.cpp) to perform the inference of the Language Model. > Important: To execute a LLM locally, ``LLMLlama`` requires the model file being present on the local device. > The model must be in the popular `.gguf` format introduced by the [llama.cpp library](https://github.com/ggerganov/llama.cpp) > Tip: In order to download the model file of the Language model to the local device, SpeziLLM provides the [SpeziLLMLocalDownload](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillmlocaldownload) target which provides model download and storage functionalities. -``LLMLlama`` offers a variety of configuration possibilities, such as the used model file, the context window, the maximum output size or the batch size. These options can be set via the ``LLMLlama/init(modelPath:parameters:contextParameters:samplingParameters:)`` initializer and the ``LLMParameters``, ``LLMContextParameters``, and ``LLMSamplingParameters`` types. Keep in mind that the model file must be in the popular `.gguf` format! +``LLMLlama`` offers a variety of configuration possibilities, such as the used model file, the context window, the maximum output size or the batch size. 
These options can be set via the ``LLMLlama/init(modelPath:parameters:contextParameters:samplingParameters:)`` initializer and the ``LLMLocalParameters``, ``LLMLocalContextParameters``, and ``LLMLocalSamplingParameters`` types. Keep in mind that the model file must be in the popular `.gguf` format! - Important: ``LLMLlama`` shouldn't be used on it's own but always wrapped by the Spezi `LLMRunner` as the runner handles all management overhead tasks. @@ -65,17 +65,24 @@ class LocalLLMAppDelegate: SpeziAppDelegate { The code example below showcases the interaction with the ``LLMLlama`` through the the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner). Based on a `String` prompt, the `LLMGenerationTask/generate(prompt:)` method returns an `AsyncThrowingStream` which yields the inferred characters until the generation has completed. +The ``LLMLlama`` contains the ``LLMLlama/context`` property which holds the entire history of the model interactions. +This includes the system prompt and user input as well as the assistant's responses. +Ensure the property always contains all necessary information, as the ``LLMLlama/generate(continuation:)`` function executes the inference based on the ``LLMLlama/context``. + +> Tip: The model can be queried via the `LLMGenerationTask/generate()` and `LLMGenerationTask/generate(prompt:)` calls (returned from wrapping the ``LLMLlama`` in the `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target). + The first method takes no input prompt at all but uses the current context of the model (so `LLM/context`) to query the model. + The second takes a `String`-based input from the user and appends it to the context of the model (so `LLM/context`) before querying the model. + > Important: The ``LLMLlama`` should only be used together with the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner)! ```swift struct LocalLLMChatView: View { - @Environment(LLMRunner.self) private var runner: LLMRunner + @Environment(LLMRunner.self) var runner: LLMRunner // The locally executed LLM - private let model: LLMLlama = .init( + @State var model: LLMLlama = .init( modelPath: ... ) - @State var responseText: String func executePrompt(prompt: String) { @@ -97,9 +104,9 @@ struct LocalLLMChatView: View { ### Configuration -- ``LLMParameters`` -- ``LLMContextParameters`` -- ``LLMSamplingParameters`` +- ``LLMLocalParameters`` +- ``LLMLocalContextParameters`` +- ``LLMLocalSamplingParameters`` ### Setup diff --git a/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIModelParameters.swift b/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIModelParameters.swift new file mode 100644 index 00000000..e07cb541 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIModelParameters.swift @@ -0,0 +1,76 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import OpenAI + + +/// Represents the model-specific parameters of OpenAI's LLMs. +public struct LLMOpenAIModelParameters: Sendable { + /// The format for model responses. + let responseFormat: ResponseFormat?
+ /// The sampling temperature (0 to 2). Higher values increase randomness, lower values enhance focus. + let temperature: Double? + /// Nucleus sampling threshold. Considers tokens with top_p probability mass. Alternative to temperature sampling. + let topP: Double? + /// The number of generated chat completions per input. + let completionsPerOutput: Int? + /// Sequences (up to 4) where generation stops. Output doesn't include these sequences. + let stopSequence: [String] + /// Maximum token count for each completion. + let maxOutputLength: Int? + /// Adjusts new topic exploration (-2.0 to 2.0). Higher values encourage novelty. + let presencePenalty: Double? + /// Controls repetition (-2.0 to 2.0). Higher values reduce the likelihood of repeating content. + let frequencyPenalty: Double? + /// Alters specific token's likelihood in completion. + let logitBias: [String: Int] + /// Unique identifier for the end-user, aiding in abuse monitoring. + let user: String? + + + /// Initializes ``LLMOpenAIModelParameters`` for OpenAI model configuration. + /// + /// - Parameters: + /// - responseFormat: Format for model responses. + /// - temperature: Sampling temperature (0 to 2); higher values (e.g., 0.8) increase randomness, lower values (e.g., 0.2) enhance focus. Adjust this or topP, not both. + /// - topP: Nucleus sampling threshold; considers tokens with top_p probability mass. Alternative to temperature sampling. + /// - completionsPerOutput: Number of generated chat completions per input. + /// - stopSequence: Sequences (up to 4) where generation stops; output doesn't include these sequences. + /// - maxOutputLength: Maximum token count for each completion. + /// - presencePenalty: Adjusts new topic exploration (-2.0 to 2.0); higher values encourage novelty. + /// - frequencyPenalty: Controls repetition (-2.0 to 2.0); higher values reduce likelihood of repeating content. + /// - logitBias: Alters specific token's likelihood in completion. + /// - user: Unique identifier for the end-user, aiding in abuse monitoring. + public init( + responseFormat: ResponseFormat? = nil, + temperature: Double? = nil, + topP: Double? = nil, + completionsPerOutput: Int? = nil, + stopSequence: [String] = [], + maxOutputLength: Int? = nil, + presencePenalty: Double? = nil, + frequencyPenalty: Double? = nil, + logitBias: [String: Int] = [:], + user: String? = nil + ) { + self.responseFormat = responseFormat + self.temperature = temperature + self.topP = topP + self.completionsPerOutput = completionsPerOutput + self.stopSequence = stopSequence + self.maxOutputLength = maxOutputLength + self.presencePenalty = presencePenalty + self.frequencyPenalty = frequencyPenalty + self.logitBias = logitBias + self.user = user + } +} + + +extension ResponseFormat: @unchecked Sendable {} diff --git a/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIParameters.swift b/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIParameters.swift new file mode 100644 index 00000000..8a209655 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/Configuration/LLMOpenAIParameters.swift @@ -0,0 +1,45 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import OpenAI + + +/// Represents the parameters of OpenAIs LLMs. +public struct LLMOpenAIParameters: Sendable { + /// Defaults of possible LLMs parameter settings. 
+ public enum Defaults { + public static let defaultOpenAISystemPrompt: String = { + String(localized: LocalizedStringResource("SPEZI_LLM_OPENAI_SYSTEM_PROMPT", bundle: .atURL(from: .module))) + }() + } + + /// The to-be-used OpenAI model. + let modelType: Model + /// The to-be-used system prompt of the LLM. + let systemPrompt: String + /// Separate OpenAI token that overrides the one defined within the ``LLMOpenAIRunnerSetupTask``. + let overwritingToken: String? + + + /// Creates the ``LLMOpenAIParameters``. + /// + /// - Parameters: + /// - modelType: The to-be-used OpenAI model such as GPT3.5 or GPT4. + /// - systemPrompt: The to-be-used system prompt of the LLM enabling fine-tuning of the LLM's behaviour. Defaults to the regular OpenAI chat-based GPT system prompt. + /// - overwritingToken: Separate OpenAI token that overrides the one defined within the ``LLMOpenAIRunnerSetupTask``. + public init( + modelType: Model, + systemPrompt: String = Defaults.defaultOpenAISystemPrompt, + overwritingToken: String? = nil + ) { + self.modelType = modelType + self.systemPrompt = systemPrompt + self.overwritingToken = overwritingToken + } +} diff --git a/Sources/SpeziLLMOpenAI/Helpers/Chat+OpenAI.swift b/Sources/SpeziLLMOpenAI/Helpers/Chat+OpenAI.swift new file mode 100644 index 00000000..c7396f46 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/Helpers/Chat+OpenAI.swift @@ -0,0 +1,23 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SpeziChat +import struct OpenAI.Chat + + +extension SpeziChat.ChatEntity.Role { + /// Maps the `SpeziChat/ChatEntity/Role`s to the `OpenAI/Chat/Role`s. + var openAIRepresentation: OpenAI.Chat.Role { + switch self { + case .assistant: .assistant + case .user: .user + case .system: .system + case .function: .function + } + } +} diff --git a/Sources/SpeziLLMOpenAI/OpenAIConstants.swift b/Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIConstants.swift similarity index 78% rename from Sources/SpeziLLMOpenAI/OpenAIConstants.swift rename to Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIConstants.swift index 57fb255c..eaa645fa 100644 --- a/Sources/SpeziLLMOpenAI/OpenAIConstants.swift +++ b/Sources/SpeziLLMOpenAI/Helpers/LLMOpenAIConstants.swift @@ -7,8 +7,8 @@ // -enum OpenAIConstants { - static let modelStorageKey = "OpenAIGPT.Model" +/// Constants used throughout the `SpeziLLMOpenAI` target. +enum LLMOpenAIConstants { static let credentialsServer = "openapi.org" static let credentialsUsername = "OpenAIGPT" } diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAI+Configuration.swift b/Sources/SpeziLLMOpenAI/LLMOpenAI+Configuration.swift new file mode 100644 index 00000000..77bfed63 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAI+Configuration.swift @@ -0,0 +1,43 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import OpenAI + +extension LLMOpenAI { + /// Map the ``LLMOpenAI/context`` to the OpenAI `[Chat]` representation.
+ private var openAIContext: [Chat] { + get async { + await self.context.map { chatEntity in + Chat( + role: chatEntity.role.openAIRepresentation, + content: chatEntity.content + ) + } + } + } + + /// Provides the ``LLMOpenAI/context``, the ``LLMOpenAIParameters`` and the ``LLMOpenAIModelParameters`` in an OpenAI `ChatQuery` representation used for querying the API. + var openAIChatQuery: ChatQuery { + get async { + await .init( + model: self.parameters.modelType, + messages: self.openAIContext, + responseFormat: self.modelParameters.responseFormat, + temperature: self.modelParameters.temperature, + topP: self.modelParameters.topP, + n: self.modelParameters.completionsPerOutput, + stop: self.modelParameters.stopSequence.isEmpty ? nil : self.modelParameters.stopSequence, + maxTokens: self.modelParameters.maxOutputLength, + presencePenalty: self.modelParameters.presencePenalty, + frequencyPenalty: self.modelParameters.frequencyPenalty, + logitBias: self.modelParameters.logitBias.isEmpty ? nil : self.modelParameters.logitBias, + user: self.modelParameters.user + ) + } + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAI.swift b/Sources/SpeziLLMOpenAI/LLMOpenAI.swift new file mode 100644 index 00000000..591ffc59 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAI.swift @@ -0,0 +1,189 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import struct OpenAI.Chat +import struct OpenAI.ChatFunctionDeclaration +import struct OpenAI.ChatQuery +import class OpenAI.OpenAI +import struct OpenAI.Model +import struct OpenAI.ChatStreamResult +import struct OpenAI.APIErrorResponse +import os +import SpeziChat +import SpeziLLM + + +/// The ``LLMOpenAI`` is a Spezi `LLM` and utilizes the OpenAI API to generate output via the OpenAI GPT models. +/// ``LLMOpenAI`` provides access to text-based models from OpenAI, such as GPT-3.5 or GPT-4. +/// +/// - Important: ``LLMOpenAI`` shouldn't be used on its own but always wrapped by the Spezi `LLMRunner` as the runner handles +/// all management overhead tasks. +/// +/// ### Usage +/// +/// The code section below showcases a complete code example on how to use the ``LLMOpenAI`` in combination with an `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target. +/// +/// - Important: The model can be queried via the `LLMGenerationTask/generate()` and `LLMGenerationTask/generate(prompt:)` calls (returned from wrapping the ``LLMOpenAI`` in the `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target). +/// The first method takes no input prompt at all but uses the current context of the model (so `LLM/context`) to query the model. +/// The second takes a `String`-based input from the user and appends it to the context of the model (so `LLM/context`) before querying the model. +/// +/// ```swift +/// class LLMOpenAIAppDelegate: SpeziAppDelegate { +/// override var configuration: Configuration { +/// Configuration { +/// LLMRunner { +/// LLMOpenAIRunnerSetupTask() +/// } +/// } +/// } +/// } +/// +/// struct LLMOpenAIChatView: View { +/// // The runner responsible for executing the OpenAI LLM.
+/// @Environment(LLMRunner.self) var runner: LLMRunner +/// +/// // The OpenAI LLM +/// @State var model: LLMOpenAI = .init( +/// parameters: .init( +/// modelType: .gpt3_5Turbo, +/// systemPrompt: "You're a helpful assistant that answers questions from users.", +/// overwritingToken: "abc123" +/// ) +/// ) +/// @State var responseText: String +/// +/// func executePrompt(prompt: String) { +/// // Execute the query on the runner, returning a stream of outputs +/// let stream = try await runner(with: model).generate(prompt: "Hello LLM!") +/// +/// for try await token in stream { +/// responseText.append(token) +/// } +/// } +/// } +/// ``` +@Observable +public class LLMOpenAI: LLM { + /// A Swift Logger that logs important information from the ``LLMOpenAI``. + static let logger = Logger(subsystem: "edu.stanford.spezi", category: "SpeziLLM") + + @MainActor public var state: LLMState = .uninitialized + @MainActor public var context: SpeziChat.Chat = [] + + public let type: LLMHostingType = .cloud + let parameters: LLMOpenAIParameters + let modelParameters: LLMOpenAIModelParameters + @ObservationIgnored private var wrappedModel: OpenAI? + + + private var model: OpenAI { + guard let model = wrappedModel else { + preconditionFailure(""" + SpeziLLMOpenAI: Illegal Access - Tried to access the wrapped OpenAI model of `LLMOpenAI` before being initialized. + Ensure that the `LLMOpenAIRunnerSetupTask` is passed to the `LLMRunner` within the Spezi `Configuration`. + """) + } + return model + } + + + /// Creates a ``LLMOpenAI`` instance that can then be passed to the `LLMRunner` for execution. + /// + /// - Parameters: + /// - parameters: The ``LLMOpenAIParameters`` of the OpenAI LLM. + /// - modelParameters: The model-specific ``LLMOpenAIModelParameters`` of the OpenAI LLM. + public init( + parameters: LLMOpenAIParameters, + modelParameters: LLMOpenAIModelParameters = .init() + ) { + self.parameters = parameters + self.modelParameters = modelParameters + Task { @MainActor in + self.context.append(systemMessage: parameters.systemPrompt) + } + } + + + public func setup(runnerConfig: LLMRunnerConfiguration) async throws { + await MainActor.run { + self.state = .loading + } + + // Overwrite API token if passed + if let overwritingToken = self.parameters.overwritingToken { + self.wrappedModel = OpenAI( + configuration: .init( + token: overwritingToken, + organizationIdentifier: LLMOpenAIRunnerSetupTask.openAIModel.configuration.organizationIdentifier, + host: LLMOpenAIRunnerSetupTask.openAIModel.configuration.host, + timeoutInterval: LLMOpenAIRunnerSetupTask.openAIModel.configuration.timeoutInterval + ) + ) + } else { + self.wrappedModel = LLMOpenAIRunnerSetupTask.openAIModel + } + + do { + _ = try await self.model.model(query: .init(model: self.parameters.modelType)) + } catch let error as URLError { + throw LLMOpenAIError.connectivityIssues(error) + } catch { + LLMOpenAI.logger.error(""" + SpeziLLMOpenAI: Couldn't access the specified OpenAI model. + Ensure the model exists and the configured API key is able to access the model.
+ """) + throw LLMOpenAIError.modelAccessError(error) + } + + await MainActor.run { + self.state = .ready + } + } + + public func generate(continuation: AsyncThrowingStream.Continuation) async { + Self.logger.debug("SpeziLLMOpenAI: OpenAI GPT started a new inference") + + let chatStream: AsyncThrowingStream = await self.model.chatsStream(query: self.openAIChatQuery) + + do { + for try await chatStreamResult in chatStream { + guard let yieldedToken = chatStreamResult.choices.first?.delta.content, + !yieldedToken.isEmpty else { + continue + } + + LLMOpenAI.logger.debug(""" + SpeziLLMOpenAI: Yielded token: \(yieldedToken, privacy: .public) + """) + continuation.yield(yieldedToken) + } + + continuation.finish() + } catch let error as APIErrorResponse { + if error.error.code == LLMOpenAIError.insufficientQuota.openAIErrorMessage { + LLMOpenAI.logger.error(""" + SpeziLLMOpenAI: Quota limit of OpenAI is reached. Ensure the configured API key has enough resources. + """) + await finishGenerationWithError(LLMOpenAIError.insufficientQuota, on: continuation) + } else { + LLMOpenAI.logger.error(""" + SpeziLLMOpenAI: OpenAI inference failed with a generation error. + """) + await finishGenerationWithError(LLMOpenAIError.generationError, on: continuation) + } + } catch { + LLMOpenAI.logger.error(""" + SpeziLLMOpenAI: OpenAI inference failed with a generation error. + """) + await finishGenerationWithError(LLMOpenAIError.generationError, on: continuation) + } + + Self.logger.debug("SpeziLLMOpenAI: OpenAI GPT completed an inference") + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAIError.swift b/Sources/SpeziLLMOpenAI/LLMOpenAIError.swift new file mode 100644 index 00000000..e94de2f1 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAIError.swift @@ -0,0 +1,108 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import SpeziLLM + + +/// Errors that can occur by interacting with the OpenAI API. +public enum LLMOpenAIError: LLMError { + /// OpenAI API token is invalid. + case invalidAPIToken + /// Connectivity error + case connectivityIssues(URLError) + /// Couldn't store the OpenAI token to a secure storage location + case storageError + /// Quota limit reached + case insufficientQuota + /// Error during generation + case generationError + /// Error during accessing the OpenAI Model + case modelAccessError(Error) + /// Unknown error + case unknownError(Error) + + + /// Maps the enum cases to error message from the OpenAI API + var openAIErrorMessage: String? { + switch self { + case .invalidAPIToken: "invalid_api_key" + case .insufficientQuota: "insufficient_quota" + default: nil + } + } + + public var errorDescription: String? 
{ + switch self { + case .invalidAPIToken: + String(localized: LocalizedStringResource("LLM_INVALID_TOKEN_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .connectivityIssues: + String(localized: LocalizedStringResource("LLM_CONNECTIVITY_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .storageError: + String(localized: LocalizedStringResource("LLM_STORAGE_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .insufficientQuota: + String(localized: LocalizedStringResource("LLM_INSUFFICIENT_QUOTA_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .generationError: + String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .modelAccessError: + String(localized: LocalizedStringResource("LLM_MODEL_ACCESS_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + case .unknownError: + String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_DESCRIPTION", bundle: .atURL(from: .module))) + } + } + + public var recoverySuggestion: String? { + switch self { + case .invalidAPIToken: + String(localized: LocalizedStringResource("LLM_INVALID_TOKEN_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .connectivityIssues: + String(localized: LocalizedStringResource("LLM_CONNECTIVITY_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .storageError: + String(localized: LocalizedStringResource("LLM_STORAGE_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .insufficientQuota: + String(localized: LocalizedStringResource("LLM_INSUFFICIENT_QUOTA_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .generationError: + String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .modelAccessError: + String(localized: LocalizedStringResource("LLM_MODEL_ACCESS_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + case .unknownError: + String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_RECOVERY_SUGGESTION", bundle: .atURL(from: .module))) + } + } + + public var failureReason: String? 
{ + switch self { + case .invalidAPIToken: + String(localized: LocalizedStringResource("LLM_INVALID_TOKEN_FAILURE_REASON", bundle: .atURL(from: .module))) + case .connectivityIssues: + String(localized: LocalizedStringResource("LLM_CONNECTIVITY_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .storageError: + String(localized: LocalizedStringResource("LLM_STORAGE_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .insufficientQuota: + String(localized: LocalizedStringResource("LLM_INSUFFICIENT_QUOTA_FAILURE_REASON", bundle: .atURL(from: .module))) + case .generationError: + String(localized: LocalizedStringResource("LLM_GENERATION_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .modelAccessError: + String(localized: LocalizedStringResource("LLM_MODEL_ACCESS_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + case .unknownError: + String(localized: LocalizedStringResource("LLM_UNKNOWN_ERROR_FAILURE_REASON", bundle: .atURL(from: .module))) + } + } + + + public static func == (lhs: LLMOpenAIError, rhs: LLMOpenAIError) -> Bool { + switch (lhs, rhs) { + case (.invalidAPIToken, .invalidAPIToken): true + case (.connectivityIssues, .connectivityIssues): true + case (.insufficientQuota, .insufficientQuota): true + case (.generationError, .generationError): true + case (.unknownError, .unknownError): true + default: false + } + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAIRunnerSetup.swift b/Sources/SpeziLLMOpenAI/LLMOpenAIRunnerSetup.swift new file mode 100644 index 00000000..80040703 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAIRunnerSetup.swift @@ -0,0 +1,119 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import OpenAI +import Spezi +import SpeziLLM +import SpeziSecureStorage + + +/// The ``LLMOpenAIRunnerSetupTask`` sets up the OpenAI environment in order to execute Spezi `LLM`s. +/// The task needs to be stated within the `LLMRunner` initializer in the Spezi `Configuration`. +/// +/// One is able to specify Spezi-wide configurations for the OpenAI interaction, such as the API key or a network timeout duration (however, not a requirement!). +/// However, these configurations can be overwritten via individual ``LLMOpenAI`` instances. +/// +/// ### Usage +/// +/// A minimal example of using the ``LLMOpenAIRunnerSetupTask`` can be found below. +/// +/// ```swift +/// class LocalLLMAppDelegate: SpeziAppDelegate { +/// override var configuration: Configuration { +/// Configuration { +/// LLMRunner { +/// LLMOpenAIRunnerSetupTask(apiToken: "") +/// } +/// } +/// } +/// } +/// ``` +public class LLMOpenAIRunnerSetupTask: LLMRunnerSetupTask { + static var openAIModel: OpenAI { + guard let openAIModel = LLMOpenAIRunnerSetupTask.wrappedOpenAIModel else { + preconditionFailure(""" + Illegal Access: Tried to access the wrapped OpenAI model of the `LLMOpenAIRunnerSetupTask` before being initialized. + Ensure that the `LLMOpenAIRunnerSetupTask` is passed to the `LLMRunner` within the Spezi `Configuration`. + """) + } + return openAIModel + } + private static var wrappedOpenAIModel: OpenAI? + + + @Module.Model private var tokenSaver: LLMOpenAITokenSaver + @Dependency private var secureStorage: SecureStorage + + public let type: LLMHostingType = .cloud + private let apiToken: String? + private let timeout: TimeInterval + + + public init( + apiToken: String? 
= nil, + timeout: TimeInterval = 60 + ) { + self.apiToken = apiToken + self.timeout = timeout + } + + + public func configure() { + self.tokenSaver = LLMOpenAITokenSaver(secureStorage: secureStorage) + } + + public func setupRunner( + runnerConfig: LLMRunnerConfiguration + ) async throws { + // If token passed via init + if let apiToken { + LLMOpenAIRunnerSetupTask.wrappedOpenAIModel = OpenAI( + configuration: .init( + token: apiToken, + timeoutInterval: self.timeout + ) + ) + + try secureStorage.store( + credentials: Credentials(username: LLMOpenAIConstants.credentialsUsername, password: apiToken), + server: LLMOpenAIConstants.credentialsServer + ) + } else { + // If token is present within the Spezi `SecureStorage` + guard let credentials = try? secureStorage.retrieveCredentials(LLMOpenAIConstants.credentialsUsername) else { + preconditionFailure(""" + SpeziLLM: OpenAI Token wasn't properly set. Please ensure that the token is either passed directly via the Spezi `Configuration` + or stored within the `SecureStorage` via the `LLMOpenAITokenSaver` before dispatching the first inference. + """) + } + + // Initialize the OpenAI model + LLMOpenAIRunnerSetupTask.wrappedOpenAIModel = OpenAI( + configuration: .init( + token: credentials.password, + timeoutInterval: self.timeout + ) + ) + } + + // Check validity of passed token by making a request to list all models + do { + _ = try await LLMOpenAIRunnerSetupTask.openAIModel.models() + } catch let error as URLError { + throw LLMOpenAIError.connectivityIssues(error) + } catch let error as APIErrorResponse { + if error.error.code == LLMOpenAIError.invalidAPIToken.openAIErrorMessage { + throw LLMOpenAIError.invalidAPIToken + } + throw LLMOpenAIError.unknownError(error) + } catch { + throw LLMOpenAIError.unknownError(error) + } + } +} diff --git a/Sources/SpeziLLMOpenAI/LLMOpenAITokenSaver.swift b/Sources/SpeziLLMOpenAI/LLMOpenAITokenSaver.swift new file mode 100644 index 00000000..a209d6c5 --- /dev/null +++ b/Sources/SpeziLLMOpenAI/LLMOpenAITokenSaver.swift @@ -0,0 +1,106 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import Foundation +import SpeziSecureStorage +import SwiftUI + +/// Enables storing the OpenAI API key (token) inside the Spezi `SecureStorage` (secure enclave) from an arbitrary `View`. +/// The ``LLMOpenAITokenSaver`` provides the ``LLMOpenAITokenSaver/token`` property to easily read and write to the `SecureStorage`. +/// If a SwiftUI `Binding` is required (e.g., for a `TextField`), one can use the ``LLMOpenAITokenSaver/tokenBinding`` property. +/// +/// One needs to specify the ``LLMOpenAIRunnerSetupTask`` within the Spezi `Configuration` to be able to access the ``LLMOpenAITokenSaver`` from within the SwiftUI `Environment`. +/// +/// ### Usage +/// +/// A minimal example of using the ``LLMOpenAITokenSaver`` can be seen below. The example includes the Spezi `Configuration` to showcase a complete setup.
+/// +/// ```swift +/// class SpeziConfiguration: SpeziAppDelegate { +/// override var configuration: Configuration { +/// Configuration { +/// LLMRunner { +/// LLMOpenAIRunnerSetupTask() +/// } +/// } +/// } +/// } +/// +/// struct LLMOpenAIAPITokenOnboardingStep: View { +/// @Environment(LLMOpenAITokenSaver.self) private var tokenSaver +/// +/// var body: some View { +/// VStack { +/// TextField("OpenAI API Key", text: tokenSaver.tokenBinding) +/// +/// Button("Next") { +/// let openAIToken = tokenSaver.token +/// // ... +/// } +/// .disabled(!tokenSaver.tokenPresent) +/// } +/// } +/// } +/// ``` +@Observable +public class LLMOpenAITokenSaver { + private let secureStorage: SecureStorage + + + /// Indicates if a token is present within the Spezi `SecureStorage`. + public var tokenPresent: Bool { + self.token == nil ? false : true + } + + /// The API token used to interact with the OpenAI API. + /// Every write to this property is automatically persisted in the Spezi `SecureStorage`, reads are also done directly from the Spezi `SecureStorage`. + public var token: String? { + get { + access(keyPath: \.token) + return try? secureStorage.retrieveCredentials( + LLMOpenAIConstants.credentialsUsername, + server: LLMOpenAIConstants.credentialsServer + )?.password + } + set { + withMutation(keyPath: \.token) { + if let newValue { + try? secureStorage.store( + credentials: Credentials(username: LLMOpenAIConstants.credentialsUsername, password: newValue), + server: LLMOpenAIConstants.credentialsServer + ) + } else { + try? secureStorage.deleteCredentials(LLMOpenAIConstants.credentialsUsername, server: LLMOpenAIConstants.credentialsServer) + } + } + } + } + + /// Provides SwiftUI `Binding` access to the ``LLMOpenAITokenSaver/token`` property. Useful for, e.g., `TextField`s. + /// Similar to ``LLMOpenAITokenSaver/token``, all reads / writes are directly done from / to storage. + public var tokenBinding: Binding { + Binding( + get: { + self.token ?? "" + }, + set: { newValue in + guard !newValue.isEmpty else { + self.token = nil + return + } + + self.token = newValue + } + ) + } + + + init(secureStorage: SecureStorage) { + self.secureStorage = secureStorage + } +} diff --git a/Sources/SpeziLLMOpenAI/OpenAIAPIKeyOnboardingStep.swift b/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIAPITokenOnboardingStep.swift similarity index 82% rename from Sources/SpeziLLMOpenAI/OpenAIAPIKeyOnboardingStep.swift rename to Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIAPITokenOnboardingStep.swift index 8f7e194b..039e0360 100644 --- a/Sources/SpeziLLMOpenAI/OpenAIAPIKeyOnboardingStep.swift +++ b/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIAPITokenOnboardingStep.swift @@ -13,28 +13,14 @@ import SwiftUI /// View to display an onboarding step for the user to enter an OpenAI API Key. -public struct OpenAIAPIKeyOnboardingStep: View { - @Environment(OpenAIModel.self) private var openAI +/// Ensure that the ``LLMOpenAIRunnerSetupTask`` is specified within the Spezi `Configuration` when using this view in the onboarding flow. +public struct LLMOpenAIAPITokenOnboardingStep: View { + @Environment(LLMOpenAITokenSaver.self) private var tokenSaver + private let actionText: String private let action: () -> Void - private var apiToken: Binding { - Binding( - get: { - openAI.apiToken ?? 
"" - }, - set: { newValue in - guard !newValue.isEmpty else { - openAI.apiToken = nil - return - } - - openAI.apiToken = newValue - } - ) - } - public var body: some View { OnboardingView( titleView: { @@ -47,7 +33,7 @@ public struct OpenAIAPIKeyOnboardingStep: View { VStack(spacing: 0) { Text(String(localized: "OPENAI_API_KEY_SUBTITLE", bundle: .module)) .multilineTextAlignment(.center) - TextField(String(localized: "OPENAI_API_KEY_PROMPT", bundle: .module), text: apiToken) + TextField(String(localized: "OPENAI_API_KEY_PROMPT", bundle: .module), text: tokenSaver.tokenBinding) .frame(height: 50) .textFieldStyle(.roundedBorder) .padding(.vertical, 16) @@ -69,7 +55,7 @@ public struct OpenAIAPIKeyOnboardingStep: View { action() } ) - .disabled(apiToken.wrappedValue.isEmpty) + .disabled(!tokenSaver.tokenPresent) } ) } diff --git a/Sources/SpeziLLMOpenAI/OpenAIModelSelectionOnboardingStep.swift b/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIModelOnboardingStep.swift similarity index 69% rename from Sources/SpeziLLMOpenAI/OpenAIModelSelectionOnboardingStep.swift rename to Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIModelOnboardingStep.swift index 1437412d..b3149d90 100644 --- a/Sources/SpeziLLMOpenAI/OpenAIModelSelectionOnboardingStep.swift +++ b/Sources/SpeziLLMOpenAI/Onboarding/LLMOpenAIModelOnboardingStep.swift @@ -13,25 +13,16 @@ import SwiftUI /// View to display an onboarding step for the user to enter change the OpenAI model. -public struct OpenAIModelSelectionOnboardingStep: View { +public struct LLMOpenAIModelOnboardingStep: View { public enum Default { public static let models = [Model.gpt3_5Turbo, Model.gpt4] } - fileprivate struct ModelSelection: Identifiable { - fileprivate let id: String - - - fileprivate var description: String { - id.replacing("-", with: " ").capitalized.replacing("Gpt", with: "GPT") - } - } - - @Environment(OpenAIModel.self) private var openAI + @State private var modelSelection: Model private let actionText: String - private let action: () -> Void - private let models: [ModelSelection] + private let action: (Model) -> Void + private let models: [Model] public var body: some View { @@ -43,11 +34,10 @@ public struct OpenAIModelSelectionOnboardingStep: View { ) }, contentView: { - @Bindable var openAI = openAI - Picker(String(localized: "OPENAI_MODEL_SELECTION_DESCRIPTION", bundle: .module), selection: $openAI.openAIModel) { - ForEach(models) { model in - Text(model.description) - .tag(model.id) + Picker(String(localized: "OPENAI_MODEL_SELECTION_DESCRIPTION", bundle: .module), selection: $modelSelection) { + ForEach(models, id: \.self) { model in + Text(model.formattedModelDescription) + .tag(model.formattedModelDescription) } } .pickerStyle(.wheel) @@ -57,7 +47,7 @@ public struct OpenAIModelSelectionOnboardingStep: View { OnboardingActionsView( verbatim: actionText, action: { - action() + action(modelSelection) } ) } @@ -67,11 +57,11 @@ public struct OpenAIModelSelectionOnboardingStep: View { /// - Parameters: /// - actionText: Localized text that should appear on the action button. /// - models: The models that should be displayed in the picker user interface. - /// - action: Action that should be performed after the openAI model selection has been persisted. + /// - action: Action that should be performed after the openAI model selection has been done, selection is passed as closure argument. public init( actionText: LocalizedStringResource? 
= nil, models: [Model] = Default.models, - _ action: @escaping () -> Void + _ action: @escaping (Model) -> Void ) { self.init( actionText: actionText?.localizedString() ?? String(localized: "OPENAI_MODEL_SELECTION_SAVE_BUTTON", bundle: .module), @@ -83,15 +73,23 @@ public struct OpenAIModelSelectionOnboardingStep: View { /// - Parameters: /// - actionText: Text that should appear on the action button without localization. /// - models: The models that should be displayed in the picker user interface. - /// - action: Action that should be performed after the openAI model selection has been persisted. + /// - action: Action that should be performed after the OpenAI model selection has been done, selection is passed as closure argument. @_disfavoredOverload public init( actionText: ActionText, models: [Model] = Default.models, - _ action: @escaping () -> Void + _ action: @escaping (Model) -> Void ) { self.actionText = String(actionText) - self.models = models.map { ModelSelection(id: $0) } + self.models = models self.action = action + self._modelSelection = State(initialValue: models.first ?? .gpt3_5Turbo_1106) + } +} + + +extension Model { + fileprivate var formattedModelDescription: String { + self.replacing("-", with: " ").capitalized.replacing("Gpt", with: "GPT") } } diff --git a/Sources/SpeziLLMOpenAI/OpenAIChatStreamResult+Sendable.swift b/Sources/SpeziLLMOpenAI/OpenAIChatStreamResult+Sendable.swift deleted file mode 100644 index 1223af89..00000000 --- a/Sources/SpeziLLMOpenAI/OpenAIChatStreamResult+Sendable.swift +++ /dev/null @@ -1,9 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -extension OpenAI.ChatStreamResult: @unchecked Sendable {} diff --git a/Sources/SpeziLLMOpenAI/OpenAIError.swift b/Sources/SpeziLLMOpenAI/OpenAIError.swift deleted file mode 100644 index 94592820..00000000 --- a/Sources/SpeziLLMOpenAI/OpenAIError.swift +++ /dev/null @@ -1,14 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - - -/// An error that can appear from an API call to the OpenAI API. -public enum OpenAIError: Error { - /// There was no OpenAI API token provided. - case noAPIToken -} diff --git a/Sources/SpeziLLMOpenAI/OpenAIModel.swift b/Sources/SpeziLLMOpenAI/OpenAIModel.swift deleted file mode 100644 index 894fd955..00000000 --- a/Sources/SpeziLLMOpenAI/OpenAIModel.swift +++ /dev/null @@ -1,129 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - -import struct OpenAI.Chat -import struct OpenAI.ChatFunctionDeclaration -import struct OpenAI.ChatQuery -import class OpenAI.OpenAI -@_exported import struct OpenAI.Model -@_exported import struct OpenAI.ChatStreamResult -import Foundation -import Observation -import SpeziChat -import SpeziSecureStorage - - -/// View model responsible for to coordinate the interactions with the OpenAI GPT API. 
-@Observable -public class OpenAIModel { - private enum Defaults { - static let defaultModel: Model = .gpt3_5Turbo - } - - - private let secureStorage: SecureStorage - - - /// The OpenAI GPT Model type that is used to interact with the OpenAI API - public var openAIModel: String { - get { - access(keyPath: \.openAIModel) - return UserDefaults.standard.value(forKey: OpenAIConstants.modelStorageKey) as? Model ?? Defaults.defaultModel - } - set { - withMutation(keyPath: \.openAIModel) { - UserDefaults.standard.set(newValue, forKey: OpenAIConstants.modelStorageKey) - } - } - } - - /// The API token used to interact with the OpenAI API - public var apiToken: String? { - get { - access(keyPath: \.apiToken) - return try? secureStorage.retrieveCredentials(OpenAIConstants.credentialsUsername, server: OpenAIConstants.credentialsServer)?.password - } - set { - withMutation(keyPath: \.apiToken) { - if let newValue { - try? secureStorage.store( - credentials: Credentials(username: OpenAIConstants.credentialsUsername, password: newValue), - server: OpenAIConstants.credentialsServer - ) - } else { - try? secureStorage.deleteCredentials(OpenAIConstants.credentialsUsername, server: OpenAIConstants.credentialsServer) - } - } - } - } - - - init(secureStorage: SecureStorage, apiToken defaultToken: String? = nil, openAIModel model: Model? = nil) { - self.secureStorage = secureStorage - - if UserDefaults.standard.object(forKey: OpenAIConstants.modelStorageKey) == nil { - self.openAIModel = model ?? Defaults.defaultModel - } - - if let apiTokenFromStorage = try? secureStorage.retrieveCredentials( - OpenAIConstants.credentialsUsername, - server: OpenAIConstants.credentialsServer - )?.password { - self.apiToken = apiTokenFromStorage - } else { - self.apiToken = defaultToken - } - } - - /// Queries the OpenAI API using the provided chat messages. - /// Builds on top of the [SpeziChat](https://github.com/StanfordSpezi/SpeziChat) module to handle the `SpeziChat.Chat` data structure. - /// - /// - Parameters: - /// - chat: A collection of chat messages (from the `SpeziChat` dependency) used in the conversation. - /// - /// - Returns: The content of the response from the API. - public func queryAPI( - withChat chat: SpeziChat.Chat - ) throws -> AsyncThrowingStream { - guard let apiToken, !apiToken.isEmpty else { - throw OpenAIError.noAPIToken - } - - let openAIChat: [Chat] = chat.map { speziChat in - .init( - role: Chat.Role(rawValue: speziChat.role.rawValue) ?? .assistant, - content: speziChat.content - ) - } - - return try self.queryAPI(withOpenAIChat: openAIChat) - } - - /// Queries the OpenAI API using the provided chat messages. - /// Supports advanced OpenAI functionality like Function Calling using the native OpenAI `[OpenAI.Chat]` data structure. - /// - /// - Parameters: - /// - chat: A collection of chat messages (from the `OpenAI` dependency) used in the conversation. - /// - chatFunctionDeclaration: OpenAI functions that should be injected in the OpenAI query. - /// - /// - Returns: The content of the response from the API. - public func queryAPI( - withOpenAIChat chat: [Chat], - withFunction chatFunctionDeclaration: [ChatFunctionDeclaration] = [] - ) throws -> AsyncThrowingStream { - guard let apiToken, !apiToken.isEmpty else { - throw OpenAIError.noAPIToken - } - - let functions = chatFunctionDeclaration.isEmpty ? 
nil : chatFunctionDeclaration - - let openAIClient = OpenAI(apiToken: apiToken) - let query = ChatQuery(model: openAIModel, messages: chat, functions: functions) - return openAIClient.chatsStream(query: query) - } -} diff --git a/Sources/SpeziLLMOpenAI/OpenAIModule.swift b/Sources/SpeziLLMOpenAI/OpenAIModule.swift deleted file mode 100644 index 888b7620..00000000 --- a/Sources/SpeziLLMOpenAI/OpenAIModule.swift +++ /dev/null @@ -1,44 +0,0 @@ -// -// This source file is part of the Stanford Spezi open source project -// -// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) -// -// SPDX-License-Identifier: MIT -// - - -import OpenAI -import Spezi -import SpeziSecureStorage - - -/// `OpenAIModule` is a module responsible for to coordinate the interactions with the OpenAI GPT API. -public class OpenAIModule: Module, DefaultInitializable { - /// Model accessible to modules using the ``OpenAIModule`` as a dependency and injected in the SwiftUI environment. - @Module.Model public var model: OpenAIModel - @Dependency private var secureStorage: SecureStorage - - - private var defaultAPIToken: String? - private var defaultOpenAIModel: Model? - - - /// Initializes a new instance of `OpenAIModule` with the specified API token and OpenAI model. - /// - /// - Parameters: - /// - apiToken: The API token for the OpenAI API. - /// - openAIModel: The OpenAI model to use for querying. - public init(apiToken: String? = nil, openAIModel: Model? = nil) { - defaultAPIToken = apiToken - defaultOpenAIModel = openAIModel - } - - public required convenience init() { - self.init(apiToken: nil, openAIModel: nil) - } - - - public func configure() { - self.model = OpenAIModel(secureStorage: secureStorage, apiToken: defaultAPIToken, openAIModel: defaultOpenAIModel) - } -} diff --git a/Sources/SpeziLLMOpenAI/Resources/Localizable.xcstrings b/Sources/SpeziLLMOpenAI/Resources/Localizable.xcstrings index acee49ae..1e2961b6 100644 --- a/Sources/SpeziLLMOpenAI/Resources/Localizable.xcstrings +++ b/Sources/SpeziLLMOpenAI/Resources/Localizable.xcstrings @@ -1,6 +1,216 @@ { "sourceLanguage" : "en", "strings" : { + "LLM_CONNECTIVITY_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Connectivity Error with the OpenAI API." + } + } + } + }, + "LLM_CONNECTIVITY_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The network connection to the OpenAI servers couldn't be established, most probably the device doesn't have an internet connection." + } + } + } + }, + "LLM_CONNECTIVITY_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please ensure that the device has a stable internet connection." + } + } + } + }, + "LLM_GENERATION_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Generation Error occurred during OpenAI inference." + } + } + } + }, + "LLM_GENERATION_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The OpenAI API responded with an error during the output generation." + } + } + } + }, + "LLM_GENERATION_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please retry the input query." 
+ } + } + } + }, + "LLM_INSUFFICIENT_QUOTA_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "OpenAI Quota limit reached." + } + } + } + }, + "LLM_INSUFFICIENT_QUOTA_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The specified OpenAI API key has reached the quota limit of the associated OpenAI account." + } + } + } + }, + "LLM_INSUFFICIENT_QUOTA_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please increase the OpenAI quota limits or try again later." + } + } + } + }, + "LLM_INVALID_TOKEN_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "OpenAI API Key invalid." + } + } + } + }, + "LLM_INVALID_TOKEN_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The specified OpenAI API key is not valid." + } + } + } + }, + "LLM_INVALID_TOKEN_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please ensure that the specified OpenAI API key is valid." + } + } + } + }, + "LLM_MODEL_ACCESS_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "OpenAI GPT Model couldn't be accessed." + } + } + } + }, + "LLM_MODEL_ACCESS_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The specified GPT model in combination with the API key couldn't be accessed. Ensure that the API key can access the specified model." + } + } + } + }, + "LLM_MODEL_ACCESS_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please ensure that the specified API key has access to the configured GPT model." + } + } + } + }, + "LLM_STORAGE_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Token could not be stored in a secure storage." + } + } + } + }, + "LLM_STORAGE_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The configured OpenAI API key could not be stored in the secure enclave." + } + } + } + }, + "LLM_STORAGE_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please try to restart the application or the phone." + } + } + } + }, + "LLM_UNKNOWN_ERROR_DESCRIPTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "An unknown OpenAI error has occured." + } + } + } + }, + "LLM_UNKNOWN_ERROR_FAILURE_REASON" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "The OpenAI API responded with an unknown error." + } + } + } + }, + "LLM_UNKNOWN_ERROR_RECOVERY_SUGGESTION" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "Please retry the query." + } + } + } + }, "OPENAI_API_KEY_PROMPT" : { "localizations" : { "de" : { @@ -198,6 +408,16 @@ } } } + }, + "SPEZI_LLM_OPENAI_SYSTEM_PROMPT" : { + "localizations" : { + "en" : { + "stringUnit" : { + "state" : "translated", + "value" : "You are ChatGPT, a large language model trained by OpenAI, based on the GPT architecture." 
+ } + } + } } }, "version" : "1.0" diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIAPIKeyOnboardingStep.png b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIAPITokenOnboardingStep.png similarity index 100% rename from Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIAPIKeyOnboardingStep.png rename to Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIAPITokenOnboardingStep.png diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIAPIKeyOnboardingStep.png.license b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIAPITokenOnboardingStep.png.license similarity index 100% rename from Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIAPIKeyOnboardingStep.png.license rename to Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIAPITokenOnboardingStep.png.license diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIAPIKeyOnboardingStep~dark.png b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIAPITokenOnboardingStep~dark.png similarity index 100% rename from Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIAPIKeyOnboardingStep~dark.png rename to Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIAPITokenOnboardingStep~dark.png diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIAPIKeyOnboardingStep~dark.png.license b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIAPITokenOnboardingStep~dark.png.license similarity index 100% rename from Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIAPIKeyOnboardingStep~dark.png.license rename to Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIAPITokenOnboardingStep~dark.png.license diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIModelSelectionOnboardingStep.png b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIModelOnboardingStep.png similarity index 100% rename from Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIModelSelectionOnboardingStep.png rename to Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIModelOnboardingStep.png diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIModelSelectionOnboardingStep.png.license b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIModelOnboardingStep.png.license similarity index 100% rename from Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIModelSelectionOnboardingStep.png.license rename to Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIModelOnboardingStep.png.license diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIModelSelectionOnboardingStep~dark.png b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIModelOnboardingStep~dark.png similarity index 100% rename from Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIModelSelectionOnboardingStep~dark.png rename to Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIModelOnboardingStep~dark.png diff --git a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIModelSelectionOnboardingStep~dark.png.license b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIModelOnboardingStep~dark.png.license similarity index 100% rename from Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/OpenAIModelSelectionOnboardingStep~dark.png.license rename to Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/Resources/LLMOpenAIModelOnboardingStep~dark.png.license diff --git 
a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md index 90d8c18d..87365dcf 100644 --- a/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md +++ b/Sources/SpeziLLMOpenAI/SpeziLLMOpenAI.docc/SpeziLLMOpenAI.md @@ -14,128 +14,112 @@ Interact with Large Language Models (LLMs) from OpenAI. ## Overview -A module that allows you to interact with GPT-based large language models (LLMs) from OpenAI within your Spezi application. +A module that allows you to interact with GPT-based Large Language Models (LLMs) from OpenAI within your Spezi application. +``SpeziLLMOpenAI`` provides a pure Swift-based API for interacting with the OpenAI GPT API, building on top of the infrastructure of the [SpeziLLM target](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm). @Row { @Column { - @Image(source: "OpenAIAPIKeyOnboardingStep", alt: "Screenshot displaying the OpenAI API Key Onboarding view from Spezi OpenAI") { - ``OpenAIAPIKeyOnboardingStep`` + @Image(source: "LLMOpenAIAPITokenOnboardingStep", alt: "Screenshot displaying the OpenAI API Token Onboarding view from Spezi OpenAI") { + ``LLMOpenAIAPITokenOnboardingStep`` } } @Column { - @Image(source: "OpenAIModelSelectionOnboardingStep", alt: "Screenshot displaying the Open AI Model Selection Onboarding Step"){ - ``OpenAIModelSelectionOnboardingStep`` + @Image(source: "LLMOpenAIModelOnboardingStep", alt: "Screenshot displaying the Open AI Model Selection Onboarding Step"){ + ``LLMOpenAIModelOnboardingStep`` } } @Column { - @Image(source: "ChatView", alt: "Screenshot displaying the usage of the OpenAIModule with the SpeziChat Chat View."){ - ``OpenAIModule`` + @Image(source: "ChatView", alt: "Screenshot displaying the usage of the LLMOpenAI with the SpeziChat Chat View."){ + ``LLMOpenAI`` } } } ## Setup -### 1. Add Spezi LLM as a Dependency +### Add Spezi LLM as a Dependency -First, you will need to add the SpeziLLM Swift package to +You need to add the SpeziLLM Swift package to [your app in Xcode](https://developer.apple.com/documentation/xcode/adding-package-dependencies-to-your-app#) or -[Swift package](https://developer.apple.com/documentation/xcode/creating-a-standalone-swift-package-with-xcode#Add-a-dependency-on-another-Swift-package). When adding the package, select the `SpeziLLMOpenAI` target to add. +[Swift package](https://developer.apple.com/documentation/xcode/creating-a-standalone-swift-package-with-xcode#Add-a-dependency-on-another-Swift-package). -### 2. Register the Open AI Module +> Important: If your application is not yet configured to use Spezi, follow the [Spezi setup article](https://swiftpackageindex.com/stanfordspezi/spezi/documentation/spezi/initial-setup) to set up the core Spezi infrastructure. -> Note: If your application is not yet configured to use Spezi, follow the [Spezi setup article](https://swiftpackageindex.com/stanfordspezi/spezi/documentation/spezi/initial-setup) to set up the core Spezi infrastructure. +## Spezi LLM OpenAI Components -You can configure the ``OpenAIModule`` in the `SpeziAppDelegate` as follows. -In the example, we configure the `OpenAIModule` to use the GPT-4 model with a default API key. +The core component of the ``SpeziLLMOpenAI`` target is the ``LLMOpenAI`` class which conforms to the [`LLM` protocol of SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llm). 
``LLMOpenAI`` uses the OpenAI API to perform textual inference on the GPT-3.5 or GPT-4 models from OpenAI. -```swift -import Spezi -import SpeziLLMOpenAI +> Important: To utilize an LLM from OpenAI, an OpenAI API Key is required. Ensure that the OpenAI account associated with the key has enough resources to access the specified model as well as enough credits to perform the actual inference. +> Tip: In order to collect the OpenAI API Key or model type from the user, ``SpeziLLMOpenAI`` provides the ``LLMOpenAIAPITokenOnboardingStep`` and ``LLMOpenAIModelOnboardingStep`` views which can be used in the onboarding flow of the application. -class ExampleDelegate: SpeziAppDelegate { - override var configuration: Configuration { - Configuration { - OpenAIModule(apiToken: "API_KEY", openAIModel: .gpt4) - } - } -} -``` +### LLM OpenAI -The OpenAIModule injects an ``OpenAIModel`` in the SwiftUI environment to make it accessible throughout your application. +``LLMOpenAI`` offers a variety of configuration possibilities that are supported by the OpenAI API, such as the model type, the system prompt, the temperature of the model, and many more. These options can be set via the ``LLMOpenAI/init(parameters:modelParameters:)`` initializer and the ``LLMOpenAIParameters`` and ``LLMOpenAIModelParameters``. -```swift -class ExampleView: View { - @Environment(OpenAIModel.self) var model +- Important: ``LLMOpenAI`` shouldn't be used on its own but always wrapped by the Spezi `LLMRunner`, as the runner handles all management overhead tasks. +#### Setup - var body: some View { - // ... +In order to use ``LLMOpenAI``, the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner) needs to be initialized in the Spezi `Configuration`. Only then can the `LLMRunner` be used to execute the ``LLMOpenAI``. +See the [SpeziLLM documentation](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) for more details. + +```swift +class LLMOpenAIAppDelegate: SpeziAppDelegate { + override var configuration: Configuration { + Configuration { + LLMRunner { + LLMOpenAIRunnerSetupTask() + } + } } } ``` -> Tip: The choice of model and API key are persisted across application launches. The `apiToken` and `openAIModel` can also be accessed and changed at runtime. +#### Usage -The `SpeziLLMOpenAI` package also provides an `OpenAIAPIKeyOnboardingStep` that can be used to allow the user to provide their API key during the onboarding process instead (see `Examples` below). If using the `OpenAIAPIKeyOnboardingStep`, the `apiToken` property can be omitted here. +The code example below showcases the interaction with the ``LLMOpenAI`` through the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner), which is injected into the SwiftUI `Environment` via the `Configuration` shown above. +Based on a `String` prompt, the `LLMGenerationTask/generate(prompt:)` method returns an `AsyncThrowingStream` which yields the inferred characters until the generation has completed. -> Tip: You can learn more about a [`Module` in the Spezi documentation](https://swiftpackageindex.com/stanfordspezi/spezi/documentation/spezi/module). +The ``LLMOpenAI`` contains the ``LLMOpenAI/context`` property which holds the entire history of the model interactions.
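As a brief, hedged illustration of working with that history, the sketch below seeds the context with an assistant greeting before any inference is dispatched. It assumes that ``LLMOpenAI/context`` is a mutable `SpeziChat.Chat` (an array of `ChatEntity` values) and that the model is constructed via ``LLMOpenAI/init(parameters:modelParameters:)`` as described above; the greeting text is purely illustrative.

```swift
import SpeziChat
import SpeziLLMOpenAI

// A minimal sketch: pre-populate the model's conversation history before inference.
// Assumption: `context` is a mutable `SpeziChat.Chat` (an array of `ChatEntity` values).
let model = LLMOpenAI(
    parameters: .init(
        modelType: .gpt3_5Turbo,
        systemPrompt: "You're a helpful assistant that answers questions from users."
    )
)

// The appended entry becomes part of the history that later `generate()` calls operate on.
model.context.append(
    ChatEntity(role: .assistant, content: "Hello! How can I help you today?")
)
```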
+This context includes the system prompt and user input, but also all assistant responses. +Ensure the property always contains all necessary information, as the ``LLMOpenAI/generate(continuation:)`` function executes the inference based on the ``LLMOpenAI/context``. -## Examples +> Tip: The model can be queried via the `LLMGenerationTask/generate()` and `LLMGenerationTask/generate(prompt:)` calls (returned from wrapping the ``LLMOpenAI`` in the `LLMRunner` from the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) target). + The first method takes no input prompt at all but uses the current context of the model (so `LLM/context`) to query the model. + The second takes a `String`-based input from the user and appends it to the context of the model (so `LLM/context`) before querying the model. -### Creating a Chat Interface - -In this example, we will create a chat interface that allows the user to converse with the model. Responses from the model will be streamed. -To properly visualize the chat interface with the LLM, the example utilizes the [SpeziChat](https://github.com/StanfordSpezi/SpeziChat) module of the Spezi ecosystem, providing developers with easy to use chat interfaces like the `ChatView`. +> Important: The ``LLMOpenAI`` should only be used together with the [SpeziLLM](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm) [`LLMRunner`](https://swiftpackageindex.com/stanfordspezi/spezillm/documentation/spezillm/llmrunner)! ```swift -import SpeziChat -import SpeziLLMOpenAI -import SwiftUI - - -struct OpenAIChatView: View { - @Environment(OpenAIModel.self) var model - @State private var chat: Chat = [ - .init(role: .assistant, content: "Assistant Message!") - ] - - var body: some View { - ChatView($chat) - .onChange(of: chat) { _, _ in - Task { - let chatStreamResults = try model.queryAPI(withChat: chat) - - for try await chatStreamResult in chatStreamResults { - for choice in chatStreamResult.choices { - guard let newContent = choice.delta.content else { - continue - } - - if chat.last?.role == .assistant, let previousContent = chat.last?.content { - await MainActor.run { - chat[chat.count - 1] = ChatEntity( - role: .assistant, - content: previousContent + newContent - ) - } - } else { - await MainActor.run { - chat.append(ChatEntity(role: .assistant, content: newContent)) - } - } - } - } - } - } +struct LLMOpenAIChatView: View { + // The runner responsible for executing the OpenAI LLM. + @Environment(LLMRunner.self) var runner: LLMRunner + + // The OpenAI LLM + @State var model: LLMOpenAI = .init( + parameters: .init( + modelType: .gpt3_5Turbo, + systemPrompt: "You're a helpful assistant that answers questions from users.", + overwritingToken: "abc123" + ) + ) + @State var responseText = "" + + func executePrompt(prompt: String) async throws { + // Execute the query on the runner, returning a stream of outputs + let stream = try await runner(with: model).generate(prompt: prompt) + + for try await token in stream { + responseText.append(token) + } } } ``` -### Setting the API Key During Onboarding +### Onboarding Flow -The `OpenAIAPIKeyOnboardingStep` provides a view that can be used for the user to enter an OpenAI API key during onboarding in your Spezi application. We will show an example of how you can add an OpenAI onboarding step within an application created from the Spezi Template Application below.
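Before wiring up the onboarding flow, the sketch below rounds out the configuration discussion from the `LLM OpenAI` section above by passing both ``LLMOpenAIParameters`` and ``LLMOpenAIModelParameters``. The individual model-parameter names (such as a sampling `temperature`) are assumptions made for illustration and may differ from the actual ``LLMOpenAIModelParameters`` API; consult the type's documentation for the definitive surface.

```swift
import SpeziLLMOpenAI

// A hedged sketch: configure an OpenAI LLM with general parameters and
// (assumed) model-level parameters in a single initializer call.
// The `temperature` parameter name is an illustrative assumption, not the confirmed API.
let configuredModel = LLMOpenAI(
    parameters: .init(
        modelType: .gpt4,
        systemPrompt: "You're a helpful assistant that answers questions from users."
    ),
    modelParameters: .init(
        temperature: 0.7
    )
)
```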
+The ``LLMOpenAIAPITokenOnboardingStep`` provides a view that can be used for the user to enter an OpenAI API key during onboarding in your Spezi application. The example below showcases how to add an OpenAI onboarding step within an application created from the Spezi Template Application. First, create a new view to show the onboarding step: @@ -149,7 +133,7 @@ struct OpenAIAPIKey: View { @EnvironmentObject private var onboardingNavigationPath: OnboardingNavigationPath var body: some View { - OpenAIAPIKeyOnboardingStep { + LLMOpenAIAPITokenOnboardingStep { onboardingNavigationPath.nextStep() } } @@ -179,3 +163,23 @@ struct OnboardingFlow: View { ``` Now the OpenAI API Key entry view will appear within your application's onboarding process. The API Key entered will be persisted across application launches. + +## Topics + +### Model + +- ``LLMOpenAI`` + +### Configuration + +- ``LLMOpenAIParameters`` +- ``LLMOpenAIModelParameters`` + +### Setup + +- ``LLMOpenAIRunnerSetupTask`` + +### Onboarding + +- ``LLMOpenAIAPITokenOnboardingStep`` +- ``LLMOpenAIModelOnboardingStep`` diff --git a/Tests/UITests/TestApp/FeatureFlags.swift b/Tests/UITests/TestApp/FeatureFlags.swift index dba1217a..6965a45b 100644 --- a/Tests/UITests/TestApp/FeatureFlags.swift +++ b/Tests/UITests/TestApp/FeatureFlags.swift @@ -10,6 +10,6 @@ import Foundation enum FeatureFlags { - /// Configures the local LLM to mock all generated responses in order to simplify development and write UI Tests. - static let mockLocalLLM = ProcessInfo.processInfo.arguments.contains("--mockLocalLLM") + /// Configures the LLMs to mock all generated responses in order to simplify development and write UI Tests. + static let mockMode = ProcessInfo.processInfo.arguments.contains("--mockMode") } diff --git a/Tests/UITests/TestApp/LLMLocal/LLMLocalChatTestView.swift b/Tests/UITests/TestApp/LLMLocal/LLMLocalChatTestView.swift index 37c757e1..2832e68a 100644 --- a/Tests/UITests/TestApp/LLMLocal/LLMLocalChatTestView.swift +++ b/Tests/UITests/TestApp/LLMLocal/LLMLocalChatTestView.swift @@ -14,8 +14,8 @@ import SwiftUI /// Presents a chat view that enables user's to interact with the local LLM. struct LLMLocalChatTestView: View { /// The Spezi `LLM` that is configured and executed on the `LLMRunner` - @State private var model: LLM = { - if FeatureFlags.mockLocalLLM { + private var model: LLM = { + if FeatureFlags.mockMode { LLMMock() } else { LLMLlama( @@ -29,13 +29,7 @@ struct LLMLocalChatTestView: View { var body: some View { LLMChatView( - model: model, - initialAssistantPrompt: [ - .init( - role: .assistant, - content: "Hello! I'm a locally executed Llama 2 7B model, enabled by the Spezi ecosystem!"
- ) - ] + model: model ) .navigationTitle("LLM_LOCAL_CHAT_VIEW_TITLE") } diff --git a/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingFlow.swift b/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingFlow.swift index 3d083bfc..8b3df80c 100644 --- a/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingFlow.swift +++ b/Tests/UITests/TestApp/LLMLocal/Onboarding/LLMLocalOnboardingFlow.swift @@ -19,7 +19,7 @@ struct LLMLocalOnboardingFlow: View { OnboardingStack(onboardingFlowComplete: $completedOnboardingFlow) { LLMLocalOnboardingWelcomeView() - if !FeatureFlags.mockLocalLLM { + if !FeatureFlags.mockMode { LLMLocalOnboardingDownloadView() } } diff --git a/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIChatTestView.swift b/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIChatTestView.swift index 4d62db04..337f0561 100644 --- a/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIChatTestView.swift +++ b/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIChatTestView.swift @@ -7,22 +7,33 @@ // import SpeziChat +import SpeziLLM +import SpeziLLMOpenAI import SwiftUI struct LLMOpenAIChatTestView: View { - @State var chat: Chat = [ - .init(role: .system, content: "System Message!"), - .init(role: .system, content: "System Message (hidden)!"), - .init(role: .function, content: "Function Message!"), - .init(role: .user, content: "User Message!"), - .init(role: .assistant, content: "Assistant Message!") - ] @State var showOnboarding = false + + /// The Spezi `LLM` that is configured and executed on the `LLMRunner` + private var model: LLM = { + if FeatureFlags.mockMode { + LLMMock() + } else { + LLMOpenAI( + parameters: .init( + modelType: .gpt3_5Turbo, + systemPrompt: "You're a helpful assistant that answers questions from users." + ) + ) + } + }() var body: some View { - ChatView($chat) + LLMChatView( + model: model + ) .navigationTitle("LLM_OPENAI_CHAT_VIEW_TITLE") .toolbar { ToolbarItem { @@ -34,6 +45,6 @@ struct LLMOpenAIChatTestView: View { .sheet(isPresented: $showOnboarding) { LLMOpenAIOnboardingView() } - .accentColor(Color(red: 0, green: 166 / 255, blue: 126 / 255)) // OpenAI Green Color + .accentColor(Color(red: 0, green: 166 / 255, blue: 126 / 255)) // OpenAI Green } } diff --git a/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIOnboardingView.swift b/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIOnboardingView.swift index 018d6fe2..80fc4565 100644 --- a/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIOnboardingView.swift +++ b/Tests/UITests/TestApp/LLMOpenAI/LLMOpenAIOnboardingView.swift @@ -6,33 +6,16 @@ // SPDX-License-Identifier: MIT // -import SpeziLLMOpenAI +import SpeziOnboarding import SwiftUI import XCTSpezi struct LLMOpenAIOnboardingView: View { - enum Step: String, Codable { - case modelSelection - } - - - @State private var steps: [Step] = [] - - var body: some View { - NavigationStack(path: $steps) { - OpenAIAPIKeyOnboardingStep { - steps.append(.modelSelection) - } - .navigationDestination(for: Step.self) { step in - switch step { - case .modelSelection: - OpenAIModelSelectionOnboardingStep { - steps.removeLast() - } - } - } + OnboardingStack { + LLMOpenAITokenOnboarding() + LLMOpenAIModelOnboarding() } } } diff --git a/Tests/UITests/TestApp/LLMOpenAI/Onboarding/LLMOpenAIModelOnboarding.swift b/Tests/UITests/TestApp/LLMOpenAI/Onboarding/LLMOpenAIModelOnboarding.swift new file mode 100644 index 00000000..6082e9cb --- /dev/null +++ b/Tests/UITests/TestApp/LLMOpenAI/Onboarding/LLMOpenAIModelOnboarding.swift @@ -0,0 +1,23 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// 
SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SpeziLLMOpenAI +import SpeziOnboarding +import SwiftUI + + +struct LLMOpenAIModelOnboarding: View { + @Environment(OnboardingNavigationPath.self) private var path + + + var body: some View { + LLMOpenAIModelOnboardingStep { _ in + path.removeLast() + } + } +} diff --git a/Tests/UITests/TestApp/LLMOpenAI/Onboarding/LLMOpenAITokenOnboarding.swift b/Tests/UITests/TestApp/LLMOpenAI/Onboarding/LLMOpenAITokenOnboarding.swift new file mode 100644 index 00000000..9e5bf87f --- /dev/null +++ b/Tests/UITests/TestApp/LLMOpenAI/Onboarding/LLMOpenAITokenOnboarding.swift @@ -0,0 +1,23 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +import SpeziLLMOpenAI +import SpeziOnboarding +import SwiftUI + + +struct LLMOpenAITokenOnboarding: View { + @Environment(OnboardingNavigationPath.self) private var path + + + var body: some View { + LLMOpenAIAPITokenOnboardingStep { + path.nextStep() + } + } +} diff --git a/Tests/UITests/TestApp/TestAppDelegate.swift b/Tests/UITests/TestApp/TestAppDelegate.swift index 311a3b86..b40b2764 100644 --- a/Tests/UITests/TestApp/TestAppDelegate.swift +++ b/Tests/UITests/TestApp/TestAppDelegate.swift @@ -17,8 +17,8 @@ class TestAppDelegate: SpeziAppDelegate { Configuration { LLMRunner { LLMLocalRunnerSetupTask() + LLMOpenAIRunnerSetupTask() } - OpenAIModule() } } } diff --git a/Tests/UITests/TestAppUITests/TestAppLLMLocalUITests.swift b/Tests/UITests/TestAppUITests/TestAppLLMLocalUITests.swift index 48618ada..7ca4593c 100644 --- a/Tests/UITests/TestAppUITests/TestAppLLMLocalUITests.swift +++ b/Tests/UITests/TestAppUITests/TestAppLLMLocalUITests.swift @@ -17,7 +17,7 @@ class TestAppLLMLocalUITests: XCTestCase { continueAfterFailure = false let app = XCUIApplication() - app.launchArguments = ["--mockLocalLLM"] + app.launchArguments = ["--mockMode"] app.deleteAndLaunch(withSpringboardAppName: "TestApp") } @@ -36,9 +36,9 @@ class TestAppLLMLocalUITests: XCTestCase { XCTAssert(app.buttons["Next"].waitForExistence(timeout: 2)) app.buttons["Next"].tap() - // Chat - XCTAssert(app.staticTexts["Hello! 
I'm a locally executed Llama 2 7B model, enabled by the Spezi ecosystem!"].waitForExistence(timeout: 2)) + sleep(1) + // Chat try app.textViews["Message Input Textfield"].enter(value: "New Message!", dismissKeyboard: false) XCTAssert(app.buttons["Send Message"].waitForExistence(timeout: 2)) diff --git a/Tests/UITests/TestAppUITests/TestAppLLMOpenAIUITests.swift b/Tests/UITests/TestAppUITests/TestAppLLMOpenAIUITests.swift index c2c33c90..a73673de 100644 --- a/Tests/UITests/TestAppUITests/TestAppLLMOpenAIUITests.swift +++ b/Tests/UITests/TestAppUITests/TestAppLLMOpenAIUITests.swift @@ -17,6 +17,7 @@ class TestAppLLMOpenAIUITests: XCTestCase { continueAfterFailure = false let app = XCUIApplication() + app.launchArguments = ["--mockMode"] app.deleteAndLaunch(withSpringboardAppName: "TestApp") } @@ -27,9 +28,6 @@ class TestAppLLMOpenAIUITests: XCTestCase { XCTAssert(app.buttons["LLMOpenAI"].waitForExistence(timeout: 2)) app.buttons["LLMOpenAI"].tap() - app.staticTexts["User Message!"].tap() - app.staticTexts["Assistant Message!"].tap() - app.buttons["Onboarding"].tap() try app.textFields["OpenAI API Key"].enter(value: "New Token") @@ -58,9 +56,8 @@ class TestAppLLMOpenAIUITests: XCTestCase { XCTAssert(app.textFields["New Token"].waitForExistence(timeout: 2)) sleep(1) - XCTAssert(app.buttons["Next"].waitForExistence(timeout: 2)) app.buttons["Next"].tap() - XCTAssert(app.pickerWheels["GPT 4"].waitForExistence(timeout: 2)) + XCTAssert(app.pickerWheels["GPT 3.5 Turbo"].waitForExistence(timeout: 2)) app.deleteAndLaunch(withSpringboardAppName: "TestApp") @@ -83,20 +80,19 @@ class TestAppLLMOpenAIUITests: XCTestCase { XCTAssert(app.buttons["LLMOpenAI"].waitForExistence(timeout: 2)) app.buttons["LLMOpenAI"].tap() - XCTAssert(app.staticTexts["User Message!"].waitForExistence(timeout: 2)) - XCTAssert(app.staticTexts["Assistant Message!"].waitForExistence(timeout: 2)) XCTAssert(app.buttons["Record Message"].waitForExistence(timeout: 2)) - XCTAssertFalse(app.staticTexts["System Message!"].waitForExistence(timeout: 2)) - XCTAssertFalse(app.staticTexts["Function Message!"].waitForExistence(timeout: 2)) + XCTAssertFalse(app.staticTexts["You're a helpful assistant that answers questions from users."].waitForExistence(timeout: 2)) XCTAssert(app.buttons["Record Message"].isEnabled) + try app.textViews["Message Input Textfield"].enter(value: "New Message!", dismissKeyboard: false) - XCTAssert(app.buttons["Send Message"].isEnabled) - sleep(1) + XCTAssert(app.buttons["Send Message"].waitForExistence(timeout: 2)) app.buttons["Send Message"].tap() - - XCTAssert(app.staticTexts["New Message!"].waitForExistence(timeout: 2)) + + sleep(3) + + XCTAssert(app.staticTexts["Mock Message from SpeziLLM!"].waitForExistence(timeout: 5)) } } diff --git a/Tests/UITests/UITests.xcodeproj/project.pbxproj b/Tests/UITests/UITests.xcodeproj/project.pbxproj index 89725908..3e24a382 100644 --- a/Tests/UITests/UITests.xcodeproj/project.pbxproj +++ b/Tests/UITests/UITests.xcodeproj/project.pbxproj @@ -29,6 +29,8 @@ 9772D6802B03381400E62B9D /* LLMOpenAIOnboardingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9772D67F2B03381400E62B9D /* LLMOpenAIOnboardingView.swift */; }; 9772D6822B033D5500E62B9D /* LLMOpenAIChatTestView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9772D6812B033D5500E62B9D /* LLMOpenAIChatTestView.swift */; }; 977E49A02B035563001485D4 /* LLMLocalTestView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 977E499F2B035563001485D4 /* LLMLocalTestView.swift */; }; + 97A25C942B28DDAB0073B990 /* 
LLMOpenAIModelOnboarding.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97A25C922B28DDAB0073B990 /* LLMOpenAIModelOnboarding.swift */; }; + 97A25C952B28DDAB0073B990 /* LLMOpenAITokenOnboarding.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97A25C932B28DDAB0073B990 /* LLMOpenAITokenOnboarding.swift */; }; 97DD56BB2B02F7E400389331 /* SpeziChat in Frameworks */ = {isa = PBXBuildFile; productRef = 97DD56BA2B02F7E400389331 /* SpeziChat */; }; 97DD56BD2B02F80100389331 /* SpeziLLMOpenAI in Frameworks */ = {isa = PBXBuildFile; productRef = 97DD56BC2B02F80100389331 /* SpeziLLMOpenAI */; }; /* End PBXBuildFile section */ @@ -66,6 +68,8 @@ 9772D6812B033D5500E62B9D /* LLMOpenAIChatTestView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = LLMOpenAIChatTestView.swift; sourceTree = ""; }; 977438092B05709700EC6527 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; }; 977E499F2B035563001485D4 /* LLMLocalTestView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LLMLocalTestView.swift; sourceTree = ""; }; + 97A25C922B28DDAB0073B990 /* LLMOpenAIModelOnboarding.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = LLMOpenAIModelOnboarding.swift; sourceTree = ""; }; + 97A25C932B28DDAB0073B990 /* LLMOpenAITokenOnboarding.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = LLMOpenAITokenOnboarding.swift; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -173,6 +177,15 @@ path = Resources; sourceTree = ""; }; + 97A25C912B28DDAB0073B990 /* Onboarding */ = { + isa = PBXGroup; + children = ( + 97A25C922B28DDAB0073B990 /* LLMOpenAIModelOnboarding.swift */, + 97A25C932B28DDAB0073B990 /* LLMOpenAITokenOnboarding.swift */, + ); + path = Onboarding; + sourceTree = ""; + }; 97DD56B32B02F72D00389331 /* LLMLocal */ = { isa = PBXGroup; children = ( @@ -187,6 +200,7 @@ 97DD56B42B02F72D00389331 /* LLMOpenAI */ = { isa = PBXGroup; children = ( + 97A25C912B28DDAB0073B990 /* Onboarding */, 9772D6812B033D5500E62B9D /* LLMOpenAIChatTestView.swift */, 9772D67F2B03381400E62B9D /* LLMOpenAIOnboardingView.swift */, ); @@ -336,12 +350,14 @@ 9756D25E2B0316A30006B6BD /* LLMLocalChatTestView.swift in Sources */, 9756D2532B0316240006B6BD /* LLMLocalOnboardingDownloadView.swift in Sources */, 9756D2542B0316240006B6BD /* LLMLocalOnboardingWelcomeView.swift in Sources */, + 97A25C952B28DDAB0073B990 /* LLMOpenAITokenOnboarding.swift in Sources */, 977E49A02B035563001485D4 /* LLMLocalTestView.swift in Sources */, 9756D2522B0316240006B6BD /* StorageKeys.swift in Sources */, 9772D6822B033D5500E62B9D /* LLMOpenAIChatTestView.swift in Sources */, 976179542B03501100E1046E /* FeatureFlags.swift in Sources */, 976179502B034E0400E1046E /* TestAppDelegate.swift in Sources */, 9756D2552B0316240006B6BD /* LLMLocalOnboardingFlow.swift in Sources */, + 97A25C942B28DDAB0073B990 /* LLMOpenAIModelOnboarding.swift in Sources */, 2FA7382C290ADFAA007ACEB9 /* TestApp.swift in Sources */, 9756D2512B0316240006B6BD /* Binding+Negate.swift in Sources */, ); diff --git a/Tests/UITests/UITests.xcodeproj/xcshareddata/xcschemes/TestApp.xcscheme b/Tests/UITests/UITests.xcodeproj/xcshareddata/xcschemes/TestApp.xcscheme index 3c2223e0..f3dcdbdf 100644 --- 
a/Tests/UITests/UITests.xcodeproj/xcshareddata/xcschemes/TestApp.xcscheme +++ b/Tests/UITests/UITests.xcodeproj/xcshareddata/xcschemes/TestApp.xcscheme @@ -134,7 +134,7 @@