From f1baad1ec8a4aedda2b730432e897af3815d0288 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Tuszy=C5=84ski?=
Date: Mon, 14 Oct 2024 17:55:37 +0200
Subject: [PATCH] Updated readme

---
 README.mdown | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/README.mdown b/README.mdown
index b90d760..887e7b5 100644
--- a/README.mdown
+++ b/README.mdown
@@ -2,12 +2,17 @@
 
 Swift bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) thanks to which you'll be able to run compatible LLM models directly on your device.
 
+## Features
+
+- Lightweight and easy to use
+- Works on macOS and Linux
+- Supports streaming via structured concurrency
+- Swift 6 ready!
+
 ## TODO
 
 - [ ] Unit tests
-- [ ] Support streaming mode with `AsyncStream`
-
 ## How to install
 
 Use swift package manager:
 
@@ -18,11 +23,27 @@ Use swift package manager:
 
 ## How to use
 
-Currently, the library supports non-streaming inference. It's as simple as initializing with a path to model and passing a prompt:
+Here's a quick example of how to use it. For more, see the example app in the `example/` folder.
 
 ```swift
-let llama = try LLama(modelPath: "")
-let prompt = "Identify yourself, large language model!"
-let result = try await llama.infer(prompt: prompt, maxTokens: 1024)
-print(result)
+// Initialize the model
+let model = try Model(modelPath: "")
+let llama = try LLama(modelLoader: model)
+
+// Results are delivered through an `AsyncStream`
+let prompt = "what is the meaning of life?"
+for try await token in await llama.infer(prompt: prompt, maxTokens: 1024) {
+  print(token, terminator: "")
+}
 ```
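+
+If you'd rather have the whole response as a single `String`, you can collect the stream yourself. Below is a minimal sketch that reuses the `infer` call shown above:
+
+```swift
+// Accumulate the streamed tokens into one String (sketch; same API as above)
+var response = ""
+for try await token in await llama.infer(prompt: prompt, maxTokens: 1024) {
+  response += token
+}
+print(response)
+```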