From 48cd514b31a9439ae46e67638b25ba547dba0c78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9lina?= <hanouticelina@gmail.com>
Date: Tue, 4 Feb 2025 11:24:01 +0100
Subject: [PATCH] text-to-image: replace nested dict by `height` and `width`
 properties in the input schema (#1158)

Flatten the `height` and `width` parameters for `text-to-image`. This
makes the API simpler for users and makes provider-specific
transformations (dict/enum) easier for us to handle.

Yes, it's a breaking change, but I expect the usage of `target_size` to
be really minimal so far.
---
 .../src/tasks/text-to-image/inference.ts      | 16 ++++++----------
 .../src/tasks/text-to-image/spec/input.json   | 19 +++++++------------
 2 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts
index 562bcaecf..5bdc8a340 100644
--- a/packages/tasks/src/tasks/text-to-image/inference.ts
+++ b/packages/tasks/src/tasks/text-to-image/inference.ts
@@ -26,6 +26,10 @@ export interface TextToImageParameters {
 	 * the text prompt, but values too high may cause saturation and other artifacts.
 	 */
 	guidance_scale?: number;
+	/**
+	 * The height in pixels of the output image
+	 */
+	height?: number;
 	/**
 	 * One prompt to guide what NOT to include in image generation.
 	 */
@@ -44,17 +48,9 @@ export interface TextToImageParameters {
 	 */
 	seed?: number;
 	/**
-	 * The size in pixel of the output image
+	 * The width in pixels of the output image
 	 */
-	target_size?: TargetSize;
-	[property: string]: unknown;
-}
-/**
- * The size in pixel of the output image
- */
-export interface TargetSize {
-	height: number;
-	width: number;
+	width?: number;
 	[property: string]: unknown;
 }
 /**
diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json
index 4e430073d..f94e0bbc3 100644
--- a/packages/tasks/src/tasks/text-to-image/spec/input.json
+++ b/packages/tasks/src/tasks/text-to-image/spec/input.json
@@ -31,18 +31,13 @@
 					"type": "integer",
 					"description": "The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
 				},
-				"target_size": {
-					"type": "object",
-					"description": "The size in pixel of the output image",
-					"properties": {
-						"width": {
-							"type": "integer"
-						},
-						"height": {
-							"type": "integer"
-						}
-					},
-					"required": ["width", "height"]
+				"width": {
+					"type": "integer",
+					"description": "The width in pixels of the output image"
+				},
+				"height": {
+					"type": "integer",
+					"description": "The height in pixels of the output image"
 				},
 				"scheduler": {
 					"type": "string",