From 48cd514b31a9439ae46e67638b25ba547dba0c78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9lina?= Date: Tue, 4 Feb 2025 11:24:01 +0100 Subject: [PATCH] text-to-image: replace nested dict by `height` and `width` properties in the input schema (#1158) Flattening `height` and `width` parameters for `text-to-image`, making the API simpler for users and making provider-specific transformations (dict/enum) easier for us to handle. Yes, it's a breaking change, but I expect the usage of `target_size` to be really minimal so far. --- .../src/tasks/text-to-image/inference.ts | 16 ++++++---------- .../src/tasks/text-to-image/spec/input.json | 19 +++++++------------ 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/packages/tasks/src/tasks/text-to-image/inference.ts b/packages/tasks/src/tasks/text-to-image/inference.ts index 562bcaecf..5bdc8a340 100644 --- a/packages/tasks/src/tasks/text-to-image/inference.ts +++ b/packages/tasks/src/tasks/text-to-image/inference.ts @@ -26,6 +26,10 @@ export interface TextToImageParameters { * the text prompt, but values too high may cause saturation and other artifacts. */ guidance_scale?: number; + /** + * The height in pixels of the output image + */ + height?: number; /** * One prompt to guide what NOT to include in image generation. 
*/ @@ -44,17 +48,9 @@ export interface TextToImageParameters { */ seed?: number; /** - * The size in pixel of the output image + * The width in pixels of the output image */ - target_size?: TargetSize; - [property: string]: unknown; -} -/** - * The size in pixel of the output image - */ -export interface TargetSize { - height: number; - width: number; + width?: number; [property: string]: unknown; } /** diff --git a/packages/tasks/src/tasks/text-to-image/spec/input.json b/packages/tasks/src/tasks/text-to-image/spec/input.json index 4e430073d..f94e0bbc3 100644 --- a/packages/tasks/src/tasks/text-to-image/spec/input.json +++ b/packages/tasks/src/tasks/text-to-image/spec/input.json @@ -31,18 +31,13 @@ "type": "integer", "description": "The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference." }, - "target_size": { - "type": "object", - "description": "The size in pixel of the output image", - "properties": { - "width": { - "type": "integer" - }, - "height": { - "type": "integer" - } - }, - "required": ["width", "height"] + "width": { + "type": "integer", + "description": "The width in pixels of the output image" + }, + "height": { + "type": "integer", + "description": "The height in pixels of the output image" }, "scheduler": { "type": "string",