From 22fbdcde3d67071b46c8c3b58f8dd63e04c4a310 Mon Sep 17 00:00:00 2001
From: Phil Weir
Date: Sat, 23 Nov 2024 15:40:09 +0200
Subject: [PATCH 1/6] wip(kfp): first version of kfp renderer

---
 pyproject.toml                   |   1 +
 src/dewret/renderers/kubeflow.py | 863 +++++++++++++++++++++++++++++++
 2 files changed, 864 insertions(+)
 create mode 100644 src/dewret/renderers/kubeflow.py

diff --git a/pyproject.toml b/pyproject.toml
index 36db113c..5f87b6c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,6 +57,7 @@ test = ["pytest", "coverage", "ruff", "mypy"]
 docs = [
     "tmp_fat_portray >= v1.8.0"
 ]
+kubeflow = ["kfp"]
 
 [tool.pixi.project]
 channels = ["conda-forge"]
diff --git a/src/dewret/renderers/kubeflow.py b/src/dewret/renderers/kubeflow.py
new file mode 100644
index 00000000..f3cffbea
--- /dev/null
+++ b/src/dewret/renderers/kubeflow.py
@@ -0,0 +1,863 @@
+# Copyright 2024- Flax & Teal Limited. All Rights Reserved.
+# Copyright 2022 The Kubeflow Authors [portions from Kubeflow Pipelines]
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Kubeflow Renderer.
+
+Outputs a [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/)
+representation of the current workflow.
+"""
+
+import uuid
+from google.protobuf import json_format
+from kfp.pipeline_spec import pipeline_spec_pb2
+from kfp.compiler import pipeline_spec_builder as builder
+from kfp import dsl
+from kfp.dsl.types import type_utils
+from kfp.dsl.pipeline_context import Pipeline
+from attrs import define, has as attrs_has, fields as attrs_fields, AttrsInstance
+from dataclasses import is_dataclass, fields as dataclass_fields
+from collections.abc import Mapping
+import yaml
+from typing import (
+    TypedDict,
+    NotRequired,
+    get_origin,
+    get_args,
+    cast,
+    Any,
+    Unpack,
+    Iterable,
+)
+from types import UnionType
+from inspect import isclass
+from sympy import Basic, Tuple, Dict, jscode, Symbol
+from contextvars import ContextVar
+
+from dewret.core import (
+    Raw,
+    RawType,
+    FirmType,
+)
+from dewret.workflow import (
+    FactoryCall,
+    Workflow,
+    BaseStep,
+    StepReference,
+    ParameterReference,
+    expr_to_references,
+)
+from dewret.utils import (
+    crawl_raw,
+    DataclassProtocol,
+    firm_to_raw,
+    flatten_if_set,
+    Unset,
+)
+from dewret.render import base_render
+from dewret.core import Reference, get_render_configuration, set_render_configuration
+
+PIPELINE: Pipeline = ContextVar("pipeline")
+
+
+def register_task_handler(
+    task: dsl.pipeline_task.PipelineTask,
+) -> dsl.pipeline_task.PipelineTask:
+    """Registers task handler for attaching tasks to pipelines.
+
+    Args:
+        task: task to add to pipeline.
+ """ + pipeline = PIPELINE.get() + name = pipeline.add_task( + task=task, add_to_group=not getattr(task, "is_exit_handler", False) + ) + return name + + +dsl.pipeline_task.PipelineTask._register_task_handler = register_task_handler + + +class BuilderPipeline(Pipeline): + """ContextVar-based Pipeline.""" + + def __enter__(self) -> "BuilderPipeline": + """Ensure a pipeline is set for tasks created in this context.""" + if Pipeline._default_pipeline: + raise Exception("Nested pipelines are not allowed.") + + Pipeline._default_pipeline = self + + PIPELINE.set(self) + + return self + + def __exit__(self, *_: Any) -> None: + """Reset the pipeline for new tasks to None.""" + PIPELINE.set(None) + Pipeline._default_pipeline = None + + +class CommandInputSchema(TypedDict): + """Structure for referring to a raw type in CWL. + + Encompasses several CWL types. In future, it may be best to + use _cwltool_ or another library for these basic structures. + + Attributes: + type: CWL type of this input. + label: name to show for this input. + fields: (for `record`) individual fields in a dict-like structure. + items: (for `array`) type that each field will have. + """ + + type: "InputSchemaType" + label: str + fields: NotRequired[dict[str, "CommandInputSchema"]] + items: NotRequired["InputSchemaType"] + default: NotRequired[RawType] + + +InputSchemaType = ( + str + | CommandInputSchema + | list[str] + | list["InputSchemaType"] + | dict[str, "str | InputSchemaType"] +) + + +def render_expression(ref: Any) -> "ReferenceDefinition": + """Turn a rich (sympy) expression into a CWL JS expression. + + Args: + ref: a structure whose elements are all string-renderable or sympy Basic. + + Returns: a ReferenceDefinition containing a string representation of the expression in the form `$(...)`. + """ + + def _render(ref: Any) -> Basic | RawType: + if not isinstance(ref, Basic): + if isinstance(ref, Mapping): + ref = Dict({key: _render(val) for key, val in ref.items()}) + elif not isinstance(ref, str | bytes) and isinstance(ref, Iterable): + ref = Tuple(*(_render(val) for val in ref)) + return ref + + expr = _render(ref) + if isinstance(expr, Basic): + values = list(expr.free_symbols) + step_syms = [sym for sym in expr.free_symbols if isinstance(sym, StepReference)] + param_syms = [ + sym for sym in expr.free_symbols if isinstance(sym, ParameterReference) + ] + + if set(values) != set(step_syms) | set(param_syms): + raise NotImplementedError( + f"Can only build expressions for step results and param results: {ref}" + ) + + if len(step_syms) > 1: + raise NotImplementedError( + f"Can only create expressions with 1 step reference: {ref}" + ) + if not (step_syms or param_syms): + ... + if values == [ref]: + if isinstance(ref, StepReference): + return ReferenceDefinition(source=to_name(ref), value_from=None) + else: + return ReferenceDefinition(source=ref.name, value_from=None) + source = None + for ref in values: + if isinstance(ref, StepReference): + field = with_field(ref) + parts = field.split("/") + base = f"/{parts[0]}" if parts and parts[0] else "" + if len(parts) > 1: + expr = expr.subs(ref, f"self.{'.'.join(parts[1:])}") + else: + expr = expr.subs(ref, "self") + source = f"{ref.__root_name__}{base}" + else: + expr = expr.subs(ref, Symbol(f"inputs.{ref.name}")) + return ReferenceDefinition( + source=source, value_from=f"$({jscode(_render(expr))})" + ) + return ReferenceDefinition(source=str(expr), value_from=None) + + +class CWLRendererConfiguration(TypedDict): + """Configuration for the renderer. 
+ + Attributes: + allow_complex_types: can input/output types be other than raw? + factories_as_params: should factories be treated as input or steps? + """ + + allow_complex_types: NotRequired[bool] + factories_as_params: NotRequired[bool] + + +def default_config() -> CWLRendererConfiguration: + """Default configuration for this renderer. + + This is a hook-like call to give a configuration dict that this renderer + will respect, and sets any necessary default values. + + Returns: a dict with (preferably) raw type structures to enable easy setting + from YAML/JSON. + """ + return { + "allow_complex_types": False, + "factories_as_params": False, + } + + +def with_type(result: Any) -> type | Any: + """Get a Python type from a value. + + Does so either by using its `__type__` field (for example, for References) + or if unavailable, using `type()`. + + Returns: a Python type. + """ + if hasattr(result, "__type__"): + return result.__type__ + return type(result) + + +def with_field(result: Any) -> str: + """Get a string representing any 'field' suffix of a value. + + This only makes sense in the context of a Reference, which can represent + a deep reference with a known variable (parameter or step result, say) using + its `__field__` attribute. Defaults to `"out"` as this produces compliant CWL + where every output has a "fieldname". + + Returns: a string representation of the field portion of the passed value or `"out"`. + """ + if hasattr(result, "__field__") and result.__field__: + return str(result.__field_str__) + else: + return "out" + + +def to_name(result: Reference[Any]) -> str: + """Take a reference and get a name representing it. + + The primary purpose of this method is to deal with the case where a reference is to the + whole result, as we always put this into an imagined `out` field for CWL consistency. + + Returns: the name of the reference, including any field portion, appending an `"out"` fieldname if none. + """ + if ( + hasattr(result, "__field__") + and not result.__field__ + and isinstance(result, StepReference) + ): + return f"{result.__name__}/out" + return result.__name__ + + +@define +class ReferenceDefinition: + """CWL-renderable internal reference. + + Normally points to a value or a step. + """ + + source: str | None + value_from: str | None + + @classmethod + def from_reference(cls, ref: Reference[Any]) -> "ReferenceDefinition": + """Build from a `Reference`. + + Converts a `dewret.workflow.Reference` into a CWL-rendering object. + + Args: + ref: reference to convert. + """ + return render_expression(ref) + + def render(self) -> dict[str, RawType]: + """Render to a dict-like structure. + + Returns: + Reduced form as a native Python dict structure for + serialization. + """ + representation: dict[str, RawType] = {} + if self.source is not None: + representation["source"] = self.source + if self.value_from is not None: + representation["valueFrom"] = self.value_from + return representation + + +@define +class StepDefinition: + """CWL-renderable step. + + Coerces the dewret structure of a step into that + needed for valid CWL. + + Attributes: + name: identifier to call this step by. + run: task to execute for this step. + in_: inputs from values or other steps. + """ + + name: str + run: str + out: dict[str, "CommandInputSchema"] | list[str] + in_: Mapping[str, ReferenceDefinition | Raw] + + @classmethod + def from_step(cls, step: BaseStep) -> "StepDefinition": + """Build from a `BaseStep`. + + Converts a `dewret.workflow.Step` into a CWL-rendering object. 
+ + Args: + step: step to convert. + """ + inputs = {} + for key, param in step.arguments.items(): + typ = with_type(param) + typ = type_utils._annotation_to_type_struct(typ) + input_output_spec_args = {"type": typ, "is_artifact_list": False} + inputs[key] = dsl.structures.InputSpec( + **input_output_spec_args, + ) + container = dsl.structures.ContainerSpecImplementation( + image="python:x.xx", + command=["python"], + args=[], + ) + component_spec = dsl.structures.ComponentSpec( + name=step.name, + description=f"{step.name} via dewret", + inputs=inputs, + # outputs=to_output_schema("out", step.return_type)["fields"], # make_output_spec(return_ann) + outputs={}, # make_output_spec(return_ann) + implementation=dsl.structures.Implementation(container), + ) + python_cmpt = dsl.python_component.PythonComponent( + component_spec=component_spec, python_func=step.task.target + ) + task_spec = dsl.pipeline_task.PipelineTask(python_cmpt.component_spec, {}) + component_spec.implementation = dsl.structures.Implementation( + container=dsl.structures.ContainerSpecImplementation( + image="IMAGE", + command="python", + args=[ + "--executor_input", + dsl.PIPELINE_TASK_EXECUTOR_INPUT_PLACEHOLDER, + "--function_to_execute", + step.task.name, + ], + ) + ) + + def render(self) -> dict[str, RawType]: + """Render to a dict-like structure. + + Returns: + Reduced form as a native Python dict structure for + serialization. + """ + return { + "run": self.run, + "in": { + key: ( + ref.render() + if isinstance(ref, ReferenceDefinition) + else render_expression(ref).render() + if isinstance(ref, Basic) + else {"default": firm_to_raw(ref.value)} + if hasattr(ref, "value") + else render_expression(ref).render() + ) + for key, ref in self.in_.items() + }, + "out": crawl_raw(self.out), + } + + +def cwl_type_from_value(label: str, val: RawType | Unset) -> CommandInputSchema: + """Find a CWL type for a given (possibly Unset) value. + + Args: + label: the label for the variable being checked to prefill the input def and improve debugging info. + val: a raw Python variable or an unset variable. + + Returns: + Input schema type. + """ + if val is not None and hasattr(val, "__type__"): + raw_type = val.__type__ + else: + raw_type = type(val) + + return to_cwl_type(label, raw_type) + + +def to_cwl_type(label: str, typ: type) -> CommandInputSchema: + """Map Python types to CWL types. + + Args: + label: the label for the variable being checked to prefill the input def and improve debugging info. + typ: a Python basic type. + + Returns: + CWL specification type dict. 
+ """ + typ_dict: CommandInputSchema = {"label": label, "type": ""} + base: Any | None = typ + args = get_args(typ) + if args: + base = get_origin(typ) + + if base == type(None): + typ_dict["type"] = "null" + elif base == int: + typ_dict["type"] = "int" + elif base == bool: + typ_dict["type"] = "boolean" + elif base == dict or (isinstance(base, type) and attrs_has(base)): + typ_dict["type"] = "record" + elif base == float: + typ_dict["type"] = "float" + elif base == str: + typ_dict["type"] = "string" + elif base == bytes: + typ_dict["type"] = "bytes" + elif isinstance(typ, UnionType): + typ_dict.update( + {"type": tuple(to_cwl_type(label, item)["type"] for item in args)} + ) + elif isclass(base) and issubclass(base, Iterable): + try: + if len(args) > 1: + typ_dict.update( + { + "type": "array", + "items": [to_cwl_type(label, t)["type"] for t in args], + } + ) + elif len(args) == 1: + typ_dict.update( + {"type": "array", "items": to_cwl_type(label, args[0])["type"]} + ) + else: + typ_dict["type"] = "array" + except IndexError as err: + raise TypeError( + f"Cannot render complex type ({typ}) to CWL for {label}, have you enabled allow_complex_types configuration?" + ) from err + elif get_render_configuration("allow_complex_types"): + typ_dict["type"] = typ if isinstance(typ, str) else typ.__name__ + else: + raise TypeError(f"Cannot render type ({typ}) to CWL for {label}") + return typ_dict + + +class CommandOutputSchema(CommandInputSchema): + """Structure for referring to an output in CWL. + + As a simplification, this is an input schema with an extra + `outputSource` field. + + Attributes: + outputSource: step result to use for this output. + """ + + outputSource: NotRequired[str] + expression: NotRequired[str] + source: NotRequired[list[str]] + + +def raw_to_command_input_schema(label: str, value: RawType | Unset) -> InputSchemaType: + """Infer the CWL input structure for this value. + + Inspects the value, to work out an appropriate structure + describing it in CWL. + + Args: + label: name of the variable. + value: basic-typed variable from which to build structure. + + Returns: + Structure used to define (possibly compound) basic types for input. + """ + if isinstance(value, dict) or isinstance(value, list): + return _raw_to_command_input_schema_internal(label, value) + else: + return cwl_type_from_value(label, value) + + +def to_output_schema( + label: str, + typ: type[RawType | AttrsInstance | DataclassProtocol], + output_source: str | None = None, +) -> CommandOutputSchema: + """Turn a step's output into an output schema. + + Takes a source, type and label and provides a description for CWL. + + Args: + label: name of this field. + typ: either a basic type, compound of basic types, or a TypedDict representing a pre-defined result structure. + output_source: if provided, a CWL step result reference to input here. 
+ + Returns: + CWL CommandOutputSchema-like structure for embedding into an `outputs` block + """ + fields = None + if attrs_has(typ): + fields = { + str(field.name): cast( + CommandInputSchema, to_output_schema(field.name, field.type) + ) + for field in attrs_fields(typ) + } + elif is_dataclass(typ): + fields = { + str(field.name): cast( + CommandInputSchema, to_output_schema(field.name, field.type) + ) + for field in dataclass_fields(typ) + } + + if fields: + output = CommandOutputSchema( + type="record", + label=label, + fields=fields, + ) + else: + # TODO: this complains because NotRequired keys are never present, + # but that does not seem like a problem here - likely a better solution. + output = CommandOutputSchema( + **to_cwl_type(label, typ) # type: ignore + ) + if output_source is not None: + output["outputSource"] = output_source + return output + + +def _raw_to_command_input_schema_internal( + label: str, value: RawType | Unset +) -> CommandInputSchema: + structure: CommandInputSchema = cwl_type_from_value(label, value) + if isinstance(value, dict): + structure["fields"] = { + key: _raw_to_command_input_schema_internal(key, val) + for key, val in value.items() + } + elif isinstance(value, list): + typeset = set(get_args(value)) + if not typeset: + typeset = { + item.__type__ + if item is not None and hasattr(item, "__type__") + else type(item) + for item in value + } + if len(typeset) != 1: + raise RuntimeError( + "For CWL, an input array must have a consistent type, " + "and we need at least one element to infer it, or an explicit typehint." + ) + structure["items"] = to_cwl_type(label, typeset.pop())["type"] + elif not isinstance(value, Unset): + structure["default"] = firm_to_raw(value) + return structure + + +@define +class InputsDefinition: + """CWL-renderable representation of an input parameter block. + + Turns dewret results into a CWL input block. + + Attributes: + input: sequence of results from a workflow. + """ + + inputs: dict[str, "CommandInputParameter"] + + @define + class CommandInputParameter: + """CWL-renderable reference to a specific input. + + Attributes: + type: type of variable + name: fully-qualified name of the input. + """ + + type: InputSchemaType + default: RawType | Unset + label: str + + @classmethod + def from_parameters( + cls, parameters: list[ParameterReference[Any] | FactoryCall] + ) -> "InputsDefinition": + """Takes a list of parameters into a CWL structure. + + Uses the parameters to fill out the necessary input fields. + + Returns: + CWL-like structure representing all workflow outputs. + """ + parameters_dedup = { + p._.parameter for p in parameters if isinstance(p, ParameterReference) + } + parameters = list(parameters_dedup) + [ + p for p in parameters if not isinstance(p, ParameterReference) + ] + return cls( + inputs={ + input.name: cls.CommandInputParameter( + label=input.__name__, + default=(default := flatten_if_set(input.__default__)), + type=raw_to_command_input_schema( + label=input.__original_name__, value=default + ), + ) + for input in parameters + } + ) + + def render(self) -> dict[str, RawType]: + """Render to a dict-like structure. + + Returns: + Reduced form as a native Python dict structure for + serialization. + """ + result: dict[str, RawType] = {} + for key, input in self.inputs.items(): + # Would rather not cast, but CommandInputSchema is dict[RawType] + # by construction, where type is seen as a TypedDict subclass. 
+ item = firm_to_raw(cast(FirmType, input.type)) + if isinstance(item, dict) and not isinstance(input.default, Unset): + item["default"] = firm_to_raw(input.default) + result[key] = item + return result + + +@define +class OutputsDefinition: + """CWL-renderable set of workflow outputs. + + Turns dewret results into a CWL output block. + + Attributes: + outputs: sequence of results from a workflow. + """ + + outputs: ( + dict[str, "CommandOutputSchema"] + | list["CommandOutputSchema"] + | CommandOutputSchema + ) + + @classmethod + def from_results( + cls, + results: dict[str, StepReference[Any]] + | list[StepReference[Any]] + | tuple[StepReference[Any], ...], + ) -> "OutputsDefinition": + """Takes a mapping of results into a CWL structure. + + Pulls the result type from the signature, ultimately, if possible. + + Returns: + CWL-like structure representing all workflow outputs. + """ + + def _build_results(result: Any) -> RawType: + if isinstance(result, Reference): + # TODO: need to work out how to tell mypy that a TypedDict is also dict[str, RawType] + return to_output_schema( # type: ignore + with_field(result), with_type(result), output_source=to_name(result) + ) + results = result + return ( + [_build_results(result) for result in results] + if isinstance(results, list | tuple | Tuple) + else {key: _build_results(result) for key, result in results.items()} + ) + + try: + # TODO: sort out this nested type building. + return cls(outputs=_build_results(results)) # type: ignore + except AttributeError: + expr, references = expr_to_references(results) + reference_names = sorted( + { + str(ref._.parameter) + if isinstance(ref, ParameterReference) + else str(ref._.step) + for ref in references + } + ) + return cls( + outputs={ + "out": { + "type": "float", # WARNING: we assume any arithmetic expression returns a float. + "label": "out", + "expression": str(expr), + "source": reference_names, + } + } + ) + + def render(self) -> dict[str, RawType] | list[RawType]: + """Render to a dict-like structure. + + Returns: + Reduced form as a native Python dict structure for + serialization. + """ + return ( + [crawl_raw(output) for output in self.outputs] + if isinstance(self.outputs, list) + else {key: crawl_raw(output) for key, output in self.outputs.items()} + ) + + +class WorkflowDefinition(dsl.base_component.BaseComponent): + """CWL-renderable workflow. + + Coerces the dewret structure of a workflow into that + needed for valid CWL. + + Attributes: + steps: sequence of steps in the workflow. + """ + + @classmethod + def from_workflow( + cls, workflow: Workflow, name: None | str = None + ) -> "WorkflowDefinition": + """Build from a `Workflow`. + + Converts a `dewret.workflow.Workflow` into a CWL-rendering object. + + Args: + workflow: workflow to convert. + name: name of this workflow, if it should have one. 
+ """ + parameters: list[ParameterReference[Any] | FactoryCall] = list( + workflow.find_parameters( + include_factory_calls=not get_render_configuration( + "factories_as_params" + ) + ) + ) + if get_render_configuration("factories_as_params"): + parameters += list(workflow.find_factories().values()) + + with BuilderPipeline(name or "myname") as dsl_pipeline: + for step in workflow.indexed_steps.values(): + if isinstance(step, FactoryCall) and get_render_configuration( + "factories_as_params" + ): + continue + StepDefinition.from_step(step) + + description = "DESCRIPTION" + component_name = "NAME" + component_spec = dsl.structures.ComponentSpec( + name=component_name, + description=description, + inputs={}, + outputs={}, + implementation=dsl.structures.Implementation(), + ) + graph_component = cls(component_spec=component_spec) + pipeline_group = dsl_pipeline.groups[0] + pipeline_group.name = uuid.uuid4().hex + + pipeline_spec, platform_spec = builder.create_pipeline_spec( + pipeline=dsl_pipeline, + component_spec=graph_component.component_spec, + pipeline_outputs={}, + pipeline_config={}, + ) + + # pipeline_root = getattr(pipeline_func, 'pipeline_root', None) + # if pipeline_root is not None: + # pipeline_spec.default_pipeline_root = pipeline_root + # if display_name is not None: + # pipeline_spec.pipeline_info.display_name = display_name + if component_spec.description is not None: + pipeline_spec.pipeline_info.description = component_spec.description + + graph_component.component_spec.implementation.graph = pipeline_spec + graph_component.component_spec.platform_spec = platform_spec + return graph_component + + @property + def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + """Returns the pipeline spec of the component.""" + return self.component_spec.implementation.graph + + def execute(self, **kwargs: Any) -> None: + raise RuntimeError("Graph component has no local execution mode.") + + def render(self) -> dict[str, RawType]: + """Render to a dict-like structure. + + Returns: + Reduced form as a native Python dict structure for + serialization. + """ + pipeline_spec_dict = json_format.MessageToDict(self.pipeline_spec) + # yaml_comments = extract_comments_from_pipeline_spec(pipeline_spec_dict, + # self.description) + # has_platform_specific_features = len(self.platform_spec.platforms) > 0 + + # documents = [pipeline_spec_dict] + # if has_platform_specific_features: + # documents.append(json_format.MessageToDict(self.platform_spec)) + return yaml.safe_dump(pipeline_spec_dict, sort_keys=True) + + +def render( + workflow: Workflow, **kwargs: Unpack[CWLRendererConfiguration] +) -> dict[str, dict[str, RawType]]: + """Render to a dict-like structure. + + Args: + workflow: workflow to evaluate result. + **kwargs: additional configuration arguments - these should match CWLRendererConfiguration. + + Returns: + Reduced form as a native Python dict structure for + serialization. + """ + # TODO: Again, convincing mypy that a TypedDict has RawType values. 
+    with set_render_configuration(kwargs):  # type: ignore
+        rendered = base_render(
+            workflow,
+            lambda workflow: WorkflowDefinition.from_workflow(workflow).render(),
+        )
+    return rendered

From 3e9c5faa87d40ed7ade2aecf587dadcc557220c9 Mon Sep 17 00:00:00 2001
From: Phil Weir
Date: Sat, 23 Nov 2024 18:42:55 +0200
Subject: [PATCH 2/6] wip: with inputs/outputs

---
 src/dewret/renderers/kubeflow.py | 190 ++++++++++++++++++++++++++-----
 1 file changed, 162 insertions(+), 28 deletions(-)

diff --git a/src/dewret/renderers/kubeflow.py b/src/dewret/renderers/kubeflow.py
index f3cffbea..45dea167 100644
--- a/src/dewret/renderers/kubeflow.py
+++ b/src/dewret/renderers/kubeflow.py
@@ -20,6 +20,7 @@
 """
 
 import uuid
+import itertools
 from google.protobuf import json_format
 from kfp.pipeline_spec import pipeline_spec_pb2
 from kfp.compiler import pipeline_spec_builder as builder
@@ -68,8 +69,108 @@
 from dewret.render import base_render
 from dewret.core import Reference, get_render_configuration, set_render_configuration
 
-PIPELINE: Pipeline = ContextVar("pipeline")
+PIPELINE: ContextVar[Pipeline] = ContextVar("pipeline")
+CHANNELS: ContextVar[dict[Reference[Any], dsl.pipeline_channel.PipelineChannel]] = ContextVar("channels")
+
+def ensure_channels(expression: Any) -> Any:
+    def remap(ref):
+        if isinstance(ref, Reference):
+            if ref not in channels:
+                channels[ref] = dsl.pipeline_channel.create_pipeline_channel(
+                    name=ref.name,
+                    channel_type=to_cwl_type(ref.name, ref.__type__)["type"],  # type: ignore
+                    task_name=ref._.step.name,
+                    is_artifact_list=False,
+                )
+            return channels[ref]
+    channels = CHANNELS.get()
+    expr, to_check = expr_to_references(expression, remap=remap)
+    return expr
+
+class DewretPipelineTask(dsl.pipeline_task.PipelineTask):
+    def __init__(
+        self,
+        component_spec: dsl.structures.ComponentSpec,
+        args: dict[str, Any],
+        execute_locally: bool = False,
+        execution_caching_default: bool = True,
+        output: StepReference[Any] | None = None,
+    ) -> None:
+        """Initializes a PipelineTask instance."""
+        # import within __init__ to avoid circular import
+        from kfp.dsl.tasks_group import TasksGroup
+        self.state = dsl.pipeline_task.TaskState.FUTURE
+        self.parent_task_group: None | TasksGroup = None
+        args = args or {}
+
+        for input_name, argument_value in args.items():
+            if input_name not in component_spec.inputs:
+                raise ValueError(
+                    f'Component {component_spec.name!r} got an unexpected input:'
+                    f' {input_name!r}.')
+
+            input_spec = component_spec.inputs[input_name]
+
+            type_utils.verify_type_compatibility(
+                given_value=argument_value,
+                expected_spec=input_spec,
+                error_message_prefix=(
+                    f'Incompatible argument passed to the input '
+                    f'{input_name!r} of component {component_spec.name!r}: '),
+            )
+        self.component_spec = component_spec
+
+        self._task_spec = dsl.structures.TaskSpec(
+            name=self._register_task_handler(),
+            inputs=dict(args.items()),
+            dependent_tasks=[],
+            component_ref=component_spec.name,
+            enable_caching=execution_caching_default)
+        self._run_after: list[str] = []
+
+        self.importer_spec = None
+        self.container_spec = None
+        self.pipeline_spec = None
+        self._ignore_upstream_failure_tag = False
+        # platform_config for this primitive task; empty if task is for a graph component
+        self.platform_config = {}
+
+        def validate_placeholder_types(
+                component_spec: dsl.structures.ComponentSpec) -> None:
+            inputs_dict = component_spec.inputs or {}
+            outputs_dict = component_spec.outputs or {}
+            for arg in itertools.chain(
+                    (component_spec.implementation.container.command or []),
+                    (component_spec.implementation.container.args or [])):
+                dsl.pipeline_task.check_primitive_placeholder_is_used_for_correct_io_type(
+                    inputs_dict, outputs_dict, arg)
+
+        if component_spec.implementation.container is not None:
+            validate_placeholder_types(component_spec)
+            self.container_spec = self._extract_container_spec_and_convert_placeholders(
+                component_spec=component_spec)
+        elif component_spec.implementation.importer is not None:
+            self.importer_spec = component_spec.implementation.importer
+            self.importer_spec.artifact_uri = args['uri']
+        else:
+            self.pipeline_spec = self.component_spec.implementation.graph
+
+        self._outputs = {output.name: ensure_channels(output)}
+
+        args = {key: ensure_channels(arg) for key, arg in args.items()}
+        self._inputs = args
+
+        self._channel_inputs = [
+            value for _, value in args.items()
+            if isinstance(value, dsl.pipeline_channel.PipelineChannel)
+        ]
+        dsl.pipeline_channel.extract_pipeline_channels_from_any([
+            value for _, value in args.items()
+            if not isinstance(value, dsl.pipeline_channel.PipelineChannel)
+        ])
+
+        if execute_locally:
+            self._execute_locally(args=args)
 
 def register_task_handler(
     task: dsl.pipeline_task.PipelineTask,
@@ -100,12 +201,14 @@ def __enter__(self) -> "BuilderPipeline":
         Pipeline._default_pipeline = self
 
         PIPELINE.set(self)
+        CHANNELS.set({})
 
         return self
 
     def __exit__(self, *_: Any) -> None:
         """Reset the pipeline for new tasks to None."""
         PIPELINE.set(None)
+        CHANNELS.set({})
         Pipeline._default_pipeline = None
 
 
@@ -347,18 +450,22 @@ def from_step(cls, step: BaseStep) -> "StepDefinition":
             command=["python"],
             args=[],
         )
+
+        rettyp = to_output_schema(dsl.component_factory.SINGLE_OUTPUT_NAME, step.return_type)
+        outputs = {}
+        outputs[dsl.component_factory.SINGLE_OUTPUT_NAME] = rettyp
         component_spec = dsl.structures.ComponentSpec(
             name=step.name,
             description=f"{step.name} via dewret",
             inputs=inputs,
             # outputs=to_output_schema("out", step.return_type)["fields"], # make_output_spec(return_ann)
-            outputs={}, # make_output_spec(return_ann)
+            outputs=outputs, # make_output_spec(return_ann)
             implementation=dsl.structures.Implementation(container),
         )
         python_cmpt = dsl.python_component.PythonComponent(
             component_spec=component_spec, python_func=step.task.target
         )
-        task_spec = dsl.pipeline_task.PipelineTask(python_cmpt.component_spec, {})
+        task_spec = DewretPipelineTask(python_cmpt.component_spec, {}, output=step.make_reference(workflow=step.__workflow__))
         component_spec.implementation = dsl.structures.Implementation(
             container=dsl.structures.ContainerSpecImplementation(
                 image="IMAGE",
@@ -371,6 +478,7 @@
             ],
         )
     )
+        return task_spec
 
     def render(self) -> dict[str, RawType]:
         """Render to a dict-like structure.
@@ -434,17 +542,17 @@ def to_cwl_type(label: str, typ: type) -> CommandInputSchema:
     if base == type(None):
         typ_dict["type"] = "null"
     elif base == int:
-        typ_dict["type"] = "int"
+        typ_dict["type"] = "Integer"
     elif base == bool:
-        typ_dict["type"] = "boolean"
+        typ_dict["type"] = "Boolean"
     elif base == dict or (isinstance(base, type) and attrs_has(base)):
-        typ_dict["type"] = "record"
+        typ_dict["type"] = "Dict"
     elif base == float:
-        typ_dict["type"] = "float"
+        typ_dict["type"] = "Float"
     elif base == str:
-        typ_dict["type"] = "string"
+        typ_dict["type"] = "String"
     elif base == bytes:
-        typ_dict["type"] = "bytes"
+        raise RuntimeError("KFP cannot currently handle bytes as an annotation type.")
     elif isinstance(typ, UnionType):
         typ_dict.update(
             {"type": tuple(to_cwl_type(label, item)["type"] for item in args)}
@@ -513,7 +621,7 @@ def to_output_schema(
     label: str,
     typ: type[RawType | AttrsInstance | DataclassProtocol],
     output_source: str | None = None,
-) -> CommandOutputSchema:
+) -> dsl.structures.OutputSpec:
     """Turn a step's output into an output schema.
 
     Takes a source, type and label and provides a description for CWL.
@@ -530,32 +638,31 @@ def to_output_schema(
     if attrs_has(typ):
         fields = {
             str(field.name): cast(
-                CommandInputSchema, to_output_schema(field.name, field.type)
+                dsl.structures.OutputSpec, to_output_schema(field.name, field.type)
             )
             for field in attrs_fields(typ)
         }
     elif is_dataclass(typ):
         fields = {
             str(field.name): cast(
-                CommandInputSchema, to_output_schema(field.name, field.type)
+                dsl.structures.OutputSpec, to_output_schema(field.name, field.type)
             )
             for field in dataclass_fields(typ)
         }
 
     if fields:
-        output = CommandOutputSchema(
-            type="record",
-            label=label,
-            fields=fields,
+        output = dsl.structures.OutputSpec(
+            type=fields,
         )
     else:
         # TODO: this complains because NotRequired keys are never present,
         # but that does not seem like a problem here - likely a better solution.
-        output = CommandOutputSchema(
-            **to_cwl_type(label, typ)  # type: ignore
+        print(to_cwl_type(label, typ)["type"])
+        output = dsl.structures.OutputSpec(
+            type=to_cwl_type(label, typ)["type"]  # type: ignore
         )
-    if output_source is not None:
-        output["outputSource"] = output_source
+    # if output_source is not None:
+    #     output["outputSource"] = output_source
     return output
 
 
@@ -742,8 +849,7 @@ def render(self) -> dict[str, RawType] | list[RawType]:
         else {key: crawl_raw(output) for key, output in self.outputs.items()}
     )
 
-
-class WorkflowDefinition(dsl.base_component.BaseComponent):
+class DewretGraphComponent(dsl.base_component.BaseComponent):
     """CWL-renderable workflow.
 
     Coerces the dewret structure of a workflow into that
@@ -756,7 +862,7 @@ class WorkflowDefinition(dsl.base_component.BaseComponent):
     @classmethod
     def from_workflow(
         cls, workflow: Workflow, name: None | str = None
-    ) -> "WorkflowDefinition":
+    ) -> "DewretGraphComponent":
         """Build from a `Workflow`.
 
         Converts a `dewret.workflow.Workflow` into a CWL-rendering object.
@@ -772,9 +878,11 @@ def from_workflow( ) ) ) + if get_render_configuration("factories_as_params"): parameters += list(workflow.find_factories().values()) + step_outputs = {} with BuilderPipeline(name or "myname") as dsl_pipeline: for step in workflow.indexed_steps.values(): if isinstance(step, FactoryCall) and get_render_configuration( @@ -782,27 +890,53 @@ def from_workflow( ): continue StepDefinition.from_step(step) + pipeline_outputs = {dsl.component_factory.SINGLE_OUTPUT_NAME: ensure_channels(workflow.result)} + + inputs = {} + for param in parameters: + typ = with_type(param) + typ = type_utils._annotation_to_type_struct(typ) + input_output_spec_args = {"type": typ, "is_artifact_list": False} + inputs[param.name] = dsl.structures.InputSpec( + **input_output_spec_args, + ) + + rettyp = to_output_schema(dsl.component_factory.SINGLE_OUTPUT_NAME, workflow.result.__type__) + outputs = {} + outputs[dsl.component_factory.SINGLE_OUTPUT_NAME] = rettyp + print(dsl.component_factory.SINGLE_OUTPUT_NAME) description = "DESCRIPTION" component_name = "NAME" component_spec = dsl.structures.ComponentSpec( name=component_name, description=description, - inputs={}, - outputs={}, + inputs=inputs, + outputs=outputs, implementation=dsl.structures.Implementation(), ) + + args_list = [] + for parameter in parameters: + input_spec = component_spec.inputs[parameter.name] + args_list.append( + dsl.pipeline_channel.create_pipeline_channel( + name=parameter.name, + channel_type=input_spec.type, + is_artifact_list=input_spec.is_artifact_list, + )) + graph_component = cls(component_spec=component_spec) pipeline_group = dsl_pipeline.groups[0] pipeline_group.name = uuid.uuid4().hex + print(outputs, pipeline_outputs) pipeline_spec, platform_spec = builder.create_pipeline_spec( pipeline=dsl_pipeline, component_spec=graph_component.component_spec, - pipeline_outputs={}, + pipeline_outputs=pipeline_outputs, pipeline_config={}, ) - # pipeline_root = getattr(pipeline_func, 'pipeline_root', None) # if pipeline_root is not None: # pipeline_spec.default_pipeline_root = pipeline_root @@ -858,6 +992,6 @@ def render( with set_render_configuration(kwargs): # type: ignore rendered = base_render( workflow, - lambda workflow: WorkflowDefinition.from_workflow(workflow).render(), + lambda workflow: DewretGraphComponent.from_workflow(workflow).render(), ) return rendered From 1785ea9c97e95aa7df3b2ed4a401ee9f8b25d30d Mon Sep 17 00:00:00 2001 From: Phil Weir Date: Sat, 18 Jan 2025 13:06:30 +0000 Subject: [PATCH 3/6] wip(kubeflow): first implementation that is execution-checked --- example/components_pipeline.yaml | 460 +++++++++++ .../components_pipeline_with_dewret_ROOT.yaml | 43 + ...nents_pipeline_with_dewret_pipeline-1.yaml | 540 +++++++++++++ example/kfp_example.py | 178 +++++ example/kfp_example_housing.csv | 521 ++++++++++++ example/kfp_example_with_dewret.py | 170 ++++ example/kubeflow_config.yaml | 8 + src/dewret/core.py | 22 +- src/dewret/data.py | 25 + src/dewret/render.py | 1 + src/dewret/renderers/kubeflow.py | 744 ++++++++++++------ src/dewret/tasks.py | 14 +- src/dewret/utils.py | 15 +- src/dewret/workflow.py | 39 +- 14 files changed, 2511 insertions(+), 269 deletions(-) create mode 100644 example/components_pipeline.yaml create mode 100644 example/components_pipeline_with_dewret_ROOT.yaml create mode 100644 example/components_pipeline_with_dewret_pipeline-1.yaml create mode 100644 example/kfp_example.py create mode 100644 example/kfp_example_housing.csv create mode 100644 example/kfp_example_with_dewret.py create mode 
100644 example/kubeflow_config.yaml create mode 100644 src/dewret/data.py diff --git a/example/components_pipeline.yaml b/example/components_pipeline.yaml new file mode 100644 index 00000000..2d030480 --- /dev/null +++ b/example/components_pipeline.yaml @@ -0,0 +1,460 @@ +components: + comp-evaluate: + executorLabel: exec-evaluate + inputDefinitions: + artifacts: + predictions: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + y_test: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + metrics_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-load-dataset-from-gcs: + executorLabel: exec-load-dataset-from-gcs + inputDefinitions: + parameters: + blob_name: + parameterType: STRING + bucket_name: + parameterType: STRING + outputDefinitions: + artifacts: + output_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-model-training: + executorLabel: exec-model-training + inputDefinitions: + artifacts: + X_test_input: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + X_train_input: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + y_train_input: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + X_test_scaled: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + model_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-predict: + executorLabel: exec-predict + inputDefinitions: + artifacts: + X_test: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + trained_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + prediction: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-preprocess-the-dataset: + executorLabel: exec-preprocess-the-dataset + inputDefinitions: + artifacts: + dataset_content: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + out_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-train-test-split: + executorLabel: exec-train-test-split + inputDefinitions: + artifacts: + input_df: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + X_test_artifact: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + X_train_artifact: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + y_test_artifact: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + y_train_artifact: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +defaultPipelineRoot: gs://boston-house-pred +deploymentSpec: + executors: + exec-evaluate: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - evaluate + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas' 'scikit-learn'\ + \ 'numpy' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef evaluate(y_test: Input[Dataset], predictions: Input[Dataset],\ + \ metrics_output: Output[Artifact]):\n from sklearn.metrics import mean_squared_error,\ + \ mean_absolute_error\n import pandas as pd\n import numpy as np\n\ + \ y_test_data = pd.read_csv(y_test.path)\n predictions_data = pd.read_csv(predictions.path)\n\ + \n mae = mean_absolute_error(y_test_data, predictions_data)\n mse\ + \ = mean_squared_error(y_test_data, predictions_data)\n rmse = np.sqrt(mse)\n\ + \n with open(metrics_output.path, 'w') as f:\n f.write(f'MAE:\ + \ {mae}\\n')\n f.write(f'MSE: {mse}\\n')\n f.write(f'RMSE:\ + \ {rmse}\\n')\n\n" + image: python:3.9 + exec-load-dataset-from-gcs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - load_dataset_from_gcs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-storage'\ + \ 'pandas' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef load_dataset_from_gcs(bucket_name: str, blob_name: str, output_dataset:\ + \ Output[Dataset]): \n from google.cloud import storage\n import pandas\ + \ as pd\n from io import StringIO\n storage_client = storage.Client()\n\ + \ bucket = storage_client.get_bucket(bucket_name)\n blob = bucket.blob(blob_name)\n\ + \n dataset_content = blob.download_as_string().decode('utf-8')\n\n \ + \ data = pd.read_csv(StringIO(dataset_content), header=None, delim_whitespace=True)\n\ + \ data.to_csv(output_dataset.path, header=True, index=False)\n\n" + image: python:3.9 + exec-model-training: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - model_training + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'numpy' 'scikit-learn'\ + \ 'joblib' 'pandas' 'google-cloud-storage' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef model_training(X_train_input: Input[Dataset],\n \ + \ X_test_input: Input[Dataset],\n y_train_input:\ + \ Input[Dataset],\n X_test_scaled: Output[Dataset],\n\ + \ model_output: Output[Artifact]):\n from sklearn.preprocessing\ + \ import StandardScaler\n from sklearn.linear_model import LinearRegression\n\ + \ import joblib\n from google.cloud import storage\n import pandas\ + \ as pd\n scaler = StandardScaler()\n\n X_train = pd.read_csv(X_train_input.path)\n\ + \ X_test = pd.read_csv(X_test_input.path)\n y_train = pd.read_csv(y_train_input.path)\n\ + \n X_train_scaled = scaler.fit_transform(X_train)\n X_test_scaled2=\ + \ pd.DataFrame(scaler.transform(X_test))\n X_test_scaled2.to_csv(X_test_scaled.path,\ + \ index=False) # Fixing typo here\n\n regression = LinearRegression()\n\ + \ regression.fit(X_train_scaled, y_train)\n\n model_file = '/trained_model.joblib'\n\ + \ joblib.dump(regression, model_file)\n # Upload the model file to\ + \ Google Cloud Storage\n storage_client = storage.Client()\n bucket\ + \ = storage_client.bucket('boston-house-data')\n blob = bucket.blob('data/model.pkl')\n\ + \ blob.upload_from_filename(model_file)\n model_output.file = model_file\n\ + \n" + image: python:3.9 + exec-predict: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - predict + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas' 'joblib'\ + \ 'google-cloud-storage' 'scikit-learn' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef predict(X_test: Input[Dataset], trained_model: Input[Artifact],\ + \ prediction: Output[Dataset]):\n import joblib\n import pandas as\ + \ pd\n from google.cloud import storage\n import sklearn\n X_test_data\ + \ = pd.read_csv(X_test.path)\n\n storage_client = storage.Client()\n\ + \ bucket = storage_client.bucket(\"boston-house-data\")\n blob = bucket.blob(\"\ + data/model.pkl\")\n model_file = 'model.pkl'\n blob.download_to_filename(model_file)\n\ + \n\n regression = joblib.load(model_file)\n\n predictions = regression.predict(X_test_data)\n\ + \ pd.DataFrame(predictions).to_csv(prediction.path, index=False)\n\n" + image: python:3.9 + exec-preprocess-the-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - preprocess_the_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas' &&\ + \ \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef preprocess_the_dataset(dataset_content: Input[Dataset], out_data:\ + \ Output[Dataset]):\n import pandas as pd\n data = pd.read_csv(dataset_content.path,\ + \ header=0)\n if data.isna().sum().any():\n raise ValueError(\"\ + The data needs preprocessing (remove missing values)\")\n\n data.to_csv(out_data.path,\ + \ index=False)\n\n" + image: python:3.9 + exec-train-test-split: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - train_test_split + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'scikit-learn'\ + \ 'pandas' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef train_test_split(input_df: Input[Dataset], \n \ + \ X_train_artifact: Output[Dataset], \n X_test_artifact:\ + \ Output[Dataset], \n y_train_artifact: Output[Dataset],\ + \ \n y_test_artifact: Output[Dataset]):\n from sklearn.model_selection\ + \ import train_test_split\n import pandas as pd\n df = pd.read_csv(input_df.path)\n\ + \ X = df.iloc[:, :-1]\n y = df.iloc[:, -1]\n X_train, X_test, y_train,\ + \ y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n\n \ + \ X_train.to_csv(X_train_artifact.path, index=False)\n X_test.to_csv(X_test_artifact.path,\ + \ index=False)\n y_train.to_csv(y_train_artifact.path, index=False)\n\ + \ y_test.to_csv(y_test_artifact.path, index=False)\n\n" + image: python:3.9 +pipelineInfo: + description: A pipeline to prepare dataset, split into train and test sets, train + a model, and predict + name: boston-house-training-prediction +root: + dag: + tasks: + evaluate: + cachingOptions: + enableCache: true + componentRef: + name: comp-evaluate + dependentTasks: + - predict + - train-test-split + inputs: + artifacts: + predictions: + taskOutputArtifact: + outputArtifactKey: prediction + producerTask: predict + y_test: + taskOutputArtifact: + outputArtifactKey: y_test_artifact + producerTask: train-test-split + taskInfo: + name: evaluate + load-dataset-from-gcs: + cachingOptions: + enableCache: true + componentRef: + name: comp-load-dataset-from-gcs + inputs: + parameters: + blob_name: + runtimeValue: + constant: data/housing.csv + bucket_name: + runtimeValue: + constant: boston-house-data + taskInfo: + name: load-dataset-from-gcs + model-training: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-training + dependentTasks: + - train-test-split + inputs: + artifacts: + X_test_input: + taskOutputArtifact: + outputArtifactKey: X_test_artifact + producerTask: train-test-split + X_train_input: + taskOutputArtifact: + outputArtifactKey: X_train_artifact + producerTask: train-test-split + y_train_input: + taskOutputArtifact: + outputArtifactKey: y_train_artifact + producerTask: train-test-split + taskInfo: + name: model-training + predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-predict + dependentTasks: + - model-training + inputs: + artifacts: + X_test: + taskOutputArtifact: + outputArtifactKey: X_test_scaled + producerTask: model-training + trained_model: + taskOutputArtifact: + outputArtifactKey: model_output + producerTask: model-training + taskInfo: + name: predict + preprocess-the-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-preprocess-the-dataset + dependentTasks: + - load-dataset-from-gcs + inputs: + artifacts: + dataset_content: + taskOutputArtifact: + outputArtifactKey: output_dataset + producerTask: load-dataset-from-gcs + 
taskInfo: + name: preprocess-the-dataset + train-test-split: + cachingOptions: + enableCache: true + componentRef: + name: comp-train-test-split + dependentTasks: + - preprocess-the-dataset + inputs: + artifacts: + input_df: + taskOutputArtifact: + outputArtifactKey: out_data + producerTask: preprocess-the-dataset + taskInfo: + name: train-test-split +schemaVersion: 2.1.0 +sdkVersion: kfp-2.10.1 diff --git a/example/components_pipeline_with_dewret_ROOT.yaml b/example/components_pipeline_with_dewret_ROOT.yaml new file mode 100644 index 00000000..a886cff5 --- /dev/null +++ b/example/components_pipeline_with_dewret_ROOT.yaml @@ -0,0 +1,43 @@ +components: + comp-pipeline-1: + executorLabel: exec-pipeline-1 + outputDefinitions: + artifacts: + pipeline_1: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-pipeline-1: + container: + command: + - python + image: python:3.9 +pipelineInfo: + description: DESCRIPTION + name: myname +root: + dag: + outputs: + artifacts: + Output: + artifactSelectors: + - outputArtifactKey: pipeline_1 + producerSubtask: pipeline-1 + tasks: + pipeline-1: + cachingOptions: + enableCache: true + componentRef: + name: comp-pipeline-1 + taskInfo: + name: pipeline-1 + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.10.1 diff --git a/example/components_pipeline_with_dewret_pipeline-1.yaml b/example/components_pipeline_with_dewret_pipeline-1.yaml new file mode 100644 index 00000000..9579f811 --- /dev/null +++ b/example/components_pipeline_with_dewret_pipeline-1.yaml @@ -0,0 +1,540 @@ +components: + comp-evaluate-1-1: + executorLabel: exec-evaluate-1-1 + inputDefinitions: + artifacts: + predictions: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + y_test: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + evaluate_1_1: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-load-dataset-from-gcs-1-1: + executorLabel: exec-load-dataset-from-gcs-1-1 + inputDefinitions: + parameters: + blob_name: + parameterType: STRING + bucket_name: + parameterType: STRING + outputDefinitions: + artifacts: + load_dataset_from_gcs_1_1: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-model-training-1-1: + executorLabel: exec-model-training-1-1 + inputDefinitions: + artifacts: + X_test_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + X_train_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + y_train_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + model_training_1_1__0: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + model_training_1_1__1: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-predict-1-1: + executorLabel: exec-predict-1-1 + inputDefinitions: + artifacts: + X_test: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + trained_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + predict_1_1: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-preprocess-the-dataset-1-1: + executorLabel: exec-preprocess-the-dataset-1-1 + inputDefinitions: + artifacts: + dataset_content: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
outputDefinitions: + artifacts: + preprocess_the_dataset_1_1: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-train-test-split-1-1: + executorLabel: exec-train-test-split-1-1 + inputDefinitions: + artifacts: + input_df: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + train_test_split_1_1__0: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + train_test_split_1_1__1: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + train_test_split_1_1__2: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + train_test_split_1_1__3: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-evaluate-1-1: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - evaluate_ + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'joblib' 'minio==7.1.14'\ + \ 'numpy' 'pandas' 'scikit-learn' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef evaluate(y_test: Dataset, predictions: Dataset) -> Dataset:\n\ + \ from sklearn.metrics import mean_squared_error, mean_absolute_error\n\ + \ import pandas as pd\n import numpy as np\n y_test_data = pd.read_csv(y_test)\n\ + \ predictions_data = pd.read_csv(predictions)\n\n mae = mean_absolute_error(y_test_data,\ + \ predictions_data)\n mse = mean_squared_error(y_test_data, predictions_data)\n\ + \ rmse = np.sqrt(mse)\n\n with open(metrics_output, 'w') as f:\n \ + \ f.write(f'MAE: {mae}\\n')\n f.write(f'MSE: {mse}\\n')\n \ + \ f.write(f'RMSE: {rmse}\\n')\n\n return metrics_output\n\n\nfrom\ + \ kfp.dsl.types.artifact_types import *\nimport typing\nfrom typing import\ + \ NamedTuple\nimport os\nimport shutil\nfrom tempfile import mkstemp\nfrom\ + \ pathlib import Path\ndef evaluate_(y_test: Input[Dataset], predictions:\ + \ Input[Dataset], evaluate_1_1: dsl.Output[Dataset]):\n paths = {}\n\ + \ unpaths = {}\n y_test = y_test.path\n predictions = predictions.path\n\ + \ f, metrics_output = mkstemp(); os.close(f)\n paths['metrics_output']\ + \ = Path(metrics_output)\n unpaths[Path(metrics_output)] = 0\n globals().update(paths)\n\ + \ final_output = evaluate(y_test=y_test, predictions=predictions)\n \ + \ shutil.move(final_output, evaluate_1_1.path)\n for p in unpaths:\ + \ shutil.rmtree(str(p), ignore_errors=True)\n" + image: python:3.9 + exec-load-dataset-from-gcs-1-1: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - load_dataset_from_gcs_ + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'joblib' 'minio==7.1.14'\ + \ 'numpy' 'pandas' 'scikit-learn' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef load_dataset_from_gcs(bucket_name: str, blob_name: str) -> Dataset:\n\ + \ import pandas as pd\n from minio import Minio\n from minio.error\ + \ import S3Error\n import io\n client = Minio('minio-service.default:9000',\n\ + \ 'minio',\n 'minio123',\n \ + \ secure=False)\n response = client.get_object(bucket_name,\ + \ blob_name)\n\n data = pd.read_csv(io.BytesIO(response.data), header=None,\ + \ delim_whitespace=True, comment=\"#\")\n data.to_csv(output_dataset,\ + \ header=True, index=False)\n\n return output_dataset\n\n\nfrom kfp.dsl.types.artifact_types\ + \ import *\nimport typing\nfrom typing import NamedTuple\nimport os\nimport\ + \ shutil\nfrom tempfile import mkstemp\nfrom pathlib import Path\ndef load_dataset_from_gcs_(bucket_name:\ + \ str, blob_name: str, load_dataset_from_gcs_1_1: dsl.Output[Dataset]):\n\ + \ paths = {}\n unpaths = {}\n f, output_dataset = mkstemp(); os.close(f)\n\ + \ paths['output_dataset'] = Path(output_dataset)\n unpaths[Path(output_dataset)]\ + \ = 0\n globals().update(paths)\n final_output = load_dataset_from_gcs(bucket_name=bucket_name,\ + \ blob_name=blob_name)\n shutil.move(final_output, load_dataset_from_gcs_1_1.path)\n\ + \ for p in unpaths: shutil.rmtree(str(p), ignore_errors=True)\n" + image: python:3.9 + exec-model-training-1-1: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - model_training_ + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'joblib' 'minio==7.1.14'\ + \ 'numpy' 'pandas' 'scikit-learn' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef model_training(\n X_train_input: Dataset,\n X_test_input:\ + \ Dataset,\n y_train_input: Dataset,\n) -> tuple[Dataset, Artifact]:\n\ + \ from sklearn.preprocessing import StandardScaler\n from sklearn.linear_model\ + \ import LinearRegression\n import joblib\n from minio import Minio\n\ + \ from minio.error import S3Error\n import io\n import pandas as\ + \ pd\n scaler = StandardScaler()\n\n X_train = pd.read_csv(X_train_input)\n\ + \ X_test = pd.read_csv(X_test_input)\n y_train = pd.read_csv(y_train_input)\n\ + \n X_train_scaled = scaler.fit_transform(X_train)\n X_test_scaled2=\ + \ pd.DataFrame(scaler.transform(X_test))\n X_test_scaled2.to_csv(X_test_scaled,\ + \ index=False) # Fixing typo here\n\n regression = LinearRegression()\n\ + \ regression.fit(X_train_scaled, y_train)\n\n joblib.dump(regression,\ + \ model_file)\n client = Minio('minio-service.default:9000',\n \ + \ 'minio',\n 'minio123',\n \ + \ secure=False)\n client.fput_object('boston-house-data', 'data/model.pkl',\ + \ str(model_file))\n return X_test_scaled, model_file\n\nmodel_training_1_1\ + \ = NamedTuple('model_training_1_1', (('model_training_1_1__0', Dataset),\ + \ ('model_training_1_1__1', Artifact)))\n\nfrom kfp.dsl.types.artifact_types\ + \ import *\nimport typing\nfrom typing import NamedTuple\nimport os\nimport\ + \ shutil\nfrom tempfile import mkstemp\nfrom pathlib import Path\ndef model_training_(X_train_input:\ + \ Input[Dataset], X_test_input: Input[Dataset], y_train_input: Input[Dataset],\ + \ model_training_1_1__0: dsl.Output[Dataset], model_training_1_1__1: dsl.Output[Artifact]):\n\ + \ paths = {}\n unpaths = {}\n X_train_input = X_train_input.path\n\ + \ X_test_input = X_test_input.path\n y_train_input = y_train_input.path\n\ + \ f, X_test_scaled = mkstemp(); os.close(f)\n paths['X_test_scaled']\ + \ = Path(X_test_scaled)\n unpaths[Path(X_test_scaled)] = 0\n f, model_file\ + \ = mkstemp(); os.close(f)\n paths['model_file'] = Path(model_file)\n\ + \ unpaths[Path(model_file)] = 0\n globals().update(paths)\n final_output\ + \ = model_training(X_train_input=X_train_input, X_test_input=X_test_input,\ + \ y_train_input=y_train_input)\n model_training_1_1 = (model_training_1_1__0,\ + \ model_training_1_1__1)\n for p, q in zip(final_output, model_training_1_1):\ + \ shutil.move(p, q.path)\n for p in unpaths: shutil.rmtree(str(p), ignore_errors=True)\n" + image: python:3.9 + exec-predict-1-1: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - predict_ + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'joblib' 'minio==7.1.14'\ + \ 'numpy' 'pandas' 'scikit-learn' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef predict(X_test: Dataset, trained_model: Artifact) -> Dataset:\n\ + \ import joblib\n import pandas as pd\n from minio import Minio\n\ + \ from minio.error import S3Error\n X_test_data = pd.read_csv(X_test)\n\ + \n client = Minio('minio-service.default:9000',\n 'minio',\n\ + \ 'minio123',\n secure=False)\n model_file\ + \ = 'model.pkl'\n client.fget_object('boston-house-data', 'data/model.pkl',\ + \ model_file)\n regression = joblib.load(model_file)\n\n predictions\ + \ = regression.predict(X_test_data)\n pd.DataFrame(predictions).to_csv(prediction,\ + \ index=False)\n return prediction\n\n\nfrom kfp.dsl.types.artifact_types\ + \ import *\nimport typing\nfrom typing import NamedTuple\nimport os\nimport\ + \ shutil\nfrom tempfile import mkstemp\nfrom pathlib import Path\ndef predict_(X_test:\ + \ Input[Dataset], trained_model: Input[Artifact], predict_1_1: dsl.Output[Dataset]):\n\ + \ paths = {}\n unpaths = {}\n X_test = X_test.path\n trained_model\ + \ = trained_model.path\n f, prediction = mkstemp(); os.close(f)\n \ + \ paths['prediction'] = Path(prediction)\n unpaths[Path(prediction)]\ + \ = 0\n globals().update(paths)\n final_output = predict(X_test=X_test,\ + \ trained_model=trained_model)\n shutil.move(final_output, predict_1_1.path)\n\ + \ for p in unpaths: shutil.rmtree(str(p), ignore_errors=True)\n" + image: python:3.9 + exec-preprocess-the-dataset-1-1: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - preprocess_the_dataset_ + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'joblib' 'minio==7.1.14'\ + \ 'numpy' 'pandas' 'scikit-learn' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef preprocess_the_dataset(dataset_content: Dataset) -> Dataset:\n\ + \ import pandas as pd\n data = pd.read_csv(dataset_content, header=0)\n\ + \ if data.isna().sum().any():\n raise ValueError(\"The data needs\ + \ preprocessing (remove missing values)\")\n\n data.to_csv(out_data,\ + \ index=False)\n return out_data\n\n\nfrom kfp.dsl.types.artifact_types\ + \ import *\nimport typing\nfrom typing import NamedTuple\nimport os\nimport\ + \ shutil\nfrom tempfile import mkstemp\nfrom pathlib import Path\ndef preprocess_the_dataset_(dataset_content:\ + \ Input[Dataset], preprocess_the_dataset_1_1: dsl.Output[Dataset]):\n \ + \ paths = {}\n unpaths = {}\n dataset_content = dataset_content.path\n\ + \ f, out_data = mkstemp(); os.close(f)\n paths['out_data'] = Path(out_data)\n\ + \ unpaths[Path(out_data)] = 0\n globals().update(paths)\n final_output\ + \ = preprocess_the_dataset(dataset_content=dataset_content)\n shutil.move(final_output,\ + \ preprocess_the_dataset_1_1.path)\n for p in unpaths: shutil.rmtree(str(p),\ + \ ignore_errors=True)\n" + image: python:3.9 + exec-train-test-split-1-1: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - train_test_split_ + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.10.1'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'joblib' 'minio==7.1.14'\ + \ 'numpy' 'pandas' 'scikit-learn' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef train_test_split(\n input_df: Dataset, \n) -> tuple[Dataset,\ + \ Dataset, Dataset, Dataset]:\n from sklearn.model_selection import train_test_split\n\ + \ import pandas as pd\n df = pd.read_csv(input_df)\n X = df.iloc[:,\ + \ :-1]\n y = df.iloc[:, -1]\n X_train, X_test, y_train, y_test = train_test_split(X,\ + \ y, test_size=0.3, random_state=42)\n\n X_train.to_csv(X_train_artifact,\ + \ index=False)\n X_test.to_csv(X_test_artifact, index=False)\n y_train.to_csv(y_train_artifact,\ + \ index=False)\n y_test.to_csv(y_test_artifact, index=False)\n\n return\ + \ (\n X_train_artifact,\n X_test_artifact,\n y_train_artifact,\n\ + \ y_test_artifact,\n )\n\ntrain_test_split_1_1 = NamedTuple('train_test_split_1_1',\ + \ (('train_test_split_1_1__0', Dataset), ('train_test_split_1_1__1', Dataset),\ + \ ('train_test_split_1_1__2', Dataset), ('train_test_split_1_1__3', Dataset)))\n\ + \nfrom kfp.dsl.types.artifact_types import *\nimport typing\nfrom typing\ + \ import NamedTuple\nimport os\nimport shutil\nfrom tempfile import mkstemp\n\ + from pathlib import Path\ndef train_test_split_(input_df: Input[Dataset],\ + \ train_test_split_1_1__0: dsl.Output[Dataset], train_test_split_1_1__1:\ + \ dsl.Output[Dataset], train_test_split_1_1__2: dsl.Output[Dataset], train_test_split_1_1__3:\ + \ dsl.Output[Dataset]):\n paths = {}\n unpaths = {}\n input_df\ + \ = input_df.path\n f, X_train_artifact = mkstemp(); os.close(f)\n \ + \ paths['X_train_artifact'] = Path(X_train_artifact)\n unpaths[Path(X_train_artifact)]\ + \ = 0\n f, X_test_artifact = mkstemp(); os.close(f)\n paths['X_test_artifact']\ + \ = Path(X_test_artifact)\n unpaths[Path(X_test_artifact)] = 0\n f,\ + \ y_train_artifact = mkstemp(); os.close(f)\n paths['y_train_artifact']\ + \ = Path(y_train_artifact)\n unpaths[Path(y_train_artifact)] = 0\n \ + \ f, y_test_artifact = mkstemp(); os.close(f)\n paths['y_test_artifact']\ + \ = Path(y_test_artifact)\n unpaths[Path(y_test_artifact)] = 0\n globals().update(paths)\n\ + \ final_output = train_test_split(input_df=input_df)\n train_test_split_1_1\ + \ = (train_test_split_1_1__0, train_test_split_1_1__1, train_test_split_1_1__2,\ + \ train_test_split_1_1__3)\n for p, q in zip(final_output, train_test_split_1_1):\ + \ shutil.move(p, q.path)\n for p in unpaths: shutil.rmtree(str(p), ignore_errors=True)\n" + image: python:3.9 +pipelineInfo: + description: DESCRIPTION + name: myname +root: + dag: + outputs: + artifacts: + Output: + artifactSelectors: + - outputArtifactKey: evaluate_1_1 + producerSubtask: evaluate-1-1 + tasks: + evaluate-1-1: + cachingOptions: + enableCache: true + componentRef: + name: comp-evaluate-1-1 + dependentTasks: + - predict-1-1 + - train-test-split-1-1 + inputs: + artifacts: + predictions: + 
taskOutputArtifact: + outputArtifactKey: predict_1_1 + producerTask: predict-1-1 + y_test: + taskOutputArtifact: + outputArtifactKey: train_test_split_1_1__3 + producerTask: train-test-split-1-1 + taskInfo: + name: evaluate-1-1 + load-dataset-from-gcs-1-1: + cachingOptions: + enableCache: true + componentRef: + name: comp-load-dataset-from-gcs-1-1 + inputs: + parameters: + blob_name: + runtimeValue: + constant: data/housing.csv + bucket_name: + runtimeValue: + constant: boston-house-data + taskInfo: + name: load-dataset-from-gcs-1-1 + model-training-1-1: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-training-1-1 + dependentTasks: + - train-test-split-1-1 + inputs: + artifacts: + X_test_input: + taskOutputArtifact: + outputArtifactKey: train_test_split_1_1__1 + producerTask: train-test-split-1-1 + X_train_input: + taskOutputArtifact: + outputArtifactKey: train_test_split_1_1__0 + producerTask: train-test-split-1-1 + y_train_input: + taskOutputArtifact: + outputArtifactKey: train_test_split_1_1__2 + producerTask: train-test-split-1-1 + taskInfo: + name: model-training-1-1 + predict-1-1: + cachingOptions: + enableCache: true + componentRef: + name: comp-predict-1-1 + dependentTasks: + - model-training-1-1 + inputs: + artifacts: + X_test: + taskOutputArtifact: + outputArtifactKey: model_training_1_1__0 + producerTask: model-training-1-1 + trained_model: + taskOutputArtifact: + outputArtifactKey: model_training_1_1__1 + producerTask: model-training-1-1 + taskInfo: + name: predict-1-1 + preprocess-the-dataset-1-1: + cachingOptions: + enableCache: true + componentRef: + name: comp-preprocess-the-dataset-1-1 + dependentTasks: + - load-dataset-from-gcs-1-1 + inputs: + artifacts: + dataset_content: + taskOutputArtifact: + outputArtifactKey: load_dataset_from_gcs_1_1 + producerTask: load-dataset-from-gcs-1-1 + taskInfo: + name: preprocess-the-dataset-1-1 + train-test-split-1-1: + cachingOptions: + enableCache: true + componentRef: + name: comp-train-test-split-1-1 + dependentTasks: + - preprocess-the-dataset-1-1 + inputs: + artifacts: + input_df: + taskOutputArtifact: + outputArtifactKey: preprocess_the_dataset_1_1 + producerTask: preprocess-the-dataset-1-1 + taskInfo: + name: train-test-split-1-1 + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.10.1 diff --git a/example/kfp_example.py b/example/kfp_example.py new file mode 100644 index 00000000..a13f2a39 --- /dev/null +++ b/example/kfp_example.py @@ -0,0 +1,178 @@ +# Copyright Shray15, 2024- +# https://github.com/Shray15/Boston_House_Pricing/blob/53837c94643531f93c0ff00b40a7fbef9793f17d/boston-house-kfp.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
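+#
+# Upstream Kubeflow Pipelines (v2) version of the Boston house-price example,
+# vendored here for comparison with the dewret-based version added in
+# example/kfp_example_with_dewret.py. The pipeline loads the housing dataset
+# from MinIO, preprocesses it, splits it into train/test sets, trains a
+# linear regression model, predicts on the test set, and writes evaluation
+# metrics (MAE/MSE/RMSE).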
+
+from kfp import dsl, compiler
+from kfp.dsl import Input, Output, Artifact, Dataset
+from pathlib import Path
+
+EXPERIMENT_NAME = 'Boston-house-pred'
+
+@dsl.component(base_image='python:3.9', packages_to_install=["pandas", 'minio==7.1.14'])
+def load_dataset_from_gcs(bucket_name: str, blob_name: str, output_dataset: Output[Dataset]):
+    import pandas as pd
+    from minio import Minio
+    from minio.error import S3Error
+    import io
+    client = Minio('minio-service.default:9000',
+                   'minio',
+                   'minio123',
+                   secure=False)
+    response = client.get_object(bucket_name, blob_name)
+
+    data = pd.read_csv(io.BytesIO(response.data), header=None, delim_whitespace=True, comment="#")
+    data.to_csv(output_dataset.path, header=True, index=False)
+
+@dsl.component(base_image='python:3.9', packages_to_install=["pandas"])
+def preprocess_the_dataset(dataset_content: Input[Dataset], out_data: Output[Dataset]):
+    import pandas as pd
+    data = pd.read_csv(dataset_content.path, header=0)
+    if data.isna().sum().any():
+        raise ValueError("The data needs preprocessing (remove missing values)")
+
+    data.to_csv(out_data.path, index=False)
+
+@dsl.component(base_image='python:3.9', packages_to_install=["scikit-learn", "pandas"])
+def train_test_split(input_df: Input[Dataset],
+                     X_train_artifact: Output[Dataset],
+                     X_test_artifact: Output[Dataset],
+                     y_train_artifact: Output[Dataset],
+                     y_test_artifact: Output[Dataset]):
+    from sklearn.model_selection import train_test_split
+    import pandas as pd
+    df = pd.read_csv(input_df.path)
+    X = df.iloc[:, :-1]
+    y = df.iloc[:, -1]
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+
+    X_train.to_csv(X_train_artifact.path, index=False)
+    X_test.to_csv(X_test_artifact.path, index=False)
+    y_train.to_csv(y_train_artifact.path, index=False)
+    y_test.to_csv(y_test_artifact.path, index=False)
+
+@dsl.component(base_image='python:3.9', packages_to_install=['numpy', 'scikit-learn', 'joblib', "pandas", 'minio==7.1.14'])
+def model_training(X_train_input: Input[Dataset],
+                   X_test_input: Input[Dataset],
+                   y_train_input: Input[Dataset],
+                   X_test_scaled: Output[Dataset],
+                   model_output: Output[Artifact]):
+    from sklearn.preprocessing import StandardScaler
+    from sklearn.linear_model import LinearRegression
+    import joblib
+    from minio import Minio
+    from minio.error import S3Error
+    import io
+    import pandas as pd
+    scaler = StandardScaler()
+
+    X_train = pd.read_csv(X_train_input.path)
+    X_test = pd.read_csv(X_test_input.path)
+    y_train = pd.read_csv(y_train_input.path)
+
+    X_train_scaled = scaler.fit_transform(X_train)
+    # named X_test_scaled2 to avoid shadowing the X_test_scaled output artifact
+    X_test_scaled2 = pd.DataFrame(scaler.transform(X_test))
+    X_test_scaled2.to_csv(X_test_scaled.path, index=False)
+
+    regression = LinearRegression()
+    regression.fit(X_train_scaled, y_train)
+
+    model_file = '/trained_model.joblib'
+    joblib.dump(regression, model_file)
+    # Upload the model file to MinIO object storage
+    client = Minio('minio-service.default:9000',
+                   'minio',
+                   'minio123',
+                   secure=False)
+    client.fput_object('boston-house-data', 'data/model.pkl', model_file)
+
+    # Expose the trained model as this component's output artifact by
+    # copying it to model_output.path so downstream tasks can consume it.
+    import shutil
+    shutil.copy(model_file, model_output.path)
+
+@dsl.component(base_image='python:3.9', packages_to_install=["pandas", "joblib", 'minio==7.1.14', "scikit-learn"])
+def predict(X_test: Input[Dataset], trained_model: Input[Artifact], prediction: Output[Dataset]):
+    import joblib
+    import pandas as pd
+    from minio import Minio
+    from minio.error import S3Error
+    import sklearn
+    X_test_data = pd.read_csv(X_test.path)
+
+    client = Minio('minio-service.default:9000',
+                   'minio',
+                   'minio123',
+                   secure=False)
+    model_file = 'model.pkl'
+    client.fget_object('boston-house-data', 'data/model.pkl', model_file)
+    regression = joblib.load(model_file)
+
+    predictions = regression.predict(X_test_data)
+    pd.DataFrame(predictions).to_csv(prediction.path, index=False)
+
+@dsl.component(base_image='python:3.9', packages_to_install=["pandas", "scikit-learn", "numpy"])
+def evaluate(y_test: Input[Dataset], predictions: Input[Dataset], metrics_output: Output[Artifact]):
+    from sklearn.metrics import mean_squared_error, mean_absolute_error
+    import pandas as pd
+    import numpy as np
+    y_test_data = pd.read_csv(y_test.path)
+    predictions_data = pd.read_csv(predictions.path)
+
+    mae = mean_absolute_error(y_test_data, predictions_data)
+    mse = mean_squared_error(y_test_data, predictions_data)
+    rmse = np.sqrt(mse)
+
+    with open(metrics_output.path, 'w') as f:
+        f.write(f'MAE: {mae}\n')
+        f.write(f'MSE: {mse}\n')
+        f.write(f'RMSE: {rmse}\n')
+
+@dsl.pipeline(
+    name="Boston-house-training-prediction",
+    description='A pipeline to prepare the dataset, split into train and test sets, train a model, and predict',
+    pipeline_root='gs://boston-house-pred'
+)
+def pipeline():
+    read_data = load_dataset_from_gcs(bucket_name="boston-house-data", blob_name="data/housing.csv")
+    preprocess_data = preprocess_the_dataset(dataset_content=read_data.outputs['output_dataset'])
+    split = train_test_split(input_df=preprocess_data.outputs['out_data'])
+    trained_model = model_training(X_train_input=split.outputs['X_train_artifact'],
+                                   X_test_input=split.outputs['X_test_artifact'],
+                                   y_train_input=split.outputs['y_train_artifact'])
+    predicted_value = predict(X_test=trained_model.outputs['X_test_scaled'], trained_model=trained_model.outputs['model_output'])
+    evaluate(y_test=split.outputs['y_test_artifact'], predictions=predicted_value.outputs['prediction'])
+
+
+pipeline_file = 'components_pipeline.yaml'
+compiler.Compiler().compile(pipeline_func=pipeline, package_path=pipeline_file)
+
+def upload():
+    from minio import Minio
+    from minio.error import S3Error
+    # You need to port-forward the MinIO service manually before running this
+    client = Minio('localhost:9000',
+                   'minio',
+                   'minio123',
+                   secure=False)
+    bucket_name = 'boston-house-data'
+    found = client.bucket_exists(bucket_name)
+    if not found:
+        client.make_bucket(bucket_name)
+        print("Created bucket", bucket_name)
+    else:
+        print("Bucket", bucket_name, "already exists")
+    client.fput_object(bucket_name, 'data/housing.csv', Path(__file__).parent / 'kfp_example_housing.csv')
+#upload()
diff --git a/example/kfp_example_housing.csv b/example/kfp_example_housing.csv
new file mode 100644
index 00000000..1521927e
--- /dev/null
+++ b/example/kfp_example_housing.csv
@@ -0,0 +1,521 @@
+# Copyright Shray15, 2024-
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
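+#
+# Whitespace-separated, no header row; read above with pandas
+# (header=None, comment="#"). Columns follow the standard Boston
+# housing ordering:
+# CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT MEDV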
+# +# https://raw.githubusercontent.com/Shray15/Boston_House_Pricing/53837c94643531f93c0ff00b40a7fbef9793f17d/housing.csv + 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 4.0900 1 296.0 15.30 396.90 4.98 24.00 + 0.02731 0.00 7.070 0 0.4690 6.4210 78.90 4.9671 2 242.0 17.80 396.90 9.14 21.60 + 0.02729 0.00 7.070 0 0.4690 7.1850 61.10 4.9671 2 242.0 17.80 392.83 4.03 34.70 + 0.03237 0.00 2.180 0 0.4580 6.9980 45.80 6.0622 3 222.0 18.70 394.63 2.94 33.40 + 0.06905 0.00 2.180 0 0.4580 7.1470 54.20 6.0622 3 222.0 18.70 396.90 5.33 36.20 + 0.02985 0.00 2.180 0 0.4580 6.4300 58.70 6.0622 3 222.0 18.70 394.12 5.21 28.70 + 0.08829 12.50 7.870 0 0.5240 6.0120 66.60 5.5605 5 311.0 15.20 395.60 12.43 22.90 + 0.14455 12.50 7.870 0 0.5240 6.1720 96.10 5.9505 5 311.0 15.20 396.90 19.15 27.10 + 0.21124 12.50 7.870 0 0.5240 5.6310 100.00 6.0821 5 311.0 15.20 386.63 29.93 16.50 + 0.17004 12.50 7.870 0 0.5240 6.0040 85.90 6.5921 5 311.0 15.20 386.71 17.10 18.90 + 0.22489 12.50 7.870 0 0.5240 6.3770 94.30 6.3467 5 311.0 15.20 392.52 20.45 15.00 + 0.11747 12.50 7.870 0 0.5240 6.0090 82.90 6.2267 5 311.0 15.20 396.90 13.27 18.90 + 0.09378 12.50 7.870 0 0.5240 5.8890 39.00 5.4509 5 311.0 15.20 390.50 15.71 21.70 + 0.62976 0.00 8.140 0 0.5380 5.9490 61.80 4.7075 4 307.0 21.00 396.90 8.26 20.40 + 0.63796 0.00 8.140 0 0.5380 6.0960 84.50 4.4619 4 307.0 21.00 380.02 10.26 18.20 + 0.62739 0.00 8.140 0 0.5380 5.8340 56.50 4.4986 4 307.0 21.00 395.62 8.47 19.90 + 1.05393 0.00 8.140 0 0.5380 5.9350 29.30 4.4986 4 307.0 21.00 386.85 6.58 23.10 + 0.78420 0.00 8.140 0 0.5380 5.9900 81.70 4.2579 4 307.0 21.00 386.75 14.67 17.50 + 0.80271 0.00 8.140 0 0.5380 5.4560 36.60 3.7965 4 307.0 21.00 288.99 11.69 20.20 + 0.72580 0.00 8.140 0 0.5380 5.7270 69.50 3.7965 4 307.0 21.00 390.95 11.28 18.20 + 1.25179 0.00 8.140 0 0.5380 5.5700 98.10 3.7979 4 307.0 21.00 376.57 21.02 13.60 + 0.85204 0.00 8.140 0 0.5380 5.9650 89.20 4.0123 4 307.0 21.00 392.53 13.83 19.60 + 1.23247 0.00 8.140 0 0.5380 6.1420 91.70 3.9769 4 307.0 21.00 396.90 18.72 15.20 + 0.98843 0.00 8.140 0 0.5380 5.8130 100.00 4.0952 4 307.0 21.00 394.54 19.88 14.50 + 0.75026 0.00 8.140 0 0.5380 5.9240 94.10 4.3996 4 307.0 21.00 394.33 16.30 15.60 + 0.84054 0.00 8.140 0 0.5380 5.5990 85.70 4.4546 4 307.0 21.00 303.42 16.51 13.90 + 0.67191 0.00 8.140 0 0.5380 5.8130 90.30 4.6820 4 307.0 21.00 376.88 14.81 16.60 + 0.95577 0.00 8.140 0 0.5380 6.0470 88.80 4.4534 4 307.0 21.00 306.38 17.28 14.80 + 0.77299 0.00 8.140 0 0.5380 6.4950 94.40 4.4547 4 307.0 21.00 387.94 12.80 18.40 + 1.00245 0.00 8.140 0 0.5380 6.6740 87.30 4.2390 4 307.0 21.00 380.23 11.98 21.00 + 1.13081 0.00 8.140 0 0.5380 5.7130 94.10 4.2330 4 307.0 21.00 360.17 22.60 12.70 + 1.35472 0.00 8.140 0 0.5380 6.0720 100.00 4.1750 4 307.0 21.00 376.73 13.04 14.50 + 1.38799 0.00 8.140 0 0.5380 5.9500 82.00 3.9900 4 307.0 21.00 232.60 27.71 13.20 + 1.15172 0.00 8.140 0 0.5380 5.7010 95.00 3.7872 4 307.0 21.00 358.77 18.35 13.10 + 1.61282 0.00 8.140 0 0.5380 6.0960 96.90 3.7598 4 307.0 21.00 248.31 20.34 13.50 + 0.06417 0.00 5.960 0 0.4990 5.9330 68.20 3.3603 5 279.0 19.20 396.90 9.68 18.90 + 0.09744 0.00 5.960 0 0.4990 5.8410 61.40 3.3779 5 279.0 19.20 377.56 11.41 20.00 + 0.08014 0.00 5.960 0 0.4990 5.8500 41.50 3.9342 5 279.0 19.20 396.90 8.77 21.00 + 0.17505 0.00 5.960 0 0.4990 5.9660 30.20 3.8473 5 279.0 19.20 393.43 10.13 24.70 + 0.02763 75.00 2.950 0 0.4280 6.5950 21.80 5.4011 3 252.0 18.30 395.63 4.32 30.80 + 0.03359 75.00 2.950 0 0.4280 7.0240 15.80 5.4011 3 252.0 18.30 395.62 1.98 34.90 + 0.12744 0.00 6.910 0 0.4480 
6.7700 2.90 5.7209 3 233.0 17.90 385.41 4.84 26.60 + 0.14150 0.00 6.910 0 0.4480 6.1690 6.60 5.7209 3 233.0 17.90 383.37 5.81 25.30 + 0.15936 0.00 6.910 0 0.4480 6.2110 6.50 5.7209 3 233.0 17.90 394.46 7.44 24.70 + 0.12269 0.00 6.910 0 0.4480 6.0690 40.00 5.7209 3 233.0 17.90 389.39 9.55 21.20 + 0.17142 0.00 6.910 0 0.4480 5.6820 33.80 5.1004 3 233.0 17.90 396.90 10.21 19.30 + 0.18836 0.00 6.910 0 0.4480 5.7860 33.30 5.1004 3 233.0 17.90 396.90 14.15 20.00 + 0.22927 0.00 6.910 0 0.4480 6.0300 85.50 5.6894 3 233.0 17.90 392.74 18.80 16.60 + 0.25387 0.00 6.910 0 0.4480 5.3990 95.30 5.8700 3 233.0 17.90 396.90 30.81 14.40 + 0.21977 0.00 6.910 0 0.4480 5.6020 62.00 6.0877 3 233.0 17.90 396.90 16.20 19.40 + 0.08873 21.00 5.640 0 0.4390 5.9630 45.70 6.8147 4 243.0 16.80 395.56 13.45 19.70 + 0.04337 21.00 5.640 0 0.4390 6.1150 63.00 6.8147 4 243.0 16.80 393.97 9.43 20.50 + 0.05360 21.00 5.640 0 0.4390 6.5110 21.10 6.8147 4 243.0 16.80 396.90 5.28 25.00 + 0.04981 21.00 5.640 0 0.4390 5.9980 21.40 6.8147 4 243.0 16.80 396.90 8.43 23.40 + 0.01360 75.00 4.000 0 0.4100 5.8880 47.60 7.3197 3 469.0 21.10 396.90 14.80 18.90 + 0.01311 90.00 1.220 0 0.4030 7.2490 21.90 8.6966 5 226.0 17.90 395.93 4.81 35.40 + 0.02055 85.00 0.740 0 0.4100 6.3830 35.70 9.1876 2 313.0 17.30 396.90 5.77 24.70 + 0.01432 100.00 1.320 0 0.4110 6.8160 40.50 8.3248 5 256.0 15.10 392.90 3.95 31.60 + 0.15445 25.00 5.130 0 0.4530 6.1450 29.20 7.8148 8 284.0 19.70 390.68 6.86 23.30 + 0.10328 25.00 5.130 0 0.4530 5.9270 47.20 6.9320 8 284.0 19.70 396.90 9.22 19.60 + 0.14932 25.00 5.130 0 0.4530 5.7410 66.20 7.2254 8 284.0 19.70 395.11 13.15 18.70 + 0.17171 25.00 5.130 0 0.4530 5.9660 93.40 6.8185 8 284.0 19.70 378.08 14.44 16.00 + 0.11027 25.00 5.130 0 0.4530 6.4560 67.80 7.2255 8 284.0 19.70 396.90 6.73 22.20 + 0.12650 25.00 5.130 0 0.4530 6.7620 43.40 7.9809 8 284.0 19.70 395.58 9.50 25.00 + 0.01951 17.50 1.380 0 0.4161 7.1040 59.50 9.2229 3 216.0 18.60 393.24 8.05 33.00 + 0.03584 80.00 3.370 0 0.3980 6.2900 17.80 6.6115 4 337.0 16.10 396.90 4.67 23.50 + 0.04379 80.00 3.370 0 0.3980 5.7870 31.10 6.6115 4 337.0 16.10 396.90 10.24 19.40 + 0.05789 12.50 6.070 0 0.4090 5.8780 21.40 6.4980 4 345.0 18.90 396.21 8.10 22.00 + 0.13554 12.50 6.070 0 0.4090 5.5940 36.80 6.4980 4 345.0 18.90 396.90 13.09 17.40 + 0.12816 12.50 6.070 0 0.4090 5.8850 33.00 6.4980 4 345.0 18.90 396.90 8.79 20.90 + 0.08826 0.00 10.810 0 0.4130 6.4170 6.60 5.2873 4 305.0 19.20 383.73 6.72 24.20 + 0.15876 0.00 10.810 0 0.4130 5.9610 17.50 5.2873 4 305.0 19.20 376.94 9.88 21.70 + 0.09164 0.00 10.810 0 0.4130 6.0650 7.80 5.2873 4 305.0 19.20 390.91 5.52 22.80 + 0.19539 0.00 10.810 0 0.4130 6.2450 6.20 5.2873 4 305.0 19.20 377.17 7.54 23.40 + 0.07896 0.00 12.830 0 0.4370 6.2730 6.00 4.2515 5 398.0 18.70 394.92 6.78 24.10 + 0.09512 0.00 12.830 0 0.4370 6.2860 45.00 4.5026 5 398.0 18.70 383.23 8.94 21.40 + 0.10153 0.00 12.830 0 0.4370 6.2790 74.50 4.0522 5 398.0 18.70 373.66 11.97 20.00 + 0.08707 0.00 12.830 0 0.4370 6.1400 45.80 4.0905 5 398.0 18.70 386.96 10.27 20.80 + 0.05646 0.00 12.830 0 0.4370 6.2320 53.70 5.0141 5 398.0 18.70 386.40 12.34 21.20 + 0.08387 0.00 12.830 0 0.4370 5.8740 36.60 4.5026 5 398.0 18.70 396.06 9.10 20.30 + 0.04113 25.00 4.860 0 0.4260 6.7270 33.50 5.4007 4 281.0 19.00 396.90 5.29 28.00 + 0.04462 25.00 4.860 0 0.4260 6.6190 70.40 5.4007 4 281.0 19.00 395.63 7.22 23.90 + 0.03659 25.00 4.860 0 0.4260 6.3020 32.20 5.4007 4 281.0 19.00 396.90 6.72 24.80 + 0.03551 25.00 4.860 0 0.4260 6.1670 46.70 5.4007 4 281.0 19.00 390.64 7.51 22.90 + 0.05059 0.00 
4.490 0 0.4490 6.3890 48.00 4.7794 3 247.0 18.50 396.90 9.62 23.90 + 0.05735 0.00 4.490 0 0.4490 6.6300 56.10 4.4377 3 247.0 18.50 392.30 6.53 26.60 + 0.05188 0.00 4.490 0 0.4490 6.0150 45.10 4.4272 3 247.0 18.50 395.99 12.86 22.50 + 0.07151 0.00 4.490 0 0.4490 6.1210 56.80 3.7476 3 247.0 18.50 395.15 8.44 22.20 + 0.05660 0.00 3.410 0 0.4890 7.0070 86.30 3.4217 2 270.0 17.80 396.90 5.50 23.60 + 0.05302 0.00 3.410 0 0.4890 7.0790 63.10 3.4145 2 270.0 17.80 396.06 5.70 28.70 + 0.04684 0.00 3.410 0 0.4890 6.4170 66.10 3.0923 2 270.0 17.80 392.18 8.81 22.60 + 0.03932 0.00 3.410 0 0.4890 6.4050 73.90 3.0921 2 270.0 17.80 393.55 8.20 22.00 + 0.04203 28.00 15.040 0 0.4640 6.4420 53.60 3.6659 4 270.0 18.20 395.01 8.16 22.90 + 0.02875 28.00 15.040 0 0.4640 6.2110 28.90 3.6659 4 270.0 18.20 396.33 6.21 25.00 + 0.04294 28.00 15.040 0 0.4640 6.2490 77.30 3.6150 4 270.0 18.20 396.90 10.59 20.60 + 0.12204 0.00 2.890 0 0.4450 6.6250 57.80 3.4952 2 276.0 18.00 357.98 6.65 28.40 + 0.11504 0.00 2.890 0 0.4450 6.1630 69.60 3.4952 2 276.0 18.00 391.83 11.34 21.40 + 0.12083 0.00 2.890 0 0.4450 8.0690 76.00 3.4952 2 276.0 18.00 396.90 4.21 38.70 + 0.08187 0.00 2.890 0 0.4450 7.8200 36.90 3.4952 2 276.0 18.00 393.53 3.57 43.80 + 0.06860 0.00 2.890 0 0.4450 7.4160 62.50 3.4952 2 276.0 18.00 396.90 6.19 33.20 + 0.14866 0.00 8.560 0 0.5200 6.7270 79.90 2.7778 5 384.0 20.90 394.76 9.42 27.50 + 0.11432 0.00 8.560 0 0.5200 6.7810 71.30 2.8561 5 384.0 20.90 395.58 7.67 26.50 + 0.22876 0.00 8.560 0 0.5200 6.4050 85.40 2.7147 5 384.0 20.90 70.80 10.63 18.60 + 0.21161 0.00 8.560 0 0.5200 6.1370 87.40 2.7147 5 384.0 20.90 394.47 13.44 19.30 + 0.13960 0.00 8.560 0 0.5200 6.1670 90.00 2.4210 5 384.0 20.90 392.69 12.33 20.10 + 0.13262 0.00 8.560 0 0.5200 5.8510 96.70 2.1069 5 384.0 20.90 394.05 16.47 19.50 + 0.17120 0.00 8.560 0 0.5200 5.8360 91.90 2.2110 5 384.0 20.90 395.67 18.66 19.50 + 0.13117 0.00 8.560 0 0.5200 6.1270 85.20 2.1224 5 384.0 20.90 387.69 14.09 20.40 + 0.12802 0.00 8.560 0 0.5200 6.4740 97.10 2.4329 5 384.0 20.90 395.24 12.27 19.80 + 0.26363 0.00 8.560 0 0.5200 6.2290 91.20 2.5451 5 384.0 20.90 391.23 15.55 19.40 + 0.10793 0.00 8.560 0 0.5200 6.1950 54.40 2.7778 5 384.0 20.90 393.49 13.00 21.70 + 0.10084 0.00 10.010 0 0.5470 6.7150 81.60 2.6775 6 432.0 17.80 395.59 10.16 22.80 + 0.12329 0.00 10.010 0 0.5470 5.9130 92.90 2.3534 6 432.0 17.80 394.95 16.21 18.80 + 0.22212 0.00 10.010 0 0.5470 6.0920 95.40 2.5480 6 432.0 17.80 396.90 17.09 18.70 + 0.14231 0.00 10.010 0 0.5470 6.2540 84.20 2.2565 6 432.0 17.80 388.74 10.45 18.50 + 0.17134 0.00 10.010 0 0.5470 5.9280 88.20 2.4631 6 432.0 17.80 344.91 15.76 18.30 + 0.13158 0.00 10.010 0 0.5470 6.1760 72.50 2.7301 6 432.0 17.80 393.30 12.04 21.20 + 0.15098 0.00 10.010 0 0.5470 6.0210 82.60 2.7474 6 432.0 17.80 394.51 10.30 19.20 + 0.13058 0.00 10.010 0 0.5470 5.8720 73.10 2.4775 6 432.0 17.80 338.63 15.37 20.40 + 0.14476 0.00 10.010 0 0.5470 5.7310 65.20 2.7592 6 432.0 17.80 391.50 13.61 19.30 + 0.06899 0.00 25.650 0 0.5810 5.8700 69.70 2.2577 2 188.0 19.10 389.15 14.37 22.00 + 0.07165 0.00 25.650 0 0.5810 6.0040 84.10 2.1974 2 188.0 19.10 377.67 14.27 20.30 + 0.09299 0.00 25.650 0 0.5810 5.9610 92.90 2.0869 2 188.0 19.10 378.09 17.93 20.50 + 0.15038 0.00 25.650 0 0.5810 5.8560 97.00 1.9444 2 188.0 19.10 370.31 25.41 17.30 + 0.09849 0.00 25.650 0 0.5810 5.8790 95.80 2.0063 2 188.0 19.10 379.38 17.58 18.80 + 0.16902 0.00 25.650 0 0.5810 5.9860 88.40 1.9929 2 188.0 19.10 385.02 14.81 21.40 + 0.38735 0.00 25.650 0 0.5810 5.6130 95.60 1.7572 2 188.0 19.10 359.29 27.26 
15.70 + 0.25915 0.00 21.890 0 0.6240 5.6930 96.00 1.7883 4 437.0 21.20 392.11 17.19 16.20 + 0.32543 0.00 21.890 0 0.6240 6.4310 98.80 1.8125 4 437.0 21.20 396.90 15.39 18.00 + 0.88125 0.00 21.890 0 0.6240 5.6370 94.70 1.9799 4 437.0 21.20 396.90 18.34 14.30 + 0.34006 0.00 21.890 0 0.6240 6.4580 98.90 2.1185 4 437.0 21.20 395.04 12.60 19.20 + 1.19294 0.00 21.890 0 0.6240 6.3260 97.70 2.2710 4 437.0 21.20 396.90 12.26 19.60 + 0.59005 0.00 21.890 0 0.6240 6.3720 97.90 2.3274 4 437.0 21.20 385.76 11.12 23.00 + 0.32982 0.00 21.890 0 0.6240 5.8220 95.40 2.4699 4 437.0 21.20 388.69 15.03 18.40 + 0.97617 0.00 21.890 0 0.6240 5.7570 98.40 2.3460 4 437.0 21.20 262.76 17.31 15.60 + 0.55778 0.00 21.890 0 0.6240 6.3350 98.20 2.1107 4 437.0 21.20 394.67 16.96 18.10 + 0.32264 0.00 21.890 0 0.6240 5.9420 93.50 1.9669 4 437.0 21.20 378.25 16.90 17.40 + 0.35233 0.00 21.890 0 0.6240 6.4540 98.40 1.8498 4 437.0 21.20 394.08 14.59 17.10 + 0.24980 0.00 21.890 0 0.6240 5.8570 98.20 1.6686 4 437.0 21.20 392.04 21.32 13.30 + 0.54452 0.00 21.890 0 0.6240 6.1510 97.90 1.6687 4 437.0 21.20 396.90 18.46 17.80 + 0.29090 0.00 21.890 0 0.6240 6.1740 93.60 1.6119 4 437.0 21.20 388.08 24.16 14.00 + 1.62864 0.00 21.890 0 0.6240 5.0190 100.00 1.4394 4 437.0 21.20 396.90 34.41 14.40 + 3.32105 0.00 19.580 1 0.8710 5.4030 100.00 1.3216 5 403.0 14.70 396.90 26.82 13.40 + 4.09740 0.00 19.580 0 0.8710 5.4680 100.00 1.4118 5 403.0 14.70 396.90 26.42 15.60 + 2.77974 0.00 19.580 0 0.8710 4.9030 97.80 1.3459 5 403.0 14.70 396.90 29.29 11.80 + 2.37934 0.00 19.580 0 0.8710 6.1300 100.00 1.4191 5 403.0 14.70 172.91 27.80 13.80 + 2.15505 0.00 19.580 0 0.8710 5.6280 100.00 1.5166 5 403.0 14.70 169.27 16.65 15.60 + 2.36862 0.00 19.580 0 0.8710 4.9260 95.70 1.4608 5 403.0 14.70 391.71 29.53 14.60 + 2.33099 0.00 19.580 0 0.8710 5.1860 93.80 1.5296 5 403.0 14.70 356.99 28.32 17.80 + 2.73397 0.00 19.580 0 0.8710 5.5970 94.90 1.5257 5 403.0 14.70 351.85 21.45 15.40 + 1.65660 0.00 19.580 0 0.8710 6.1220 97.30 1.6180 5 403.0 14.70 372.80 14.10 21.50 + 1.49632 0.00 19.580 0 0.8710 5.4040 100.00 1.5916 5 403.0 14.70 341.60 13.28 19.60 + 1.12658 0.00 19.580 1 0.8710 5.0120 88.00 1.6102 5 403.0 14.70 343.28 12.12 15.30 + 2.14918 0.00 19.580 0 0.8710 5.7090 98.50 1.6232 5 403.0 14.70 261.95 15.79 19.40 + 1.41385 0.00 19.580 1 0.8710 6.1290 96.00 1.7494 5 403.0 14.70 321.02 15.12 17.00 + 3.53501 0.00 19.580 1 0.8710 6.1520 82.60 1.7455 5 403.0 14.70 88.01 15.02 15.60 + 2.44668 0.00 19.580 0 0.8710 5.2720 94.00 1.7364 5 403.0 14.70 88.63 16.14 13.10 + 1.22358 0.00 19.580 0 0.6050 6.9430 97.40 1.8773 5 403.0 14.70 363.43 4.59 41.30 + 1.34284 0.00 19.580 0 0.6050 6.0660 100.00 1.7573 5 403.0 14.70 353.89 6.43 24.30 + 1.42502 0.00 19.580 0 0.8710 6.5100 100.00 1.7659 5 403.0 14.70 364.31 7.39 23.30 + 1.27346 0.00 19.580 1 0.6050 6.2500 92.60 1.7984 5 403.0 14.70 338.92 5.50 27.00 + 1.46336 0.00 19.580 0 0.6050 7.4890 90.80 1.9709 5 403.0 14.70 374.43 1.73 50.00 + 1.83377 0.00 19.580 1 0.6050 7.8020 98.20 2.0407 5 403.0 14.70 389.61 1.92 50.00 + 1.51902 0.00 19.580 1 0.6050 8.3750 93.90 2.1620 5 403.0 14.70 388.45 3.32 50.00 + 2.24236 0.00 19.580 0 0.6050 5.8540 91.80 2.4220 5 403.0 14.70 395.11 11.64 22.70 + 2.92400 0.00 19.580 0 0.6050 6.1010 93.00 2.2834 5 403.0 14.70 240.16 9.81 25.00 + 2.01019 0.00 19.580 0 0.6050 7.9290 96.20 2.0459 5 403.0 14.70 369.30 3.70 50.00 + 1.80028 0.00 19.580 0 0.6050 5.8770 79.20 2.4259 5 403.0 14.70 227.61 12.14 23.80 + 2.30040 0.00 19.580 0 0.6050 6.3190 96.10 2.1000 5 403.0 14.70 297.09 11.10 23.80 + 2.44953 0.00 19.580 0 
0.6050 6.4020 95.20 2.2625 5 403.0 14.70 330.04 11.32 22.30 + 1.20742 0.00 19.580 0 0.6050 5.8750 94.60 2.4259 5 403.0 14.70 292.29 14.43 17.40 + 2.31390 0.00 19.580 0 0.6050 5.8800 97.30 2.3887 5 403.0 14.70 348.13 12.03 19.10 + 0.13914 0.00 4.050 0 0.5100 5.5720 88.50 2.5961 5 296.0 16.60 396.90 14.69 23.10 + 0.09178 0.00 4.050 0 0.5100 6.4160 84.10 2.6463 5 296.0 16.60 395.50 9.04 23.60 + 0.08447 0.00 4.050 0 0.5100 5.8590 68.70 2.7019 5 296.0 16.60 393.23 9.64 22.60 + 0.06664 0.00 4.050 0 0.5100 6.5460 33.10 3.1323 5 296.0 16.60 390.96 5.33 29.40 + 0.07022 0.00 4.050 0 0.5100 6.0200 47.20 3.5549 5 296.0 16.60 393.23 10.11 23.20 + 0.05425 0.00 4.050 0 0.5100 6.3150 73.40 3.3175 5 296.0 16.60 395.60 6.29 24.60 + 0.06642 0.00 4.050 0 0.5100 6.8600 74.40 2.9153 5 296.0 16.60 391.27 6.92 29.90 + 0.05780 0.00 2.460 0 0.4880 6.9800 58.40 2.8290 3 193.0 17.80 396.90 5.04 37.20 + 0.06588 0.00 2.460 0 0.4880 7.7650 83.30 2.7410 3 193.0 17.80 395.56 7.56 39.80 + 0.06888 0.00 2.460 0 0.4880 6.1440 62.20 2.5979 3 193.0 17.80 396.90 9.45 36.20 + 0.09103 0.00 2.460 0 0.4880 7.1550 92.20 2.7006 3 193.0 17.80 394.12 4.82 37.90 + 0.10008 0.00 2.460 0 0.4880 6.5630 95.60 2.8470 3 193.0 17.80 396.90 5.68 32.50 + 0.08308 0.00 2.460 0 0.4880 5.6040 89.80 2.9879 3 193.0 17.80 391.00 13.98 26.40 + 0.06047 0.00 2.460 0 0.4880 6.1530 68.80 3.2797 3 193.0 17.80 387.11 13.15 29.60 + 0.05602 0.00 2.460 0 0.4880 7.8310 53.60 3.1992 3 193.0 17.80 392.63 4.45 50.00 + 0.07875 45.00 3.440 0 0.4370 6.7820 41.10 3.7886 5 398.0 15.20 393.87 6.68 32.00 + 0.12579 45.00 3.440 0 0.4370 6.5560 29.10 4.5667 5 398.0 15.20 382.84 4.56 29.80 + 0.08370 45.00 3.440 0 0.4370 7.1850 38.90 4.5667 5 398.0 15.20 396.90 5.39 34.90 + 0.09068 45.00 3.440 0 0.4370 6.9510 21.50 6.4798 5 398.0 15.20 377.68 5.10 37.00 + 0.06911 45.00 3.440 0 0.4370 6.7390 30.80 6.4798 5 398.0 15.20 389.71 4.69 30.50 + 0.08664 45.00 3.440 0 0.4370 7.1780 26.30 6.4798 5 398.0 15.20 390.49 2.87 36.40 + 0.02187 60.00 2.930 0 0.4010 6.8000 9.90 6.2196 1 265.0 15.60 393.37 5.03 31.10 + 0.01439 60.00 2.930 0 0.4010 6.6040 18.80 6.2196 1 265.0 15.60 376.70 4.38 29.10 + 0.01381 80.00 0.460 0 0.4220 7.8750 32.00 5.6484 4 255.0 14.40 394.23 2.97 50.00 + 0.04011 80.00 1.520 0 0.4040 7.2870 34.10 7.3090 2 329.0 12.60 396.90 4.08 33.30 + 0.04666 80.00 1.520 0 0.4040 7.1070 36.60 7.3090 2 329.0 12.60 354.31 8.61 30.30 + 0.03768 80.00 1.520 0 0.4040 7.2740 38.30 7.3090 2 329.0 12.60 392.20 6.62 34.60 + 0.03150 95.00 1.470 0 0.4030 6.9750 15.30 7.6534 3 402.0 17.00 396.90 4.56 34.90 + 0.01778 95.00 1.470 0 0.4030 7.1350 13.90 7.6534 3 402.0 17.00 384.30 4.45 32.90 + 0.03445 82.50 2.030 0 0.4150 6.1620 38.40 6.2700 2 348.0 14.70 393.77 7.43 24.10 + 0.02177 82.50 2.030 0 0.4150 7.6100 15.70 6.2700 2 348.0 14.70 395.38 3.11 42.30 + 0.03510 95.00 2.680 0 0.4161 7.8530 33.20 5.1180 4 224.0 14.70 392.78 3.81 48.50 + 0.02009 95.00 2.680 0 0.4161 8.0340 31.90 5.1180 4 224.0 14.70 390.55 2.88 50.00 + 0.13642 0.00 10.590 0 0.4890 5.8910 22.30 3.9454 4 277.0 18.60 396.90 10.87 22.60 + 0.22969 0.00 10.590 0 0.4890 6.3260 52.50 4.3549 4 277.0 18.60 394.87 10.97 24.40 + 0.25199 0.00 10.590 0 0.4890 5.7830 72.70 4.3549 4 277.0 18.60 389.43 18.06 22.50 + 0.13587 0.00 10.590 1 0.4890 6.0640 59.10 4.2392 4 277.0 18.60 381.32 14.66 24.40 + 0.43571 0.00 10.590 1 0.4890 5.3440 100.00 3.8750 4 277.0 18.60 396.90 23.09 20.00 + 0.17446 0.00 10.590 1 0.4890 5.9600 92.10 3.8771 4 277.0 18.60 393.25 17.27 21.70 + 0.37578 0.00 10.590 1 0.4890 5.4040 88.60 3.6650 4 277.0 18.60 395.24 23.98 19.30 + 0.21719 
0.00 10.590 1 0.4890 5.8070 53.80 3.6526 4 277.0 18.60 390.94 16.03 22.40 + 0.14052 0.00 10.590 0 0.4890 6.3750 32.30 3.9454 4 277.0 18.60 385.81 9.38 28.10 + 0.28955 0.00 10.590 0 0.4890 5.4120 9.80 3.5875 4 277.0 18.60 348.93 29.55 23.70 + 0.19802 0.00 10.590 0 0.4890 6.1820 42.40 3.9454 4 277.0 18.60 393.63 9.47 25.00 + 0.04560 0.00 13.890 1 0.5500 5.8880 56.00 3.1121 5 276.0 16.40 392.80 13.51 23.30 + 0.07013 0.00 13.890 0 0.5500 6.6420 85.10 3.4211 5 276.0 16.40 392.78 9.69 28.70 + 0.11069 0.00 13.890 1 0.5500 5.9510 93.80 2.8893 5 276.0 16.40 396.90 17.92 21.50 + 0.11425 0.00 13.890 1 0.5500 6.3730 92.40 3.3633 5 276.0 16.40 393.74 10.50 23.00 + 0.35809 0.00 6.200 1 0.5070 6.9510 88.50 2.8617 8 307.0 17.40 391.70 9.71 26.70 + 0.40771 0.00 6.200 1 0.5070 6.1640 91.30 3.0480 8 307.0 17.40 395.24 21.46 21.70 + 0.62356 0.00 6.200 1 0.5070 6.8790 77.70 3.2721 8 307.0 17.40 390.39 9.93 27.50 + 0.61470 0.00 6.200 0 0.5070 6.6180 80.80 3.2721 8 307.0 17.40 396.90 7.60 30.10 + 0.31533 0.00 6.200 0 0.5040 8.2660 78.30 2.8944 8 307.0 17.40 385.05 4.14 44.80 + 0.52693 0.00 6.200 0 0.5040 8.7250 83.00 2.8944 8 307.0 17.40 382.00 4.63 50.00 + 0.38214 0.00 6.200 0 0.5040 8.0400 86.50 3.2157 8 307.0 17.40 387.38 3.13 37.60 + 0.41238 0.00 6.200 0 0.5040 7.1630 79.90 3.2157 8 307.0 17.40 372.08 6.36 31.60 + 0.29819 0.00 6.200 0 0.5040 7.6860 17.00 3.3751 8 307.0 17.40 377.51 3.92 46.70 + 0.44178 0.00 6.200 0 0.5040 6.5520 21.40 3.3751 8 307.0 17.40 380.34 3.76 31.50 + 0.53700 0.00 6.200 0 0.5040 5.9810 68.10 3.6715 8 307.0 17.40 378.35 11.65 24.30 + 0.46296 0.00 6.200 0 0.5040 7.4120 76.90 3.6715 8 307.0 17.40 376.14 5.25 31.70 + 0.57529 0.00 6.200 0 0.5070 8.3370 73.30 3.8384 8 307.0 17.40 385.91 2.47 41.70 + 0.33147 0.00 6.200 0 0.5070 8.2470 70.40 3.6519 8 307.0 17.40 378.95 3.95 48.30 + 0.44791 0.00 6.200 1 0.5070 6.7260 66.50 3.6519 8 307.0 17.40 360.20 8.05 29.00 + 0.33045 0.00 6.200 0 0.5070 6.0860 61.50 3.6519 8 307.0 17.40 376.75 10.88 24.00 + 0.52058 0.00 6.200 1 0.5070 6.6310 76.50 4.1480 8 307.0 17.40 388.45 9.54 25.10 + 0.51183 0.00 6.200 0 0.5070 7.3580 71.60 4.1480 8 307.0 17.40 390.07 4.73 31.50 + 0.08244 30.00 4.930 0 0.4280 6.4810 18.50 6.1899 6 300.0 16.60 379.41 6.36 23.70 + 0.09252 30.00 4.930 0 0.4280 6.6060 42.20 6.1899 6 300.0 16.60 383.78 7.37 23.30 + 0.11329 30.00 4.930 0 0.4280 6.8970 54.30 6.3361 6 300.0 16.60 391.25 11.38 22.00 + 0.10612 30.00 4.930 0 0.4280 6.0950 65.10 6.3361 6 300.0 16.60 394.62 12.40 20.10 + 0.10290 30.00 4.930 0 0.4280 6.3580 52.90 7.0355 6 300.0 16.60 372.75 11.22 22.20 + 0.12757 30.00 4.930 0 0.4280 6.3930 7.80 7.0355 6 300.0 16.60 374.71 5.19 23.70 + 0.20608 22.00 5.860 0 0.4310 5.5930 76.50 7.9549 7 330.0 19.10 372.49 12.50 17.60 + 0.19133 22.00 5.860 0 0.4310 5.6050 70.20 7.9549 7 330.0 19.10 389.13 18.46 18.50 + 0.33983 22.00 5.860 0 0.4310 6.1080 34.90 8.0555 7 330.0 19.10 390.18 9.16 24.30 + 0.19657 22.00 5.860 0 0.4310 6.2260 79.20 8.0555 7 330.0 19.10 376.14 10.15 20.50 + 0.16439 22.00 5.860 0 0.4310 6.4330 49.10 7.8265 7 330.0 19.10 374.71 9.52 24.50 + 0.19073 22.00 5.860 0 0.4310 6.7180 17.50 7.8265 7 330.0 19.10 393.74 6.56 26.20 + 0.14030 22.00 5.860 0 0.4310 6.4870 13.00 7.3967 7 330.0 19.10 396.28 5.90 24.40 + 0.21409 22.00 5.860 0 0.4310 6.4380 8.90 7.3967 7 330.0 19.10 377.07 3.59 24.80 + 0.08221 22.00 5.860 0 0.4310 6.9570 6.80 8.9067 7 330.0 19.10 386.09 3.53 29.60 + 0.36894 22.00 5.860 0 0.4310 8.2590 8.40 8.9067 7 330.0 19.10 396.90 3.54 42.80 + 0.04819 80.00 3.640 0 0.3920 6.1080 32.00 9.2203 1 315.0 16.40 392.89 6.57 21.90 + 
0.03548 80.00 3.640 0 0.3920 5.8760 19.10 9.2203 1 315.0 16.40 395.18 9.25 20.90 + 0.01538 90.00 3.750 0 0.3940 7.4540 34.20 6.3361 3 244.0 15.90 386.34 3.11 44.00 + 0.61154 20.00 3.970 0 0.6470 8.7040 86.90 1.8010 5 264.0 13.00 389.70 5.12 50.00 + 0.66351 20.00 3.970 0 0.6470 7.3330 100.00 1.8946 5 264.0 13.00 383.29 7.79 36.00 + 0.65665 20.00 3.970 0 0.6470 6.8420 100.00 2.0107 5 264.0 13.00 391.93 6.90 30.10 + 0.54011 20.00 3.970 0 0.6470 7.2030 81.80 2.1121 5 264.0 13.00 392.80 9.59 33.80 + 0.53412 20.00 3.970 0 0.6470 7.5200 89.40 2.1398 5 264.0 13.00 388.37 7.26 43.10 + 0.52014 20.00 3.970 0 0.6470 8.3980 91.50 2.2885 5 264.0 13.00 386.86 5.91 48.80 + 0.82526 20.00 3.970 0 0.6470 7.3270 94.50 2.0788 5 264.0 13.00 393.42 11.25 31.00 + 0.55007 20.00 3.970 0 0.6470 7.2060 91.60 1.9301 5 264.0 13.00 387.89 8.10 36.50 + 0.76162 20.00 3.970 0 0.6470 5.5600 62.80 1.9865 5 264.0 13.00 392.40 10.45 22.80 + 0.78570 20.00 3.970 0 0.6470 7.0140 84.60 2.1329 5 264.0 13.00 384.07 14.79 30.70 + 0.57834 20.00 3.970 0 0.5750 8.2970 67.00 2.4216 5 264.0 13.00 384.54 7.44 50.00 + 0.54050 20.00 3.970 0 0.5750 7.4700 52.60 2.8720 5 264.0 13.00 390.30 3.16 43.50 + 0.09065 20.00 6.960 1 0.4640 5.9200 61.50 3.9175 3 223.0 18.60 391.34 13.65 20.70 + 0.29916 20.00 6.960 0 0.4640 5.8560 42.10 4.4290 3 223.0 18.60 388.65 13.00 21.10 + 0.16211 20.00 6.960 0 0.4640 6.2400 16.30 4.4290 3 223.0 18.60 396.90 6.59 25.20 + 0.11460 20.00 6.960 0 0.4640 6.5380 58.70 3.9175 3 223.0 18.60 394.96 7.73 24.40 + 0.22188 20.00 6.960 1 0.4640 7.6910 51.80 4.3665 3 223.0 18.60 390.77 6.58 35.20 + 0.05644 40.00 6.410 1 0.4470 6.7580 32.90 4.0776 4 254.0 17.60 396.90 3.53 32.40 + 0.09604 40.00 6.410 0 0.4470 6.8540 42.80 4.2673 4 254.0 17.60 396.90 2.98 32.00 + 0.10469 40.00 6.410 1 0.4470 7.2670 49.00 4.7872 4 254.0 17.60 389.25 6.05 33.20 + 0.06127 40.00 6.410 1 0.4470 6.8260 27.60 4.8628 4 254.0 17.60 393.45 4.16 33.10 + 0.07978 40.00 6.410 0 0.4470 6.4820 32.10 4.1403 4 254.0 17.60 396.90 7.19 29.10 + 0.21038 20.00 3.330 0 0.4429 6.8120 32.20 4.1007 5 216.0 14.90 396.90 4.85 35.10 + 0.03578 20.00 3.330 0 0.4429 7.8200 64.50 4.6947 5 216.0 14.90 387.31 3.76 45.40 + 0.03705 20.00 3.330 0 0.4429 6.9680 37.20 5.2447 5 216.0 14.90 392.23 4.59 35.40 + 0.06129 20.00 3.330 1 0.4429 7.6450 49.70 5.2119 5 216.0 14.90 377.07 3.01 46.00 + 0.01501 90.00 1.210 1 0.4010 7.9230 24.80 5.8850 1 198.0 13.60 395.52 3.16 50.00 + 0.00906 90.00 2.970 0 0.4000 7.0880 20.80 7.3073 1 285.0 15.30 394.72 7.85 32.20 + 0.01096 55.00 2.250 0 0.3890 6.4530 31.90 7.3073 1 300.0 15.30 394.72 8.23 22.00 + 0.01965 80.00 1.760 0 0.3850 6.2300 31.50 9.0892 1 241.0 18.20 341.60 12.93 20.10 + 0.03871 52.50 5.320 0 0.4050 6.2090 31.30 7.3172 6 293.0 16.60 396.90 7.14 23.20 + 0.04590 52.50 5.320 0 0.4050 6.3150 45.60 7.3172 6 293.0 16.60 396.90 7.60 22.30 + 0.04297 52.50 5.320 0 0.4050 6.5650 22.90 7.3172 6 293.0 16.60 371.72 9.51 24.80 + 0.03502 80.00 4.950 0 0.4110 6.8610 27.90 5.1167 4 245.0 19.20 396.90 3.33 28.50 + 0.07886 80.00 4.950 0 0.4110 7.1480 27.70 5.1167 4 245.0 19.20 396.90 3.56 37.30 + 0.03615 80.00 4.950 0 0.4110 6.6300 23.40 5.1167 4 245.0 19.20 396.90 4.70 27.90 + 0.08265 0.00 13.920 0 0.4370 6.1270 18.40 5.5027 4 289.0 16.00 396.90 8.58 23.90 + 0.08199 0.00 13.920 0 0.4370 6.0090 42.30 5.5027 4 289.0 16.00 396.90 10.40 21.70 + 0.12932 0.00 13.920 0 0.4370 6.6780 31.10 5.9604 4 289.0 16.00 396.90 6.27 28.60 + 0.05372 0.00 13.920 0 0.4370 6.5490 51.00 5.9604 4 289.0 16.00 392.85 7.39 27.10 + 0.14103 0.00 13.920 0 0.4370 5.7900 58.00 6.3200 4 289.0 
16.00 396.90 15.84 20.30 + 0.06466 70.00 2.240 0 0.4000 6.3450 20.10 7.8278 5 358.0 14.80 368.24 4.97 22.50 + 0.05561 70.00 2.240 0 0.4000 7.0410 10.00 7.8278 5 358.0 14.80 371.58 4.74 29.00 + 0.04417 70.00 2.240 0 0.4000 6.8710 47.40 7.8278 5 358.0 14.80 390.86 6.07 24.80 + 0.03537 34.00 6.090 0 0.4330 6.5900 40.40 5.4917 7 329.0 16.10 395.75 9.50 22.00 + 0.09266 34.00 6.090 0 0.4330 6.4950 18.40 5.4917 7 329.0 16.10 383.61 8.67 26.40 + 0.10000 34.00 6.090 0 0.4330 6.9820 17.70 5.4917 7 329.0 16.10 390.43 4.86 33.10 + 0.05515 33.00 2.180 0 0.4720 7.2360 41.10 4.0220 7 222.0 18.40 393.68 6.93 36.10 + 0.05479 33.00 2.180 0 0.4720 6.6160 58.10 3.3700 7 222.0 18.40 393.36 8.93 28.40 + 0.07503 33.00 2.180 0 0.4720 7.4200 71.90 3.0992 7 222.0 18.40 396.90 6.47 33.40 + 0.04932 33.00 2.180 0 0.4720 6.8490 70.30 3.1827 7 222.0 18.40 396.90 7.53 28.20 + 0.49298 0.00 9.900 0 0.5440 6.6350 82.50 3.3175 4 304.0 18.40 396.90 4.54 22.80 + 0.34940 0.00 9.900 0 0.5440 5.9720 76.70 3.1025 4 304.0 18.40 396.24 9.97 20.30 + 2.63548 0.00 9.900 0 0.5440 4.9730 37.80 2.5194 4 304.0 18.40 350.45 12.64 16.10 + 0.79041 0.00 9.900 0 0.5440 6.1220 52.80 2.6403 4 304.0 18.40 396.90 5.98 22.10 + 0.26169 0.00 9.900 0 0.5440 6.0230 90.40 2.8340 4 304.0 18.40 396.30 11.72 19.40 + 0.26938 0.00 9.900 0 0.5440 6.2660 82.80 3.2628 4 304.0 18.40 393.39 7.90 21.60 + 0.36920 0.00 9.900 0 0.5440 6.5670 87.30 3.6023 4 304.0 18.40 395.69 9.28 23.80 + 0.25356 0.00 9.900 0 0.5440 5.7050 77.70 3.9450 4 304.0 18.40 396.42 11.50 16.20 + 0.31827 0.00 9.900 0 0.5440 5.9140 83.20 3.9986 4 304.0 18.40 390.70 18.33 17.80 + 0.24522 0.00 9.900 0 0.5440 5.7820 71.70 4.0317 4 304.0 18.40 396.90 15.94 19.80 + 0.40202 0.00 9.900 0 0.5440 6.3820 67.20 3.5325 4 304.0 18.40 395.21 10.36 23.10 + 0.47547 0.00 9.900 0 0.5440 6.1130 58.80 4.0019 4 304.0 18.40 396.23 12.73 21.00 + 0.16760 0.00 7.380 0 0.4930 6.4260 52.30 4.5404 5 287.0 19.60 396.90 7.20 23.80 + 0.18159 0.00 7.380 0 0.4930 6.3760 54.30 4.5404 5 287.0 19.60 396.90 6.87 23.10 + 0.35114 0.00 7.380 0 0.4930 6.0410 49.90 4.7211 5 287.0 19.60 396.90 7.70 20.40 + 0.28392 0.00 7.380 0 0.4930 5.7080 74.30 4.7211 5 287.0 19.60 391.13 11.74 18.50 + 0.34109 0.00 7.380 0 0.4930 6.4150 40.10 4.7211 5 287.0 19.60 396.90 6.12 25.00 + 0.19186 0.00 7.380 0 0.4930 6.4310 14.70 5.4159 5 287.0 19.60 393.68 5.08 24.60 + 0.30347 0.00 7.380 0 0.4930 6.3120 28.90 5.4159 5 287.0 19.60 396.90 6.15 23.00 + 0.24103 0.00 7.380 0 0.4930 6.0830 43.70 5.4159 5 287.0 19.60 396.90 12.79 22.20 + 0.06617 0.00 3.240 0 0.4600 5.8680 25.80 5.2146 4 430.0 16.90 382.44 9.97 19.30 + 0.06724 0.00 3.240 0 0.4600 6.3330 17.20 5.2146 4 430.0 16.90 375.21 7.34 22.60 + 0.04544 0.00 3.240 0 0.4600 6.1440 32.20 5.8736 4 430.0 16.90 368.57 9.09 19.80 + 0.05023 35.00 6.060 0 0.4379 5.7060 28.40 6.6407 1 304.0 16.90 394.02 12.43 17.10 + 0.03466 35.00 6.060 0 0.4379 6.0310 23.30 6.6407 1 304.0 16.90 362.25 7.83 19.40 + 0.05083 0.00 5.190 0 0.5150 6.3160 38.10 6.4584 5 224.0 20.20 389.71 5.68 22.20 + 0.03738 0.00 5.190 0 0.5150 6.3100 38.50 6.4584 5 224.0 20.20 389.40 6.75 20.70 + 0.03961 0.00 5.190 0 0.5150 6.0370 34.50 5.9853 5 224.0 20.20 396.90 8.01 21.10 + 0.03427 0.00 5.190 0 0.5150 5.8690 46.30 5.2311 5 224.0 20.20 396.90 9.80 19.50 + 0.03041 0.00 5.190 0 0.5150 5.8950 59.60 5.6150 5 224.0 20.20 394.81 10.56 18.50 + 0.03306 0.00 5.190 0 0.5150 6.0590 37.30 4.8122 5 224.0 20.20 396.14 8.51 20.60 + 0.05497 0.00 5.190 0 0.5150 5.9850 45.40 4.8122 5 224.0 20.20 396.90 9.74 19.00 + 0.06151 0.00 5.190 0 0.5150 5.9680 58.50 4.8122 5 224.0 
20.20 396.90 9.29 18.70 + 0.01301 35.00 1.520 0 0.4420 7.2410 49.30 7.0379 1 284.0 15.50 394.74 5.49 32.70 + 0.02498 0.00 1.890 0 0.5180 6.5400 59.70 6.2669 1 422.0 15.90 389.96 8.65 16.50 + 0.02543 55.00 3.780 0 0.4840 6.6960 56.40 5.7321 5 370.0 17.60 396.90 7.18 23.90 + 0.03049 55.00 3.780 0 0.4840 6.8740 28.10 6.4654 5 370.0 17.60 387.97 4.61 31.20 + 0.03113 0.00 4.390 0 0.4420 6.0140 48.50 8.0136 3 352.0 18.80 385.64 10.53 17.50 + 0.06162 0.00 4.390 0 0.4420 5.8980 52.30 8.0136 3 352.0 18.80 364.61 12.67 17.20 + 0.01870 85.00 4.150 0 0.4290 6.5160 27.70 8.5353 4 351.0 17.90 392.43 6.36 23.10 + 0.01501 80.00 2.010 0 0.4350 6.6350 29.70 8.3440 4 280.0 17.00 390.94 5.99 24.50 + 0.02899 40.00 1.250 0 0.4290 6.9390 34.50 8.7921 1 335.0 19.70 389.85 5.89 26.60 + 0.06211 40.00 1.250 0 0.4290 6.4900 44.40 8.7921 1 335.0 19.70 396.90 5.98 22.90 + 0.07950 60.00 1.690 0 0.4110 6.5790 35.90 10.7103 4 411.0 18.30 370.78 5.49 24.10 + 0.07244 60.00 1.690 0 0.4110 5.8840 18.50 10.7103 4 411.0 18.30 392.33 7.79 18.60 + 0.01709 90.00 2.020 0 0.4100 6.7280 36.10 12.1265 5 187.0 17.00 384.46 4.50 30.10 + 0.04301 80.00 1.910 0 0.4130 5.6630 21.90 10.5857 4 334.0 22.00 382.80 8.05 18.20 + 0.10659 80.00 1.910 0 0.4130 5.9360 19.50 10.5857 4 334.0 22.00 376.04 5.57 20.60 + 8.98296 0.00 18.100 1 0.7700 6.2120 97.40 2.1222 24 666.0 20.20 377.73 17.60 17.80 + 3.84970 0.00 18.100 1 0.7700 6.3950 91.00 2.5052 24 666.0 20.20 391.34 13.27 21.70 + 5.20177 0.00 18.100 1 0.7700 6.1270 83.40 2.7227 24 666.0 20.20 395.43 11.48 22.70 + 4.26131 0.00 18.100 0 0.7700 6.1120 81.30 2.5091 24 666.0 20.20 390.74 12.67 22.60 + 4.54192 0.00 18.100 0 0.7700 6.3980 88.00 2.5182 24 666.0 20.20 374.56 7.79 25.00 + 3.83684 0.00 18.100 0 0.7700 6.2510 91.10 2.2955 24 666.0 20.20 350.65 14.19 19.90 + 3.67822 0.00 18.100 0 0.7700 5.3620 96.20 2.1036 24 666.0 20.20 380.79 10.19 20.80 + 4.22239 0.00 18.100 1 0.7700 5.8030 89.00 1.9047 24 666.0 20.20 353.04 14.64 16.80 + 3.47428 0.00 18.100 1 0.7180 8.7800 82.90 1.9047 24 666.0 20.20 354.55 5.29 21.90 + 4.55587 0.00 18.100 0 0.7180 3.5610 87.90 1.6132 24 666.0 20.20 354.70 7.12 27.50 + 3.69695 0.00 18.100 0 0.7180 4.9630 91.40 1.7523 24 666.0 20.20 316.03 14.00 21.90 +13.52220 0.00 18.100 0 0.6310 3.8630 100.00 1.5106 24 666.0 20.20 131.42 13.33 23.10 + 4.89822 0.00 18.100 0 0.6310 4.9700 100.00 1.3325 24 666.0 20.20 375.52 3.26 50.00 + 5.66998 0.00 18.100 1 0.6310 6.6830 96.80 1.3567 24 666.0 20.20 375.33 3.73 50.00 + 6.53876 0.00 18.100 1 0.6310 7.0160 97.50 1.2024 24 666.0 20.20 392.05 2.96 50.00 + 9.23230 0.00 18.100 0 0.6310 6.2160 100.00 1.1691 24 666.0 20.20 366.15 9.53 50.00 + 8.26725 0.00 18.100 1 0.6680 5.8750 89.60 1.1296 24 666.0 20.20 347.88 8.88 50.00 +11.10810 0.00 18.100 0 0.6680 4.9060 100.00 1.1742 24 666.0 20.20 396.90 34.77 13.80 +18.49820 0.00 18.100 0 0.6680 4.1380 100.00 1.1370 24 666.0 20.20 396.90 37.97 13.80 +19.60910 0.00 18.100 0 0.6710 7.3130 97.90 1.3163 24 666.0 20.20 396.90 13.44 15.00 +15.28800 0.00 18.100 0 0.6710 6.6490 93.30 1.3449 24 666.0 20.20 363.02 23.24 13.90 + 9.82349 0.00 18.100 0 0.6710 6.7940 98.80 1.3580 24 666.0 20.20 396.90 21.24 13.30 +23.64820 0.00 18.100 0 0.6710 6.3800 96.20 1.3861 24 666.0 20.20 396.90 23.69 13.10 +17.86670 0.00 18.100 0 0.6710 6.2230 100.00 1.3861 24 666.0 20.20 393.74 21.78 10.20 +88.97620 0.00 18.100 0 0.6710 6.9680 91.90 1.4165 24 666.0 20.20 396.90 17.21 10.40 +15.87440 0.00 18.100 0 0.6710 6.5450 99.10 1.5192 24 666.0 20.20 396.90 21.08 10.90 + 9.18702 0.00 18.100 0 0.7000 5.5360 100.00 1.5804 24 666.0 20.20 396.90 
23.60 11.30 + 7.99248 0.00 18.100 0 0.7000 5.5200 100.00 1.5331 24 666.0 20.20 396.90 24.56 12.30 +20.08490 0.00 18.100 0 0.7000 4.3680 91.20 1.4395 24 666.0 20.20 285.83 30.63 8.80 +16.81180 0.00 18.100 0 0.7000 5.2770 98.10 1.4261 24 666.0 20.20 396.90 30.81 7.20 +24.39380 0.00 18.100 0 0.7000 4.6520 100.00 1.4672 24 666.0 20.20 396.90 28.28 10.50 +22.59710 0.00 18.100 0 0.7000 5.0000 89.50 1.5184 24 666.0 20.20 396.90 31.99 7.40 +14.33370 0.00 18.100 0 0.7000 4.8800 100.00 1.5895 24 666.0 20.20 372.92 30.62 10.20 + 8.15174 0.00 18.100 0 0.7000 5.3900 98.90 1.7281 24 666.0 20.20 396.90 20.85 11.50 + 6.96215 0.00 18.100 0 0.7000 5.7130 97.00 1.9265 24 666.0 20.20 394.43 17.11 15.10 + 5.29305 0.00 18.100 0 0.7000 6.0510 82.50 2.1678 24 666.0 20.20 378.38 18.76 23.20 +11.57790 0.00 18.100 0 0.7000 5.0360 97.00 1.7700 24 666.0 20.20 396.90 25.68 9.70 + 8.64476 0.00 18.100 0 0.6930 6.1930 92.60 1.7912 24 666.0 20.20 396.90 15.17 13.80 +13.35980 0.00 18.100 0 0.6930 5.8870 94.70 1.7821 24 666.0 20.20 396.90 16.35 12.70 + 8.71675 0.00 18.100 0 0.6930 6.4710 98.80 1.7257 24 666.0 20.20 391.98 17.12 13.10 + 5.87205 0.00 18.100 0 0.6930 6.4050 96.00 1.6768 24 666.0 20.20 396.90 19.37 12.50 + 7.67202 0.00 18.100 0 0.6930 5.7470 98.90 1.6334 24 666.0 20.20 393.10 19.92 8.50 +38.35180 0.00 18.100 0 0.6930 5.4530 100.00 1.4896 24 666.0 20.20 396.90 30.59 5.00 + 9.91655 0.00 18.100 0 0.6930 5.8520 77.80 1.5004 24 666.0 20.20 338.16 29.97 6.30 +25.04610 0.00 18.100 0 0.6930 5.9870 100.00 1.5888 24 666.0 20.20 396.90 26.77 5.60 +14.23620 0.00 18.100 0 0.6930 6.3430 100.00 1.5741 24 666.0 20.20 396.90 20.32 7.20 + 9.59571 0.00 18.100 0 0.6930 6.4040 100.00 1.6390 24 666.0 20.20 376.11 20.31 12.10 +24.80170 0.00 18.100 0 0.6930 5.3490 96.00 1.7028 24 666.0 20.20 396.90 19.77 8.30 +41.52920 0.00 18.100 0 0.6930 5.5310 85.40 1.6074 24 666.0 20.20 329.46 27.38 8.50 +67.92080 0.00 18.100 0 0.6930 5.6830 100.00 1.4254 24 666.0 20.20 384.97 22.98 5.00 +20.71620 0.00 18.100 0 0.6590 4.1380 100.00 1.1781 24 666.0 20.20 370.22 23.34 11.90 +11.95110 0.00 18.100 0 0.6590 5.6080 100.00 1.2852 24 666.0 20.20 332.09 12.13 27.90 + 7.40389 0.00 18.100 0 0.5970 5.6170 97.90 1.4547 24 666.0 20.20 314.64 26.40 17.20 +14.43830 0.00 18.100 0 0.5970 6.8520 100.00 1.4655 24 666.0 20.20 179.36 19.78 27.50 +51.13580 0.00 18.100 0 0.5970 5.7570 100.00 1.4130 24 666.0 20.20 2.60 10.11 15.00 +14.05070 0.00 18.100 0 0.5970 6.6570 100.00 1.5275 24 666.0 20.20 35.05 21.22 17.20 +18.81100 0.00 18.100 0 0.5970 4.6280 100.00 1.5539 24 666.0 20.20 28.79 34.37 17.90 +28.65580 0.00 18.100 0 0.5970 5.1550 100.00 1.5894 24 666.0 20.20 210.97 20.08 16.30 +45.74610 0.00 18.100 0 0.6930 4.5190 100.00 1.6582 24 666.0 20.20 88.27 36.98 7.00 +18.08460 0.00 18.100 0 0.6790 6.4340 100.00 1.8347 24 666.0 20.20 27.25 29.05 7.20 +10.83420 0.00 18.100 0 0.6790 6.7820 90.80 1.8195 24 666.0 20.20 21.57 25.79 7.50 +25.94060 0.00 18.100 0 0.6790 5.3040 89.10 1.6475 24 666.0 20.20 127.36 26.64 10.40 +73.53410 0.00 18.100 0 0.6790 5.9570 100.00 1.8026 24 666.0 20.20 16.45 20.62 8.80 +11.81230 0.00 18.100 0 0.7180 6.8240 76.50 1.7940 24 666.0 20.20 48.45 22.74 8.40 +11.08740 0.00 18.100 0 0.7180 6.4110 100.00 1.8589 24 666.0 20.20 318.75 15.02 16.70 + 7.02259 0.00 18.100 0 0.7180 6.0060 95.30 1.8746 24 666.0 20.20 319.98 15.70 14.20 +12.04820 0.00 18.100 0 0.6140 5.6480 87.60 1.9512 24 666.0 20.20 291.55 14.10 20.80 + 7.05042 0.00 18.100 0 0.6140 6.1030 85.10 2.0218 24 666.0 20.20 2.52 23.29 13.40 + 8.79212 0.00 18.100 0 0.5840 5.5650 70.60 2.0635 24 666.0 20.20 
3.65 17.16 11.70 +15.86030 0.00 18.100 0 0.6790 5.8960 95.40 1.9096 24 666.0 20.20 7.68 24.39 8.30 +12.24720 0.00 18.100 0 0.5840 5.8370 59.70 1.9976 24 666.0 20.20 24.65 15.69 10.20 +37.66190 0.00 18.100 0 0.6790 6.2020 78.70 1.8629 24 666.0 20.20 18.82 14.52 10.90 + 7.36711 0.00 18.100 0 0.6790 6.1930 78.10 1.9356 24 666.0 20.20 96.73 21.52 11.00 + 9.33889 0.00 18.100 0 0.6790 6.3800 95.60 1.9682 24 666.0 20.20 60.72 24.08 9.50 + 8.49213 0.00 18.100 0 0.5840 6.3480 86.10 2.0527 24 666.0 20.20 83.45 17.64 14.50 +10.06230 0.00 18.100 0 0.5840 6.8330 94.30 2.0882 24 666.0 20.20 81.33 19.69 14.10 + 6.44405 0.00 18.100 0 0.5840 6.4250 74.80 2.2004 24 666.0 20.20 97.95 12.03 16.10 + 5.58107 0.00 18.100 0 0.7130 6.4360 87.90 2.3158 24 666.0 20.20 100.19 16.22 14.30 +13.91340 0.00 18.100 0 0.7130 6.2080 95.00 2.2222 24 666.0 20.20 100.63 15.17 11.70 +11.16040 0.00 18.100 0 0.7400 6.6290 94.60 2.1247 24 666.0 20.20 109.85 23.27 13.40 +14.42080 0.00 18.100 0 0.7400 6.4610 93.30 2.0026 24 666.0 20.20 27.49 18.05 9.60 +15.17720 0.00 18.100 0 0.7400 6.1520 100.00 1.9142 24 666.0 20.20 9.32 26.45 8.70 +13.67810 0.00 18.100 0 0.7400 5.9350 87.90 1.8206 24 666.0 20.20 68.95 34.02 8.40 + 9.39063 0.00 18.100 0 0.7400 5.6270 93.90 1.8172 24 666.0 20.20 396.90 22.88 12.80 +22.05110 0.00 18.100 0 0.7400 5.8180 92.40 1.8662 24 666.0 20.20 391.45 22.11 10.50 + 9.72418 0.00 18.100 0 0.7400 6.4060 97.20 2.0651 24 666.0 20.20 385.96 19.52 17.10 + 5.66637 0.00 18.100 0 0.7400 6.2190 100.00 2.0048 24 666.0 20.20 395.69 16.59 18.40 + 9.96654 0.00 18.100 0 0.7400 6.4850 100.00 1.9784 24 666.0 20.20 386.73 18.85 15.40 +12.80230 0.00 18.100 0 0.7400 5.8540 96.60 1.8956 24 666.0 20.20 240.52 23.79 10.80 +10.67180 0.00 18.100 0 0.7400 6.4590 94.80 1.9879 24 666.0 20.20 43.06 23.98 11.80 + 6.28807 0.00 18.100 0 0.7400 6.3410 96.40 2.0720 24 666.0 20.20 318.01 17.79 14.90 + 9.92485 0.00 18.100 0 0.7400 6.2510 96.60 2.1980 24 666.0 20.20 388.52 16.44 12.60 + 9.32909 0.00 18.100 0 0.7130 6.1850 98.70 2.2616 24 666.0 20.20 396.90 18.13 14.10 + 7.52601 0.00 18.100 0 0.7130 6.4170 98.30 2.1850 24 666.0 20.20 304.21 19.31 13.00 + 6.71772 0.00 18.100 0 0.7130 6.7490 92.60 2.3236 24 666.0 20.20 0.32 17.44 13.40 + 5.44114 0.00 18.100 0 0.7130 6.6550 98.20 2.3552 24 666.0 20.20 355.29 17.73 15.20 + 5.09017 0.00 18.100 0 0.7130 6.2970 91.80 2.3682 24 666.0 20.20 385.09 17.27 16.10 + 8.24809 0.00 18.100 0 0.7130 7.3930 99.30 2.4527 24 666.0 20.20 375.87 16.74 17.80 + 9.51363 0.00 18.100 0 0.7130 6.7280 94.10 2.4961 24 666.0 20.20 6.68 18.71 14.90 + 4.75237 0.00 18.100 0 0.7130 6.5250 86.50 2.4358 24 666.0 20.20 50.92 18.13 14.10 + 4.66883 0.00 18.100 0 0.7130 5.9760 87.90 2.5806 24 666.0 20.20 10.48 19.01 12.70 + 8.20058 0.00 18.100 0 0.7130 5.9360 80.30 2.7792 24 666.0 20.20 3.50 16.94 13.50 + 7.75223 0.00 18.100 0 0.7130 6.3010 83.70 2.7831 24 666.0 20.20 272.21 16.23 14.90 + 6.80117 0.00 18.100 0 0.7130 6.0810 84.40 2.7175 24 666.0 20.20 396.90 14.70 20.00 + 4.81213 0.00 18.100 0 0.7130 6.7010 90.00 2.5975 24 666.0 20.20 255.23 16.42 16.40 + 3.69311 0.00 18.100 0 0.7130 6.3760 88.40 2.5671 24 666.0 20.20 391.43 14.65 17.70 + 6.65492 0.00 18.100 0 0.7130 6.3170 83.00 2.7344 24 666.0 20.20 396.90 13.99 19.50 + 5.82115 0.00 18.100 0 0.7130 6.5130 89.90 2.8016 24 666.0 20.20 393.82 10.29 20.20 + 7.83932 0.00 18.100 0 0.6550 6.2090 65.40 2.9634 24 666.0 20.20 396.90 13.22 21.40 + 3.16360 0.00 18.100 0 0.6550 5.7590 48.20 3.0665 24 666.0 20.20 334.40 14.13 19.90 + 3.77498 0.00 18.100 0 0.6550 5.9520 84.70 2.8715 24 666.0 20.20 22.01 
17.15 19.00 + 4.42228 0.00 18.100 0 0.5840 6.0030 94.50 2.5403 24 666.0 20.20 331.29 21.32 19.10 +15.57570 0.00 18.100 0 0.5800 5.9260 71.00 2.9084 24 666.0 20.20 368.74 18.13 19.10 +13.07510 0.00 18.100 0 0.5800 5.7130 56.70 2.8237 24 666.0 20.20 396.90 14.76 20.10 + 4.34879 0.00 18.100 0 0.5800 6.1670 84.00 3.0334 24 666.0 20.20 396.90 16.29 19.90 + 4.03841 0.00 18.100 0 0.5320 6.2290 90.70 3.0993 24 666.0 20.20 395.33 12.87 19.60 + 3.56868 0.00 18.100 0 0.5800 6.4370 75.00 2.8965 24 666.0 20.20 393.37 14.36 23.20 + 4.64689 0.00 18.100 0 0.6140 6.9800 67.60 2.5329 24 666.0 20.20 374.68 11.66 29.80 + 8.05579 0.00 18.100 0 0.5840 5.4270 95.40 2.4298 24 666.0 20.20 352.58 18.14 13.80 + 6.39312 0.00 18.100 0 0.5840 6.1620 97.40 2.2060 24 666.0 20.20 302.76 24.10 13.30 + 4.87141 0.00 18.100 0 0.6140 6.4840 93.60 2.3053 24 666.0 20.20 396.21 18.68 16.70 +15.02340 0.00 18.100 0 0.6140 5.3040 97.30 2.1007 24 666.0 20.20 349.48 24.91 12.00 +10.23300 0.00 18.100 0 0.6140 6.1850 96.70 2.1705 24 666.0 20.20 379.70 18.03 14.60 +14.33370 0.00 18.100 0 0.6140 6.2290 88.00 1.9512 24 666.0 20.20 383.32 13.11 21.40 + 5.82401 0.00 18.100 0 0.5320 6.2420 64.70 3.4242 24 666.0 20.20 396.90 10.74 23.00 + 5.70818 0.00 18.100 0 0.5320 6.7500 74.90 3.3317 24 666.0 20.20 393.07 7.74 23.70 + 5.73116 0.00 18.100 0 0.5320 7.0610 77.00 3.4106 24 666.0 20.20 395.28 7.01 25.00 + 2.81838 0.00 18.100 0 0.5320 5.7620 40.30 4.0983 24 666.0 20.20 392.92 10.42 21.80 + 2.37857 0.00 18.100 0 0.5830 5.8710 41.90 3.7240 24 666.0 20.20 370.73 13.34 20.60 + 3.67367 0.00 18.100 0 0.5830 6.3120 51.90 3.9917 24 666.0 20.20 388.62 10.58 21.20 + 5.69175 0.00 18.100 0 0.5830 6.1140 79.80 3.5459 24 666.0 20.20 392.68 14.98 19.10 + 4.83567 0.00 18.100 0 0.5830 5.9050 53.20 3.1523 24 666.0 20.20 388.22 11.45 20.60 + 0.15086 0.00 27.740 0 0.6090 5.4540 92.70 1.8209 4 711.0 20.10 395.09 18.06 15.20 + 0.18337 0.00 27.740 0 0.6090 5.4140 98.30 1.7554 4 711.0 20.10 344.05 23.97 7.00 + 0.20746 0.00 27.740 0 0.6090 5.0930 98.00 1.8226 4 711.0 20.10 318.43 29.68 8.10 + 0.10574 0.00 27.740 0 0.6090 5.9830 98.80 1.8681 4 711.0 20.10 390.11 18.07 13.60 + 0.11132 0.00 27.740 0 0.6090 5.9830 83.50 2.1099 4 711.0 20.10 396.90 13.35 20.10 + 0.17331 0.00 9.690 0 0.5850 5.7070 54.00 2.3817 6 391.0 19.20 396.90 12.01 21.80 + 0.27957 0.00 9.690 0 0.5850 5.9260 42.60 2.3817 6 391.0 19.20 396.90 13.59 24.50 + 0.17899 0.00 9.690 0 0.5850 5.6700 28.80 2.7986 6 391.0 19.20 393.29 17.60 23.10 + 0.28960 0.00 9.690 0 0.5850 5.3900 72.90 2.7986 6 391.0 19.20 396.90 21.14 19.70 + 0.26838 0.00 9.690 0 0.5850 5.7940 70.60 2.8927 6 391.0 19.20 396.90 14.10 18.30 + 0.23912 0.00 9.690 0 0.5850 6.0190 65.30 2.4091 6 391.0 19.20 396.90 12.92 21.20 + 0.17783 0.00 9.690 0 0.5850 5.5690 73.50 2.3999 6 391.0 19.20 395.77 15.10 17.50 + 0.22438 0.00 9.690 0 0.5850 6.0270 79.70 2.4982 6 391.0 19.20 396.90 14.33 16.80 + 0.06263 0.00 11.930 0 0.5730 6.5930 69.10 2.4786 1 273.0 21.00 391.99 9.67 22.40 + 0.04527 0.00 11.930 0 0.5730 6.1200 76.70 2.2875 1 273.0 21.00 396.90 9.08 20.60 + 0.06076 0.00 11.930 0 0.5730 6.9760 91.00 2.1675 1 273.0 21.00 396.90 5.64 23.90 + 0.10959 0.00 11.930 0 0.5730 6.7940 89.30 2.3889 1 273.0 21.00 393.45 6.48 22.00 + 0.04741 0.00 11.930 0 0.5730 6.0300 80.80 2.5050 1 273.0 21.00 396.90 7.88 11.90 diff --git a/example/kfp_example_with_dewret.py b/example/kfp_example_with_dewret.py new file mode 100644 index 00000000..42ebdb0b --- /dev/null +++ b/example/kfp_example_with_dewret.py @@ -0,0 +1,170 @@ +# Copyright Shray15, 2024- +# 
https://github.com/Shray15/Boston_House_Pricing/blob/53837c94643531f93c0ff00b40a7fbef9793f17d/boston-house-kfp.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dewret.tasks import task, workflow +from dewret.data import DataManager, DatasetPath +from dewret.renderers.kubeflow import KFPDataset + +DATASET_MANAGER = DataManager() + +Artifact = DatasetPath +Dataset = KFPDataset[DatasetPath] + +output_dataset: Artifact = DATASET_MANAGER.path() +out_data: Artifact = DATASET_MANAGER.path() +X_train_artifact: Dataset = DATASET_MANAGER.path() +X_test_artifact: Dataset = DATASET_MANAGER.path() +y_train_artifact: Dataset = DATASET_MANAGER.path() +y_test_artifact: Dataset = DATASET_MANAGER.path() +model_file: Artifact = DATASET_MANAGER.path() +metrics_output: Dataset = DATASET_MANAGER.path() +prediction: Dataset = DATASET_MANAGER.path() +X_test_scaled: Dataset = DATASET_MANAGER.path() + +@task() +def load_dataset_from_gcs(bucket_name: str, blob_name: str) -> Dataset: + import pandas as pd + from minio import Minio + from minio.error import S3Error + import io + client = Minio('minio-service.default:9000', + 'minio', + 'minio123', + secure=False) + response = client.get_object(bucket_name, blob_name) + + data = pd.read_csv(io.BytesIO(response.data), header=None, delim_whitespace=True, comment="#") + data.to_csv(output_dataset, header=True, index=False) + + return output_dataset + +@task() +def preprocess_the_dataset(dataset_content: Dataset) -> Dataset: + import pandas as pd + data = pd.read_csv(dataset_content, header=0) + if data.isna().sum().any(): + raise ValueError("The data needs preprocessing (remove missing values)") + + data.to_csv(out_data, index=False) + return out_data + +@task() +def train_test_split( + input_df: Dataset, +) -> tuple[Dataset, Dataset, Dataset, Dataset]: + from sklearn.model_selection import train_test_split + import pandas as pd + df = pd.read_csv(input_df) + X = df.iloc[:, :-1] + y = df.iloc[:, -1] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) + + X_train.to_csv(X_train_artifact, index=False) + X_test.to_csv(X_test_artifact, index=False) + y_train.to_csv(y_train_artifact, index=False) + y_test.to_csv(y_test_artifact, index=False) + + return ( + X_train_artifact, + X_test_artifact, + y_train_artifact, + y_test_artifact, + ) + +@task() +def model_training( + X_train_input: Dataset, + X_test_input: Dataset, + y_train_input: Dataset, +) -> tuple[Dataset, Artifact]: + from sklearn.preprocessing import StandardScaler + from sklearn.linear_model import LinearRegression + import joblib + from minio import Minio + from minio.error import S3Error + import io + import pandas as pd + scaler = StandardScaler() + + X_train = pd.read_csv(X_train_input) + X_test = pd.read_csv(X_test_input) + y_train = pd.read_csv(y_train_input) + + X_train_scaled = scaler.fit_transform(X_train) + X_test_scaled2= pd.DataFrame(scaler.transform(X_test)) + X_test_scaled2.to_csv(X_test_scaled, index=False) # Fixing typo here + + regression = 
LinearRegression() + regression.fit(X_train_scaled, y_train) + + joblib.dump(regression, model_file) + client = Minio('minio-service.default:9000', + 'minio', + 'minio123', + secure=False) + client.fput_object('boston-house-data', 'data/model.pkl', str(model_file)) + return X_test_scaled, model_file + +@task() +def predict(X_test: Dataset, trained_model: Artifact) -> Dataset: + import joblib + import pandas as pd + from minio import Minio + from minio.error import S3Error + X_test_data = pd.read_csv(X_test) + + client = Minio('minio-service.default:9000', + 'minio', + 'minio123', + secure=False) + model_file = 'model.pkl' + client.fget_object('boston-house-data', 'data/model.pkl', model_file) + regression = joblib.load(model_file) + + predictions = regression.predict(X_test_data) + pd.DataFrame(predictions).to_csv(prediction, index=False) + return prediction + +@task() +def evaluate(y_test: Dataset, predictions: Dataset) -> Dataset: + from sklearn.metrics import mean_squared_error, mean_absolute_error + import pandas as pd + import numpy as np + y_test_data = pd.read_csv(y_test) + predictions_data = pd.read_csv(predictions) + + mae = mean_absolute_error(y_test_data, predictions_data) + mse = mean_squared_error(y_test_data, predictions_data) + rmse = np.sqrt(mse) + + with open(metrics_output, 'w') as f: + f.write(f'MAE: {mae}\n') + f.write(f'MSE: {mse}\n') + f.write(f'RMSE: {rmse}\n') + + return metrics_output + +@workflow() +def pipeline() -> Dataset: + read_data = load_dataset_from_gcs(bucket_name="boston-house-data", blob_name="data/housing.csv") + out_data = preprocess_the_dataset(dataset_content=read_data) + X_train_artifact, X_test_artifact, y_train_artifact, y_test_artifact = train_test_split(input_df=out_data) + X_test_scaled, trained_model = model_training( + X_train_input=X_train_artifact, + X_test_input=X_test_artifact, + y_train_input=y_train_artifact + ) # Fixed typo here + predicted_value = predict(X_test=X_test_scaled, trained_model=trained_model) + return evaluate(y_test= y_test_artifact, predictions=predicted_value) # Fixed typo here diff --git a/example/kubeflow_config.yaml b/example/kubeflow_config.yaml new file mode 100644 index 00000000..e6ef3b65 --- /dev/null +++ b/example/kubeflow_config.yaml @@ -0,0 +1,8 @@ +executor: + default: + packages: + - joblib + - minio==7.1.14 + - numpy + - pandas + - scikit-learn diff --git a/src/dewret/core.py b/src/dewret/core.py index 65a65a28..0c8112b0 100644 --- a/src/dewret/core.py +++ b/src/dewret/core.py @@ -55,6 +55,11 @@ T = TypeVar("T") +# In many cases, get_type_hints built-in functionality for this means it +# is not necessary. However, outside of a type-hinted function, this is +# the cleanest way. +# Note that we only consolidated Annotateds before hitting the next generic, +# i.e. Annotated[Annotated[Map[Annotated[str, "A"], str], "B"], "C"] -> (Map[Annotated[str, "A"], str], {"B", "C"}) def strip_annotations(parent_type: type) -> tuple[type, tuple[str]]: """Discovers and removes annotations from a parent type. 
@@ -187,6 +192,7 @@ class ConstructConfiguration:
     field_separator: str = "/"
     field_index_types: str = "int"
     simplify_ids: bool = False
+    strict: bool = True


 class ConstructConfigurationTypedDict(TypedDict):
@@ -204,6 +210,7 @@ class ConstructConfigurationTypedDict(TypedDict):
     field_separator: NotRequired[str]
     field_index_types: NotRequired[str]
     simplify_ids: NotRequired[bool]
+    strict: NotRequired[bool]


 @define
@@ -289,7 +296,8 @@ def default_renderer_config() -> RenderConfiguration:
         default_config: Callable[[], RenderConfiguration] = render_module.default_config
     except ImportError:
         return {}
-    return default_config()
+    dc = default_config()
+    return dc


 @lru_cache
@@ -308,6 +316,7 @@ def default_construct_config() -> ConstructConfiguration:
         allow_plain_dict_fields=False,
         field_separator="/",
         field_index_types="int",
+        strict=True,
     )


@@ -342,9 +351,11 @@ def get_render_configuration(key: str) -> RawType:
     Returns: (preferably) a JSON/YAML-serializable construct.
     """
     try:
-        return CONFIGURATION.get().render.get(key)
+        if (render := CONFIGURATION.get().render):
+            return render.get(key)
     except LookupError:
-        return default_renderer_config().get(key)
+        ...
+    return default_renderer_config().get(key)


 class WorkflowComponent:
@@ -613,6 +624,11 @@ class Raw:

     value: RawType

+    @property
+    def __type__(self) -> type:
+        """Provide the type of the `value` member."""
+        return type(self.value)
+
     def __hash__(self) -> int:
         """Provide a hash that is unique to the `value` member."""
         return hash(repr(self))
diff --git a/src/dewret/data.py b/src/dewret/data.py
new file mode 100644
index 00000000..e4e79700
--- /dev/null
+++ b/src/dewret/data.py
@@ -0,0 +1,25 @@
+from typing import IO, Generator, cast
+from pathlib import Path
+from os import PathLike
+from contextlib import contextmanager
+from tempfile import NamedTemporaryFile
+
+class Dataset:
+    ...
+
+class DatasetPath(Dataset, Path):
+    def __truediv__(self, other: PathLike[str] | str) -> Path:
+        return cast(Path, super().__truediv__(other))  # Cast this up to make sure mypy flags abuse of this
+
+class DataManager:
+    def path(self, mode: str = "r") -> DatasetPath:
+        # Idea is that this can be opened by the decorator.
+        return DatasetPath("/tmp/test")
+        # return cast(DatasetPath, NamedTemporaryFile(mode, delete=True, delete_on_close=False))
+
+    def io(self, mode: str = "r") -> IO[str] | IO[bytes]:
+        # Idea is that this can be opened by the decorator.
+        # This should be able to apply logic to the mode, given it is a literal, to get typehinting
+        # one or other of the union.
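+        # A sketch of how that could look (assumed, not implemented here):
+        #
+        #     @overload
+        #     def io(self, mode: Literal["r", "w", "a"]) -> IO[str]: ...
+        #     @overload
+        #     def io(self, mode: Literal["rb", "wb", "ab"]) -> IO[bytes]: ...
+        #
+        # so a caller passing a literal mode gets the narrowed IO type back.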
+ return cast(IO[str] | IO[bytes], Dataset()) + # return cast(IO[str] | IO[bytes], NamedTemporaryFile(mode, delete=True, delete_on_close=False)) diff --git a/src/dewret/render.py b/src/dewret/render.py index d702dcad..c0324551 100644 --- a/src/dewret/render.py +++ b/src/dewret/render.py @@ -73,6 +73,7 @@ def get_render_method( sys.modules["__renderer_mod__"] = module render_module = cast(BaseRenderModule, module) else: + sys.modules["__renderer_mod__"] = renderer render_module = renderer if isinstance(render_module, RawRenderModule): diff --git a/src/dewret/renderers/kubeflow.py b/src/dewret/renderers/kubeflow.py index 45dea167..deccf353 100644 --- a/src/dewret/renderers/kubeflow.py +++ b/src/dewret/renderers/kubeflow.py @@ -21,6 +21,7 @@ import uuid import itertools +import warnings from google.protobuf import json_format from kfp.pipeline_spec import pipeline_spec_pb2 from kfp.compiler import pipeline_spec_builder as builder @@ -32,6 +33,9 @@ from collections.abc import Mapping import yaml from typing import ( + TypeVar, + Annotated, + NamedTuple, TypedDict, NotRequired, get_origin, @@ -40,12 +44,18 @@ Any, Unpack, Iterable, + Callable, + Optional, + List, ) from types import UnionType -from inspect import isclass +import inspect +from inspect import isclass, getsourcefile, getsource +from pathlib import Path from sympy import Basic, Tuple, Dict, jscode, Symbol from contextvars import ContextVar +from dewret.data import Dataset, DatasetPath from dewret.core import ( Raw, RawType, @@ -55,8 +65,11 @@ FactoryCall, Workflow, BaseStep, + NestedStep, StepReference, ParameterReference, + DatasetParameterReference, + DatasetParameter, expr_to_references, ) from dewret.utils import ( @@ -67,26 +80,269 @@ Unset, ) from dewret.render import base_render -from dewret.core import Reference, get_render_configuration, set_render_configuration +from dewret.core import Reference, get_render_configuration, set_render_configuration, strip_annotations +T = TypeVar("T") PIPELINE: ContextVar[Pipeline] = ContextVar("pipeline") -CHANNELS: ContextVar[dict[Reference[Any], dsl.pipeline_channel.PipelineChannel]] = ContextVar("channels") +CHANNELS: ContextVar[dict[Reference[Any], dsl.pipeline_channel.PipelineChannel]] = ( + ContextVar("channels") +) +KFPDataset = Annotated[T, "KFPDataset"] + +def extend_signature(func, inputs, return_ann): + import inspect + from collections import OrderedDict + sig = inspect.signature(func) + parameters = OrderedDict() + for missing_input in inputs - set(sig.parameters): + parameters[missing_input] = inspect.Parameter(missing_input, inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=dsl.Input[dsl.Artifact]) # Check + parameters["Output"] = inspect.Parameter(return_ann, inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=dsl.Output[dsl.Artifact]) + return parameters + +# pipelines/sdk/python/kfp/dsl/component_factory.py +def create_component_from_step( + step: BaseStep, + component_spec: dsl.structures.ComponentSpec, + base_image: Optional[str] = None, + target_image: Optional[str] = None, + packages_to_install: List[str] = None, + pip_index_urls: Optional[List[str]] = None, + output_component_file: Optional[str] = None, + install_kfp_package: bool = True, + kfp_package_path: Optional[str] = None, + pip_trusted_hosts: Optional[List[str]] = None, + use_venv: bool = False, +) -> dsl.python_component.PythonComponent: + """Implementation for the @component decorator. + + The decorator is defined under component_decorator.py. 
See the + decorator for the canonical documentation for this function. + """ + + packages_to_install_command = dsl.component_factory._get_packages_to_install_command( + install_kfp_package=install_kfp_package, + target_image=target_image, + kfp_package_path=kfp_package_path, + packages_to_install=packages_to_install, + pip_index_urls=pip_index_urls, + pip_trusted_hosts=pip_trusted_hosts, + use_venv=use_venv, + ) + + command = [] + args = [] + if base_image is None: + base_image = dsl.component_factory._DEFAULT_BASE_IMAGE + warnings.warn( + ("The default base_image used by the @dsl.component decorator will switch from 'python:3.9' to 'python:3.10' on Oct 1, 2025. To ensure your existing components work with versions of the KFP SDK released after that date, you should provide an explicit base_image argument and ensure your component works as intended on Python 3.10." + ), + FutureWarning, + stacklevel=2, + ) + + component_image = base_image + func = step.task.target -def ensure_channels(expression: Any) -> Any: + if target_image: + component_image = target_image + command, args = dsl.component_factory._get_command_and_args_for_containerized_component( + function_name=func.__name__,) + else: + command, args = dsl.component_factory._get_command_and_args_for_lightweight_component( + func=func) + # RMV - globals!? + # Need to strip signature of annotations for original function as not guaranteed to be imported. + sig = inspect.signature(func) + return_tuple = False + output_name = py_name(step.name) + def to_repr(typ): + nonlocal return_tuple + return_ann, artifacts = to_kfp_type(output_name, typ) + if artifacts: + return_ann = artifacts[output_name].__qualname__ + else: + return_type = return_ann["type"] + if hasattr(return_type, "_fields"): + annotations = [(key, to_repr(return_type.__annotations__[key])) for key in return_type._fields] + return_tuple = annotations # what if nested? 
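+            # The joined pairs below are spliced into the generated module
+            # source as a NamedTuple definition, e.g. (hypothetical names):
+            #   split_1 = NamedTuple('split_1', (('split_1__0', Dataset), ('split_1__1', Dataset)))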
+ annotations = ", ".join(f"('{k}', {v})" for k, v in annotations) + command[-1] += f"{return_type.__name__} = NamedTuple('{return_type.__name__}', ({annotations}))\n" + return_ann = return_type.__name__ + return return_ann + return_ann = to_repr(step.return_type) + signature = [] + in_paths = [] + for param in sig.parameters: + ann, artifacts = to_kfp_type(param, sig.parameters[param].annotation) + if artifacts: + signature.append((param, f"Input[{artifacts[param].__qualname__}]")) + in_paths.append(param) + else: + signature.append((param, f"{sig.parameters[param].annotation.__qualname__}")) + output_datasets = {} + wrapper_str = ', '.join(f'{n}: {t}' for n, t in signature) + print(step.return_type) + command[-1] += """ +from kfp.dsl.types.artifact_types import * +import typing +from typing import NamedTuple +import os +import shutil +from tempfile import mkstemp +from pathlib import Path +""" + dataset_parameters = [] + if return_tuple: + output_param = ", ".join(f"{key}: dsl.Output[{ann}]" for key, ann in return_tuple) + else: + output_param = f"{output_name}: dsl.Output[{return_ann}]" + command[-1] += f"def {func.__name__}_({wrapper_str}, {output_param}):\n paths = {{}}\n unpaths = {{}}\n" + for p in in_paths: + command[-1] += f" {p} = {p}.path\n" + dataset_parameters = [] + for key, arg in step.arguments.items(): + if isinstance(arg, DatasetParameterReference): + command[-1] += f" f, {key} = mkstemp(); os.close(f)\n" + command[-1] += f" paths['{key}'] = Path({key})\n" + command[-1] += f" unpaths[Path({key})] = 0\n" + dataset_parameters.append((key, arg)) + command[-1] += f" globals().update(paths)\n final_output = {func.__name__}({', '.join(f'{a}={a}' for a in sig.parameters)})\n" + if return_tuple: + command[-1] += f" {output_name} = ({', '.join(key for key, _ in return_tuple)})\n" + command[-1] += f" for p, q in zip(final_output, {output_name}): shutil.move(p, q.path)\n" + else: + command[-1] += f" shutil.move(final_output, {output_name}.path)\n" + command[-1] += " for p in unpaths: shutil.rmtree(str(p), ignore_errors=True)\n" + # we could use unpaths[final_output] to update metadata here. 
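+    # The wrapper generated above shadows the user function as `<name>_`;
+    # append the underscore so the executor entrypoint calls the wrapper
+    # rather than the original function.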
+ args[-1] += "_" + + component_spec.implementation = dsl.structures.Implementation( + container=dsl.structures.ContainerSpecImplementation( + image=component_image, + command=packages_to_install_command + command, + args=args, + )) + + module_path = Path(getsourcefile(func)) + module_path.resolve() + + component_name = dsl.component_factory._python_function_name_to_component_name(func.__name__) + component_info = dsl.component_factory.ComponentInfo( + name=component_name, + function_name=func.__name__, + func=func, + target_image=target_image, + module_path=module_path, + component_spec=component_spec, + output_component_file=output_component_file, + base_image=base_image, + packages_to_install=packages_to_install, + pip_index_urls=pip_index_urls, + pip_trusted_hosts=pip_trusted_hosts) + + if dsl.component_factory.REGISTERED_MODULES is not None: + dsl.component_factory.REGISTERED_MODULES[component_name] = component_info + + if output_component_file: + component_spec.save_to_component_yaml(output_component_file) + + return dsl.python_component.PythonComponent( + component_spec=component_spec, python_func=func) + +def get_name_to_specs(func_params, return_ann, step_name: str, containerized: bool = False): + name_to_input_specs = {} + name_to_output_specs = {} + # in_artifacts = {} + + for key, func_param in func_params: + func_param, ann = strip_annotations(func_param) + typ, _ = to_kfp_type(key, func_param) + if dsl.types.type_annotations.OutputAnnotation in ann: + # Trying to remove this on the basis that having a single output, + # while a strong constraint, is not a hard limitation (tuples are possible) + # and it lets us create an idiomatic graph. + ... + # name_to_output_specs[key] = dsl.structures.OutputSpec( + # **typ, + # ) + else: + name_to_input_specs[key] = dsl.structures.InputSpec( + **typ, + ) + # if set(in_artifacts) & set(input_artifacts): + # raise TypeError(f"Clashing naming keys for input artifacts: {in_artifacts} -- {input_artifacts}") + # in_artifacts.update(input_artifacts) + # if containerized: + # if return_ann not in [ + # inspect.Parameter.empty, + # structures.ContainerSpec, + # ]: + # raise TypeError( + # 'Return annotation should be either ContainerSpec or omitted for container components.' 
+ # ) + # ignore omitted returns + if return_ann is None: + pass + prefix = py_name(step_name) or dsl.component_factory.SINGLE_OUTPUT_NAME + return_type, _ = to_kfp_type(prefix, return_ann) + return_type = return_type["type"] + # is NamedTuple + if hasattr(return_type, "_fields"): + output_specs, _ = make_output_spec(prefix, return_ann) + # if set(out_artifacts) & set(return_artifacts): + # raise TypeError(f"Clashing artifact names: {out_artifacts} -- {return_artifacts}") + # name_to_output_specs.update(return_artifacts) + for name, output_spec in output_specs.items(): + if output_spec is not None: + name_to_output_specs[name] = output_spec + else: + rettyp, _ = make_output_spec( + dsl.component_factory.SINGLE_OUTPUT_NAME, return_ann + ) + # name_to_output_specs.update(return_artifacts) + if rettyp is not None: + name_to_output_specs[prefix] = rettyp + # if set(name_to_input_specs) & set(in_artifacts): + # raise TypeError(f"Clashing artifact names with parameters: {in_artifacts} -- {name_to_input_specs}") + # name_to_input_specs.update({ + # key: dsl.structures.InputSpec( + # **dsl.component_factory.make_input_output_spec_args(art) + # ) + # for key, art in in_artifacts.items() + # }) + return name_to_input_specs, name_to_output_specs + +def ensure_channels(expression: Any, task_name: str | None) -> Any: def remap(ref): - if isinstance(ref, Reference): + if isinstance(ref, Reference) and not isinstance(ref, DatasetParameterReference): + # RMV: is this OK re. artifacts? if ref not in channels: + kfp_type, artifacts = to_kfp_type(ref.name, with_type(ref)) + # if kfp_type["type"] != "Artifact": channels[ref] = dsl.pipeline_channel.create_pipeline_channel( - name=ref.name, - channel_type=to_cwl_type(ref.name, ref.__type__)["type"], # type: ignore - task_name=ref._.step.name, + name=py_name(ref.name), + channel_type=kfp_type["type"], # type: ignore + task_name=k8s_name(ref._.step.name), is_artifact_list=False, ) + # for key, art in artifacts.items(): + # if key not in channels: + # spec_args = dsl.component_factory.make_input_output_spec_args(art) + # channels[key] = dsl.pipeline_channel.create_pipeline_channel( + # name=k8s_name(key), + # channel_type=spec_args["type"], # type: ignore + # task_name=k8s_name(key), + # is_artifact_list=spec_args["is_artifact_list"], + # ) return channels[ref] + elif isinstance(ref, Raw): + return ref.value + channels = CHANNELS.get() expr, to_check = expr_to_references(expression, remap=remap) return expr + class DewretPipelineTask(dsl.pipeline_task.PipelineTask): def __init__( self, @@ -99,25 +355,31 @@ def __init__( """Initilizes a PipelineTask instance.""" # import within __init__ to avoid circular import from kfp.dsl.tasks_group import TasksGroup + self.state = dsl.pipeline_task.TaskState.FUTURE self.parent_task_group: None | TasksGroup = None args = args or {} - for input_name, argument_value in args.items(): - if input_name not in component_spec.inputs: - raise ValueError( - f'Component {component_spec.name!r} got an unexpected input:' - f' {input_name!r}.') - - input_spec = component_spec.inputs[input_name] - - type_utils.verify_type_compatibility( - given_value=argument_value, - expected_spec=input_spec, - error_message_prefix=( - f'Incompatible argument passed to the input ' - f'{input_name!r} of component {component_spec.name!r}: '), - ) + if component_spec.inputs: + for input_name, argument_value in args.items(): + if input_name not in component_spec.inputs: + raise ValueError( + f"Component {component_spec.name!r} got an unexpected input:" + f" 
{input_name!r}." + ) + + input_spec = component_spec.inputs[input_name] + + # TODO: we cannot use this as-is, since the value->type + # map is not the same as dewret. + # type_utils.verify_type_compatibility( + # given_value=argument_value, + # expected_spec=input_spec, + # error_message_prefix=( + # f"Incompatible argument passed to the input " + # f"{input_name!r} of component {component_spec.name!r}: " + # ), + # ) self.component_spec = component_spec @@ -126,7 +388,8 @@ def __init__( inputs=dict(args.items()), dependent_tasks=[], component_ref=component_spec.name, - enable_caching=execution_caching_default) + enable_caching=execution_caching_default, + ) self._run_after: list[str] = [] self.importer_spec = None @@ -137,41 +400,50 @@ def __init__( self.platform_config = {} def validate_placeholder_types( - component_spec: dsl.structures.ComponentSpec) -> None: + component_spec: dsl.structures.ComponentSpec, + ) -> None: inputs_dict = component_spec.inputs or {} outputs_dict = component_spec.outputs or {} for arg in itertools.chain( (component_spec.implementation.container.command or []), - (component_spec.implementation.container.args or [])): + (component_spec.implementation.container.args or []), + ): dsl.pipeline_task.check_primitive_placeholder_is_used_for_correct_io_type( - inputs_dict, outputs_dict, arg) + inputs_dict, outputs_dict, arg + ) if component_spec.implementation.container is not None: validate_placeholder_types(component_spec) self.container_spec = self._extract_container_spec_and_convert_placeholders( - component_spec=component_spec) + component_spec=component_spec + ) elif component_spec.implementation.importer is not None: self.importer_spec = component_spec.implementation.importer - self.importer_spec.artifact_uri = args['uri'] + self.importer_spec.artifact_uri = args["uri"] else: self.pipeline_spec = self.component_spec.implementation.graph - self._outputs = {output.name: ensure_channels(output)} + self._outputs = {output.name: ensure_channels(output, component_spec.name)} - args = {arg.name: ensure_channels(arg) for arg in args} + # args = {arg: ensure_channels(arg) for arg in args} self._inputs = args self._channel_inputs = [ - value for _, value in args.items() + value + for _, value in args.items() if isinstance(value, dsl.pipeline_channel.PipelineChannel) - ] + dsl.pipeline_channel.extract_pipeline_channels_from_any([ - value for _, value in args.items() - if not isinstance(value, dsl.pipeline_channel.PipelineChannel) - ]) + ] + dsl.pipeline_channel.extract_pipeline_channels_from_any( + [ + value + for _, value in args.items() + if not isinstance(value, dsl.pipeline_channel.PipelineChannel) + ] + ) if execute_locally: self._execute_locally(args=args) + def register_task_handler( task: dsl.pipeline_task.PipelineTask, ) -> dsl.pipeline_task.PipelineTask: @@ -193,13 +465,19 @@ def register_task_handler( class BuilderPipeline(Pipeline): """ContextVar-based Pipeline.""" + old_pipeline: Pipeline | None = None + def __enter__(self) -> "BuilderPipeline": """Ensure a pipeline is set for tasks created in this context.""" - if Pipeline._default_pipeline: - raise Exception("Nested pipelines are not allowed.") + # if Pipeline._default_pipeline: + # raise Exception("Nested pipelines are not allowed.") Pipeline._default_pipeline = self + try: + self.old_pipeline = PIPELINE.get() + except LookupError: + ... 
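+        # Unlike stock KFP, nested BuilderPipelines are tolerated here: any
+        # enclosing pipeline is stashed so __exit__ can restore it to the
+        # PIPELINE context variable.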
PIPELINE.set(self) CHANNELS.set({}) @@ -207,7 +485,7 @@ def __enter__(self) -> "BuilderPipeline": def __exit__(self, *_: Any) -> None: """Reset the pipeline for new tasks to None.""" - PIPELINE.set(None) + PIPELINE.set(self.old_pipeline) CHANNELS.set({}) Pipeline._default_pipeline = None @@ -230,6 +508,7 @@ class CommandInputSchema(TypedDict): fields: NotRequired[dict[str, "CommandInputSchema"]] items: NotRequired["InputSchemaType"] default: NotRequired[RawType] + artifacts: NotRequired[list[type[DatasetPath]]] InputSchemaType = ( @@ -238,6 +517,7 @@ class CommandInputSchema(TypedDict): | list[str] | list["InputSchemaType"] | dict[str, "str | InputSchemaType"] + | DatasetPath ) @@ -301,19 +581,20 @@ def _render(ref: Any) -> Basic | RawType: return ReferenceDefinition(source=str(expr), value_from=None) -class CWLRendererConfiguration(TypedDict): +class ExecutorConfiguration(TypedDict): + packages: list[str] + +class KubeflowRendererConfiguration(TypedDict): """Configuration for the renderer. Attributes: - allow_complex_types: can input/output types be other than raw? - factories_as_params: should factories be treated as input or steps? + executor: settings to pass on to the executor. """ - allow_complex_types: NotRequired[bool] - factories_as_params: NotRequired[bool] + executor: NotRequired[dict[str, ExecutorConfiguration]] -def default_config() -> CWLRendererConfiguration: +def default_config() -> KubeflowRendererConfiguration: """Default configuration for this renderer. This is a hook-like call to give a configuration dict that this renderer @@ -323,8 +604,7 @@ def default_config() -> CWLRendererConfiguration: from YAML/JSON. """ return { - "allow_complex_types": False, - "factories_as_params": False, + "executor": {"default": {"packages": []}}, } @@ -437,35 +717,60 @@ def from_step(cls, step: BaseStep) -> "StepDefinition": Args: step: step to convert. """ - inputs = {} - for key, param in step.arguments.items(): - typ = with_type(param) - typ = type_utils._annotation_to_type_struct(typ) - input_output_spec_args = {"type": typ, "is_artifact_list": False} - inputs[key] = dsl.structures.InputSpec( - **input_output_spec_args, - ) + # Uses of global dataset parameters are to create datasets for output, + # equivalent to KFP's Output[Artifact] annotation. + # Ignore dataset parameter references when constructing the function. + # They will be _actual_ globals when it runs. 
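+        # For instance (illustrative, matching the example pipeline):
+        #     out_data: Artifact = DATASET_MANAGER.path()
+        # `out_data` reaches the renderer as a DatasetParameterReference and
+        # is treated like KFP's Output[Artifact], not as a component input.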
+ param_types = [(key, with_type(value)) for key, value in step.arguments.items() if not isinstance(value, DatasetParameterReference)] + inputs, outputs = get_name_to_specs(param_types, step.return_type, step_name=k8s_name(step.name)) + executor_config = get_render_configuration("executor")["default"] + + default_image = executor_config.get("image", "python:3.9") + default_packages = executor_config.get("packages") + default_pip_index_urls = executor_config.get("pip_index_urls") + default_kfp_package_path = executor_config.get("kfp_package_path") + default_pip_trusted_hosts = executor_config.get("pip_trusted_hosts") container = dsl.structures.ContainerSpecImplementation( - image="python:x.xx", - command=["python"], + image="python:3.9", + command=["python"], # RMV args=[], ) - - rettyp = to_output_schema(dsl.component_factory.SINGLE_OUTPUT_NAME, step.return_type) - outputs = {} - outputs[dsl.component_factory.SINGLE_OUTPUT_NAME] = rettyp component_spec = dsl.structures.ComponentSpec( name=step.name, description=f"{step.name} via dewret", inputs=inputs, - # outputs=to_output_schema("out", step.return_type)["fields"], # make_output_spec(return_ann) + # outputs=make_output_spec("out", step.return_type)["fields"], # make_output_spec(return_ann) outputs=outputs, # make_output_spec(return_ann) - implementation=dsl.structures.Implementation(container), + implementation=dsl.structures.Implementation(container) ) - python_cmpt = dsl.python_component.PythonComponent( - component_spec=component_spec, python_func=step.task.target + + if isinstance(step, NestedStep): + cmpt = dsl.container_component_class.ContainerComponent(component_spec, step.task) + else: + def fn(*args, **kwargs): + ... + cmpt = create_component_from_step( + base_image=default_image, + component_spec=component_spec, + packages_to_install=default_packages, + pip_index_urls=default_pip_index_urls, + kfp_package_path=default_kfp_package_path, + pip_trusted_hosts=default_pip_trusted_hosts, + step=step + ) + + task_inputs = { + key: ensure_channels( + arg, + step.name + ) for key, arg in step.arguments.items() + if not isinstance(arg, DatasetParameterReference) + } + task_spec = DewretPipelineTask( + cmpt.component_spec, + task_inputs, + output=step.make_reference(workflow=step.__workflow__), ) - task_spec = DewretPipelineTask(python_cmpt.component_spec, {}, output=step.make_reference(workflow=step.__workflow__)) component_spec.implementation = dsl.structures.Implementation( container=dsl.structures.ContainerSpecImplementation( image="IMAGE", @@ -505,25 +810,15 @@ def render(self) -> dict[str, RawType]: } -def cwl_type_from_value(label: str, val: RawType | Unset) -> CommandInputSchema: - """Find a CWL type for a given (possibly Unset) value. - - Args: - label: the label for the variable being checked to prefill the input def and improve debugging info. - val: a raw Python variable or an unset variable. - - Returns: - Input schema type. - """ - if val is not None and hasattr(val, "__type__"): - raw_type = val.__type__ +def dataset_path_to_artifact(typ): + typ, annotateds = strip_annotations(typ) + if "KFPDataset" in annotateds: + typ = dsl.types.artifact_types.Dataset else: - raw_type = type(val) + typ = dsl.types.artifact_types.Artifact + return typ - return to_cwl_type(label, raw_type) - - -def to_cwl_type(label: str, typ: type) -> CommandInputSchema: +def to_kfp_type(label: str, full_typ: type) -> tuple[CommandInputSchema, dict[str, type[dsl.types.artifact_types.Artifact]]]: """Map Python types to CWL types. 
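+
+    Alongside the schema dict, a mapping of label to KFP artifact class is
+    returned for any `Dataset`-typed values encountered, so that artifact
+    inputs/outputs can be registered separately from plain parameters.
+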
    Args:

@@ -533,8 +828,10 @@
     Returns:
         CWL specification type dict.
     """
-    typ_dict: CommandInputSchema = {"label": label, "type": ""}
+    typ, annotateds = strip_annotations(full_typ)
+    typ_dict: CommandInputSchema = {"type": ""}
     base: Any | None = typ
+    artifacts = {}
     args = get_args(typ)
     if args:
         base = get_origin(typ)
@@ -554,22 +851,39 @@
     elif base == bytes:
         raise RuntimeError("KFP cannot currently handle bytes as a annotation type.")
     elif isinstance(typ, UnionType):
-        typ_dict.update(
-            {"type": tuple(to_cwl_type(label, item)["type"] for item in args)}
-        )
+        raise RuntimeError("KFP cannot currently handle unions as an annotation type.")
+        #typ_dict.update(
+        #    {"type": NamedTuple(label, ((f"item{n}", item) for n, item in enumerate(args)))}
+        #)
+        #typ_dict["type"].__annotations__ = {f"item{n}": item for n, item in enumerate(args)}
     elif isclass(base) and issubclass(base, Iterable):
         try:
             if len(args) > 1:
+                # This is only true for a pipeline - components can output only one artifact.
+                # artifact_args = [arg for arg in args if issubclass(strip_annotateds(arg)[0], DatasetPath)]
+                # if artifact_args:
+                #     if len(args) != len(artifact_args):
+                #         raise TypeError(f"Tuple return must be all artifacts or no artifacts: {args} -- {artifact_args}")
+                #     if len({type(arg) for arg in args}) != 1:
+                #         raise TypeError(f"Can only have one artifact type in a tuple: {arg}")
+                #     print(artifact_args, label)
+                #     typ_dict.update(dsl.component_factory.make_input_output_spec_args(list[dataset_path_to_artifact(artifact_args[0])]))
+                # else:
+                tuple_label = label.replace("-", "_")
                 typ_dict.update(
                     {
-                        "type": "array",
-                        "items": [to_cwl_type(label, t)["type"] for t in args],
+                        "type": NamedTuple(tuple_label, ((f"{tuple_label}__{n}", item) for n, item in enumerate(args)))
                     }
                 )
+                typ_dict["type"].__annotations__ = {f"{tuple_label}__{n}": item for n, item in enumerate(args)}
             elif len(args) == 1:
+                interior_typ, interior_artifacts = to_kfp_type(label, args[0])
                 typ_dict.update(
-                    {"type": "array", "items": to_cwl_type(label, args[0])["type"]}
+                    {"type": f"List[{interior_typ['type']}]"}
                 )
+                if set(artifacts.keys()) & set(interior_artifacts.keys()):
+                    raise TypeError(f"Artifacts have overlapping keys: {artifacts} -- {interior_artifacts}")
+                artifacts.update(interior_artifacts)
             else:
                 typ_dict["type"] = "array"
         except IndexError as err:
@@ -578,9 +892,12 @@
             ) from err
     elif get_render_configuration("allow_complex_types"):
         typ_dict["type"] = typ if isinstance(typ, str) else typ.__name__
-    else:
-        raise TypeError(f"Cannot render type ({typ}) to CWL for {label}")
-    return typ_dict
+    elif isinstance(typ, type) and issubclass(typ, Dataset):
+        artifacts[label] = dataset_path_to_artifact(full_typ)
+        typ_dict.update(dsl.component_factory.make_input_output_spec_args(artifacts[label]))
+    elif typ:
+        raise TypeError(f"Cannot render type ({typ}) to KFP for {label}; base: {base}; args: {args}")
+    return typ_dict, artifacts


 class CommandOutputSchema(CommandInputSchema):
@@ -598,30 +915,11 @@ class CommandOutputSchema(CommandInputSchema):
     source: NotRequired[list[str]]


-def raw_to_command_input_schema(label: str, value: RawType | Unset) -> InputSchemaType:
-    """Infer the CWL input structure for this value.
-
-    Inspects the value, to work out an appropriate structure
-    describing it in CWL.
-
-    Args:
-        label: name of the variable.
- value: basic-typed variable from which to build structure. - - Returns: - Structure used to define (possibly compound) basic types for input. - """ - if isinstance(value, dict) or isinstance(value, list): - return _raw_to_command_input_schema_internal(label, value) - else: - return cwl_type_from_value(label, value) - - -def to_output_schema( +def make_output_spec( label: str, typ: type[RawType | AttrsInstance | DataclassProtocol], output_source: str | None = None, -) -> dsl.structures.OutputSpec: +) -> tuple[dsl.structures.OutputSpec, dict[str, type[DatasetPath]]]: """Turn a step's output into an output schema. Takes a source, type and label and provides a description for CWL. @@ -635,137 +933,58 @@ def to_output_schema( CWL CommandOutputSchema-like structure for embedding into an `outputs` block """ fields = None + artifacts = {} if attrs_has(typ): - fields = { - str(field.name): cast( - dsl.structures.OutputSpec, to_output_schema(field.name, field.type) + fields = {} + for field in attrs_fields(typ): + output_spec, field_artifacts = make_output_spec(field.name, field.type) + fields[str(field.name)] = cast( + dsl.structures.OutputSpec, output_spec ) - for field in attrs_fields(typ) - } + if set(artifacts) & set(field_artifacts): + raise TypeError(f"Clashing key names: {artifacts} -- {field_artifacts}") + artifacts.update(field_artifacts) elif is_dataclass(typ): - fields = { - str(field.name): cast( - dsl.structures.OutputSpec, to_output_schema(field.name, field.type) + fields = {} + for field in dataclass_fields(typ): + output_spec, field_artifacts = make_output_spec(field.name, field.type) + fields[str(field.name)] = cast( + dsl.structures.OutputSpec, output_spec ) - for field in dataclass_fields(typ) - } + if set(artifacts) & set(field_artifacts): + raise TypeError(f"Clashing key names: {artifacts} -- {field_artifacts}") + artifacts.update(field_artifacts) + else: + kfp_type, _ = to_kfp_type(label, typ) + kfp_type = kfp_type["type"] + if hasattr(kfp_type, "_fields"): + fields = {} + for name in kfp_type._fields: + output_spec, field_artifacts = make_output_spec(name, kfp_type.__annotations__[name]) + fields[name] = cast( + dsl.structures.OutputSpec, output_spec + ) + if set(artifacts) & set(field_artifacts): + raise TypeError(f"Clashing key names: {artifacts} -- {field_artifacts}") + artifacts.update(field_artifacts) if fields: - output = dsl.structures.OutputSpec( - type=fields, - ) + output = fields else: # TODO: this complains because NotRequired keys are never present, # but that does not seem like a problem here - likely a better solution. 
- print(to_cwl_type(label, typ)["type"]) - output = dsl.structures.OutputSpec( - type=to_cwl_type(label, typ)["type"] # type: ignore - ) + kfp_type, inner_artifacts = to_kfp_type(label, typ) + if set(artifacts) & set(inner_artifacts): + raise TypeError(f"Clashing key names: {artifacts} -- {inner_artifacts}") + artifacts.update({ + key: dsl.structures.OutputSpec( + **dsl.component_factory.make_input_output_spec_args(art) + ) for key, art in inner_artifacts.items() + }) + output = dsl.structures.OutputSpec(**kfp_type) # if output_source is not None: # output["outputSource"] = output_source - return output - - -def _raw_to_command_input_schema_internal( - label: str, value: RawType | Unset -) -> CommandInputSchema: - structure: CommandInputSchema = cwl_type_from_value(label, value) - if isinstance(value, dict): - structure["fields"] = { - key: _raw_to_command_input_schema_internal(key, val) - for key, val in value.items() - } - elif isinstance(value, list): - typeset = set(get_args(value)) - if not typeset: - typeset = { - item.__type__ - if item is not None and hasattr(item, "__type__") - else type(item) - for item in value - } - if len(typeset) != 1: - raise RuntimeError( - "For CWL, an input array must have a consistent type, " - "and we need at least one element to infer it, or an explicit typehint." - ) - structure["items"] = to_cwl_type(label, typeset.pop())["type"] - elif not isinstance(value, Unset): - structure["default"] = firm_to_raw(value) - return structure - - -@define -class InputsDefinition: - """CWL-renderable representation of an input parameter block. - - Turns dewret results into a CWL input block. - - Attributes: - input: sequence of results from a workflow. - """ - - inputs: dict[str, "CommandInputParameter"] - - @define - class CommandInputParameter: - """CWL-renderable reference to a specific input. - - Attributes: - type: type of variable - name: fully-qualified name of the input. - """ - - type: InputSchemaType - default: RawType | Unset - label: str - - @classmethod - def from_parameters( - cls, parameters: list[ParameterReference[Any] | FactoryCall] - ) -> "InputsDefinition": - """Takes a list of parameters into a CWL structure. - - Uses the parameters to fill out the necessary input fields. - - Returns: - CWL-like structure representing all workflow outputs. - """ - parameters_dedup = { - p._.parameter for p in parameters if isinstance(p, ParameterReference) - } - parameters = list(parameters_dedup) + [ - p for p in parameters if not isinstance(p, ParameterReference) - ] - return cls( - inputs={ - input.name: cls.CommandInputParameter( - label=input.__name__, - default=(default := flatten_if_set(input.__default__)), - type=raw_to_command_input_schema( - label=input.__original_name__, value=default - ), - ) - for input in parameters - } - ) - - def render(self) -> dict[str, RawType]: - """Render to a dict-like structure. - - Returns: - Reduced form as a native Python dict structure for - serialization. - """ - result: dict[str, RawType] = {} - for key, input in self.inputs.items(): - # Would rather not cast, but CommandInputSchema is dict[RawType] - # by construction, where type is seen as a TypedDict subclass. 
- item = firm_to_raw(cast(FirmType, input.type)) - if isinstance(item, dict) and not isinstance(input.default, Unset): - item["default"] = firm_to_raw(input.default) - result[key] = item - return result + return output, artifacts @define @@ -802,7 +1021,7 @@ def from_results( def _build_results(result: Any) -> RawType: if isinstance(result, Reference): # TODO: need to work out how to tell mypy that a TypedDict is also dict[str, RawType] - return to_output_schema( # type: ignore + return make_output_spec( # type: ignore with_field(result), with_type(result), output_source=to_name(result) ) results = result @@ -849,6 +1068,13 @@ def render(self) -> dict[str, RawType] | list[RawType]: else {key: crawl_raw(output) for key, output in self.outputs.items()} ) + +def py_name(name: str | None) -> str | None: + return name and name.replace("-", "_").replace("[", "__").replace("]", "") + +def k8s_name(name: str | None) -> str | None: + return name and name.replace("_", "-").replace("[", "--").replace("]", "") + class DewretGraphComponent(dsl.base_component.BaseComponent): """CWL-renderable workflow. @@ -861,7 +1087,7 @@ class DewretGraphComponent(dsl.base_component.BaseComponent): @classmethod def from_workflow( - cls, workflow: Workflow, name: None | str = None + cls, workflow: Workflow, name: None | str = None, execute: bool = True ) -> "DewretGraphComponent": """Build from a `Workflow`. @@ -871,18 +1097,22 @@ def from_workflow( workflow: workflow to convert. name: name of this workflow, if it should have one. """ - parameters: list[ParameterReference[Any] | FactoryCall] = list( + display_name = name + name = k8s_name(name) + parameters: list[ParameterReference[Any] | FactoryCall] = [ + param for param in workflow.find_parameters( include_factory_calls=not get_render_configuration( "factories_as_params" ) ) - ) + if not isinstance(param, DatasetParameter) + ] if get_render_configuration("factories_as_params"): parameters += list(workflow.find_factories().values()) - step_outputs = {} + pipeline_outputs = {} with BuilderPipeline(name or "myname") as dsl_pipeline: for step in workflow.indexed_steps.values(): if isinstance(step, FactoryCall) and get_render_configuration( @@ -890,21 +1120,17 @@ def from_workflow( ): continue StepDefinition.from_step(step) - pipeline_outputs = {dsl.component_factory.SINGLE_OUTPUT_NAME: ensure_channels(workflow.result)} - - inputs = {} - for param in parameters: - typ = with_type(param) - typ = type_utils._annotation_to_type_struct(typ) - input_output_spec_args = {"type": typ, "is_artifact_list": False} - inputs[param.name] = dsl.structures.InputSpec( - **input_output_spec_args, - ) + pipeline_outputs = { + dsl.component_factory.SINGLE_OUTPUT_NAME: ensure_channels( + workflow.result, + name + ) + } - rettyp = to_output_schema(dsl.component_factory.SINGLE_OUTPUT_NAME, workflow.result.__type__) - outputs = {} - outputs[dsl.component_factory.SINGLE_OUTPUT_NAME] = rettyp - print(dsl.component_factory.SINGLE_OUTPUT_NAME) + inputs, outputs = get_name_to_specs([ + (param.name, with_type(param)) + for param in parameters + ], with_type(workflow.result), step_name=name) description = "DESCRIPTION" component_name = "NAME" @@ -924,13 +1150,13 @@ def from_workflow( name=parameter.name, channel_type=input_spec.type, is_artifact_list=input_spec.is_artifact_list, - )) + ) + ) graph_component = cls(component_spec=component_spec) pipeline_group = dsl_pipeline.groups[0] pipeline_group.name = uuid.uuid4().hex - print(outputs, pipeline_outputs) pipeline_spec, platform_spec = 
builder.create_pipeline_spec( pipeline=dsl_pipeline, component_spec=graph_component.component_spec, @@ -940,8 +1166,8 @@ def from_workflow( # pipeline_root = getattr(pipeline_func, 'pipeline_root', None) # if pipeline_root is not None: # pipeline_spec.default_pipeline_root = pipeline_root - # if display_name is not None: - # pipeline_spec.pipeline_info.display_name = display_name + if display_name is not None: + pipeline_spec.pipeline_info.display_name = display_name if component_spec.description is not None: pipeline_spec.pipeline_info.description = component_spec.description @@ -976,13 +1202,13 @@ def render(self) -> dict[str, RawType]: def render( - workflow: Workflow, **kwargs: Unpack[CWLRendererConfiguration] + workflow: Workflow, **kwargs: Unpack[KubeflowRendererConfiguration] ) -> dict[str, dict[str, RawType]]: """Render to a dict-like structure. Args: workflow: workflow to evaluate result. - **kwargs: additional configuration arguments - these should match CWLRendererConfiguration. + **kwargs: additional configuration arguments - these should match KubeflowRendererConfiguration. Returns: Reduced form as a native Python dict structure for diff --git a/src/dewret/tasks.py b/src/dewret/tasks.py index c1be06f1..011851f9 100644 --- a/src/dewret/tasks.py +++ b/src/dewret/tasks.py @@ -45,6 +45,7 @@ from contextlib import contextmanager from .utils import is_firm, make_traceback, is_expr +from .data import Dataset from .workflow import ( expr_to_references, unify_workflows, @@ -56,6 +57,7 @@ LazyFactory, Parameter, ParameterReference, + DatasetParameter, param, Task, is_task, @@ -408,6 +410,7 @@ def _fn( ) -> RetType: configuration = None allow_positional_args = bool(get_configuration("allow_positional_args")) + strict = bool(get_configuration("strict")) try: # Ensure that all arguments are passed as keyword args and prevent positional args. @@ -550,9 +553,9 @@ def {fn.__name__}(...) -> ...: elif is_firm(value) or ( (attrs_has(value) or is_dataclass(value)) and not inspect.isclass(value) - ): + ) or isinstance(value, Dataset): kwargs[var] = cast( - Parameter[Any], + DatasetParameter[Any], param( var, default=value, @@ -561,11 +564,12 @@ def {fn.__name__}(...) -> ...: var, exhaustive=True ) or UNSET, + parameter_cls=DatasetParameter ), ).make_reference(workflow=workflow) elif ( is_expr(value) - and (expr_refs := expr_to_references(value)) + and (expr_refs := expr_to_references(value)[1]) and len(expr_refs[1]) != 0 ): kwargs[var] = value @@ -573,6 +577,10 @@ def {fn.__name__}(...) 
-> ...:
                 raise NotImplementedError(
                     f"Nested tasks must now only refer to global parameters, raw or tasks, not objects: {var}"
                 )
+            elif strict:
+                raise NotImplementedError(
+                    f"In strict mode, tasks must now only refer to global parameters, raw or tasks, not objects: {var}"
+                )
             if nested:
                 if flatten_nested or get_configuration("flatten_all_nested"):
                     with in_nested_task():
diff --git a/src/dewret/utils.py b/src/dewret/utils.py
index 30f3a1cd..9269e860 100644
--- a/src/dewret/utils.py
+++ b/src/dewret/utils.py
@@ -23,7 +23,19 @@
 import importlib
 import importlib.util
 from types import FrameType, TracebackType, UnionType, ModuleType
-from typing import Any, cast, Protocol, ClassVar, Callable, Iterable, get_args, Hashable
+from typing import (
+    Any,
+    cast,
+    Protocol,
+    ClassVar,
+    Callable,
+    Iterable,
+    get_args,
+    Hashable,
+    Annotated,
+    get_origin,
+)
 from pathlib import Path
 from collections.abc import Sequence, Mapping
 from dataclasses import asdict, is_dataclass
@@ -99,6 +111,7 @@ def load_module_or_package(target_name: str, path: Path) -> ModuleType:
         if spec is None or spec.loader is None:
             raise ImportError(f"Could not open {path} module")
         module = importlib.util.module_from_spec(spec)
+        sys.modules[target_name] = module
         spec.loader.exec_module(module)
     except ImportError as exc:
         if exception:
diff --git a/src/dewret/workflow.py b/src/dewret/workflow.py
index a9e20d15..156a4bfb 100644
--- a/src/dewret/workflow.py
+++ b/src/dewret/workflow.py
@@ -59,6 +59,7 @@
     ExprType,
 )
 from .utils import hasher, is_raw, make_traceback, is_raw_type, is_expr, Unset
+from .data import Dataset

 T = TypeVar("T")
 U = TypeVar("U")
@@ -307,6 +308,22 @@ def __getattr__(self, attr: str) -> Reference[T] | Any:
         """
         return getattr(self.make_reference(workflow=None), attr)

+class DatasetParameter(Parameter[T]):
+    """Parameter whose default is a global dataset (artifact-like) value."""
+
+    def make_reference(self, **kwargs: Any) -> "DatasetParameterReference[T]":
+        """Creates a new reference for the parameter.
+
+        The kwargs will be passed to the constructor, with this parameter
+        and its type filled in as defaults.
+
+        Args:
+            **kwargs: arguments to pass to the constructor.
+
+        Returns: a DatasetParameterReference tied to this parameter.
+        """
+        kwargs["parameter"] = self
+        kwargs.setdefault("typ", self.__type__)
+        return DatasetParameterReference(**kwargs)

 def param(
     name: str,
@@ -314,6 +331,7 @@ def param(
     tethered: Literal[False] | None | Step | Workflow = False,
     typ: type[T] | Unset = UNSET,
     autoname: bool = False,
+    parameter_cls: type[Parameter[T]] = Parameter[T]
 ) -> T:
     """Create a parameter.
@@ -328,7 +346,7 @@ def param(
     default = UnsetType[T](typ)
     return cast(
         T,
-        Parameter(name, default=default, tethered=tethered, autoname=autoname, typ=typ),
+        parameter_cls(name, default=default, tethered=tethered, autoname=autoname, typ=typ),
     )


@@ -1006,7 +1024,13 @@ def find_field(
             raise AttributeError(
                 f"Tried to index int {field} into a non-sequence type {parent_type} (base: {base})"
             )
-        if not (field_type := get_args(parent_type)[0]):
+        args = get_args(parent_type)
+        if isinstance(base, type) and issubclass(base, tuple) and len(args) > field:
+            # This is the case of e.g. X = tuple[int, str], where x[1] should have type str, not int.
+ field_type = args[field] + elif args[0]: + field_type = args[0] + else: raise AttributeError( f"Tried to index int {field} into type {parent_type} but can only do so if the first type argument " f"is the element type (args: {get_args(parent_type)}" @@ -1140,11 +1164,13 @@ def __init__( or is_expr(value) or is_dataclass(value) or attr_has(value) + or isinstance(value, Dataset) ): # Avoid recursive type issues if ( not isinstance(value, Reference) and not isinstance(value, FactoryCall) + and not isinstance(value, Dataset) and not isinstance(value, Raw) and is_raw(value) ): @@ -1578,6 +1604,10 @@ def __make_reference__(self, **kwargs: Any) -> "ParameterReference[U]": return self._.parameter.make_reference(**kwargs) +class DatasetParameterReference(ParameterReference[U]): + ... + + class IterableParameterReference(IterableMixin[U], ParameterReference[U]): """Iterable form of parameter references.""" @@ -1781,7 +1811,10 @@ def __iter__(self) -> Generator[Reference[U], None, None]: for zipping with a fixed length iterator, or simply prepping fieldnames for serialization. """ # We cast this so that we can treat a step iterator as if it really loops over results. - yield cast(Reference[U], IteratedGenerator(self)) + if self.__fixed_len__ is None: + yield cast(Reference[U], IteratedGenerator(self)) + else: + yield from super().__iter__() def is_task(task: Lazy) -> bool: From 14eeea2fc181de6bbf199362c64af239e9d7b2dd Mon Sep 17 00:00:00 2001 From: Phil Weir Date: Sat, 18 Jan 2025 13:07:36 +0000 Subject: [PATCH 4/6] wip: refactor --- ...nents_pipeline_with_dewret_pipeline-1.yaml | 8 +- example/kfp_example.py | 187 +++--- example/kfp_example_with_dewret.py | 82 ++- src/dewret/core.py | 6 +- src/dewret/data.py | 14 +- src/dewret/renderers/kubeflow.py | 619 +++++++----------- src/dewret/tasks.py | 19 +- src/dewret/utils.py | 14 +- src/dewret/workflow.py | 14 +- 9 files changed, 428 insertions(+), 535 deletions(-) diff --git a/example/components_pipeline_with_dewret_pipeline-1.yaml b/example/components_pipeline_with_dewret_pipeline-1.yaml index 9579f811..e5d60fc6 100644 --- a/example/components_pipeline_with_dewret_pipeline-1.yaml +++ b/example/components_pipeline_with_dewret_pipeline-1.yaml @@ -400,12 +400,12 @@ deploymentSpec: \ dsl.Output[Dataset]):\n paths = {}\n unpaths = {}\n input_df\ \ = input_df.path\n f, X_train_artifact = mkstemp(); os.close(f)\n \ \ paths['X_train_artifact'] = Path(X_train_artifact)\n unpaths[Path(X_train_artifact)]\ - \ = 0\n f, X_test_artifact = mkstemp(); os.close(f)\n paths['X_test_artifact']\ - \ = Path(X_test_artifact)\n unpaths[Path(X_test_artifact)] = 0\n f,\ - \ y_train_artifact = mkstemp(); os.close(f)\n paths['y_train_artifact']\ + \ = 0\n f, y_train_artifact = mkstemp(); os.close(f)\n paths['y_train_artifact']\ \ = Path(y_train_artifact)\n unpaths[Path(y_train_artifact)] = 0\n \ \ f, y_test_artifact = mkstemp(); os.close(f)\n paths['y_test_artifact']\ - \ = Path(y_test_artifact)\n unpaths[Path(y_test_artifact)] = 0\n globals().update(paths)\n\ + \ = Path(y_test_artifact)\n unpaths[Path(y_test_artifact)] = 0\n f,\ + \ X_test_artifact = mkstemp(); os.close(f)\n paths['X_test_artifact']\ + \ = Path(X_test_artifact)\n unpaths[Path(X_test_artifact)] = 0\n globals().update(paths)\n\ \ final_output = train_test_split(input_df=input_df)\n train_test_split_1_1\ \ = (train_test_split_1_1__0, train_test_split_1_1__1, train_test_split_1_1__2,\ \ train_test_split_1_1__3)\n for p, q in zip(final_output, train_test_split_1_1):\ diff --git a/example/kfp_example.py 
b/example/kfp_example.py index a13f2a39..2d831d85 100644 --- a/example/kfp_example.py +++ b/example/kfp_example.py @@ -1,12 +1,12 @@ # Copyright Shray15, 2024- # https://github.com/Shray15/Boston_House_Pricing/blob/53837c94643531f93c0ff00b40a7fbef9793f17d/boston-house-kfp.py -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,63 +17,78 @@ from kfp.dsl import Input, Output, Artifact, Dataset from pathlib import Path -EXPERIMENT_NAME = 'Boston-house-pred' +EXPERIMENT_NAME = "Boston-house-pred" -@dsl.component(base_image='python:3.9', packages_to_install=["pandas", 'minio==7.1.14']) -def load_dataset_from_gcs(bucket_name: str, blob_name: str, output_dataset: Output[Dataset]): + +@dsl.component(base_image="python:3.9", packages_to_install=["pandas", "minio==7.1.14"]) +def load_dataset_from_gcs( + bucket_name: str, blob_name: str, output_dataset: Output[Dataset] +): import pandas as pd from minio import Minio - from minio.error import S3Error import io - client = Minio('minio-service.default:9000', - 'minio', - 'minio123', - secure=False) + + client = Minio("minio-service.default:9000", "minio", "minio123", secure=False) response = client.get_object(bucket_name, blob_name) - data = pd.read_csv(io.BytesIO(response.data), header=None, delim_whitespace=True, comment="#") + data = pd.read_csv( + io.BytesIO(response.data), header=None, delim_whitespace=True, comment="#" + ) data.to_csv(output_dataset.path, header=True, index=False) -@dsl.component(base_image='python:3.9', packages_to_install=["pandas"]) + +@dsl.component(base_image="python:3.9", packages_to_install=["pandas"]) def preprocess_the_dataset(dataset_content: Input[Dataset], out_data: Output[Dataset]): import pandas as pd + data = pd.read_csv(dataset_content.path, header=0) if data.isna().sum().any(): raise ValueError("The data needs preprocessing (remove missing values)") - + data.to_csv(out_data.path, index=False) -@dsl.component(base_image='python:3.9', packages_to_install=["scikit-learn", "pandas"]) -def train_test_split(input_df: Input[Dataset], - X_train_artifact: Output[Dataset], - X_test_artifact: Output[Dataset], - y_train_artifact: Output[Dataset], - y_test_artifact: Output[Dataset]): + +@dsl.component(base_image="python:3.9", packages_to_install=["scikit-learn", "pandas"]) +def train_test_split( + input_df: Input[Dataset], + X_train_artifact: Output[Dataset], + X_test_artifact: Output[Dataset], + y_train_artifact: Output[Dataset], + y_test_artifact: Output[Dataset], +): from sklearn.model_selection import train_test_split import pandas as pd + df = pd.read_csv(input_df.path) X = df.iloc[:, :-1] y = df.iloc[:, -1] - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.3, random_state=42 + ) X_train.to_csv(X_train_artifact.path, index=False) X_test.to_csv(X_test_artifact.path, index=False) y_train.to_csv(y_train_artifact.path, index=False) y_test.to_csv(y_test_artifact.path, index=False) -@dsl.component(base_image='python:3.9', packages_to_install=['numpy', 'scikit-learn', 'joblib', "pandas", 'minio==7.1.14']) -def 
model_training(X_train_input: Input[Dataset], - X_test_input: Input[Dataset], - y_train_input: Input[Dataset], - X_test_scaled: Output[Dataset], - model_output: Output[Artifact]): + +@dsl.component( + base_image="python:3.9", + packages_to_install=["numpy", "scikit-learn", "joblib", "pandas", "minio==7.1.14"], +) +def model_training( + X_train_input: Input[Dataset], + X_test_input: Input[Dataset], + y_train_input: Input[Dataset], + X_test_scaled: Output[Dataset], + model_output: Output[Artifact], +): from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LinearRegression import joblib from minio import Minio - from minio.error import S3Error - import io import pandas as pd + scaler = StandardScaler() X_train = pd.read_csv(X_train_input.path) @@ -81,51 +96,58 @@ def model_training(X_train_input: Input[Dataset], y_train = pd.read_csv(y_train_input.path) X_train_scaled = scaler.fit_transform(X_train) - X_test_scaled2= pd.DataFrame(scaler.transform(X_test)) + X_test_scaled2 = pd.DataFrame(scaler.transform(X_test)) X_test_scaled2.to_csv(X_test_scaled.path, index=False) # Fixing typo here regression = LinearRegression() regression.fit(X_train_scaled, y_train) - model_file = '/trained_model.joblib' + model_file = "/trained_model.joblib" joblib.dump(regression, model_file) # Upload the model file to Google Cloud Storage - client = Minio('minio-service.default:9000', - 'minio', - 'minio123', - secure=False) - client.fput_object('boston-house-data', 'data/model.pkl', model_file) + client = Minio("minio-service.default:9000", "minio", "minio123", secure=False) + client.fput_object("boston-house-data", "data/model.pkl", model_file) model_output.file = model_file ### # Upload the model file to Google Cloud Storage model_output.file = model_file -@dsl.component(base_image='python:3.9', packages_to_install=["pandas", "joblib", 'minio==7.1.14',"scikit-learn"]) -def predict(X_test: Input[Dataset], trained_model: Input[Artifact], prediction: Output[Dataset]): + +@dsl.component( + base_image="python:3.9", + packages_to_install=["pandas", "joblib", "minio==7.1.14", "scikit-learn"], +) +def predict( + X_test: Input[Dataset], trained_model: Input[Artifact], prediction: Output[Dataset] +): import joblib import pandas as pd from minio import Minio - from minio.error import S3Error - import sklearn + X_test_data = pd.read_csv(X_test.path) - client = Minio('minio-service.default:9000', - 'minio', - 'minio123', - secure=False) - model_file = 'model.pkl' - client.fget_object('boston-house-data', 'data/model.pkl', model_file) + client = Minio("minio-service.default:9000", "minio", "minio123", secure=False) + model_file = "model.pkl" + client.fget_object("boston-house-data", "data/model.pkl", model_file) regression = joblib.load(model_file) predictions = regression.predict(X_test_data) pd.DataFrame(predictions).to_csv(prediction.path, index=False) -@dsl.component(base_image='python:3.9', packages_to_install=["pandas", "scikit-learn", "numpy"]) -def evaluate(y_test: Input[Dataset], predictions: Input[Dataset], metrics_output: Output[Artifact]): + +@dsl.component( + base_image="python:3.9", packages_to_install=["pandas", "scikit-learn", "numpy"] +) +def evaluate( + y_test: Input[Dataset], + predictions: Input[Dataset], + metrics_output: Output[Artifact], +): from sklearn.metrics import mean_squared_error, mean_absolute_error import pandas as pd import numpy as np + y_test_data = pd.read_csv(y_test.path) predictions_data = pd.read_csv(predictions.path) @@ -133,46 +155,61 @@ def evaluate(y_test: 
Input[Dataset], predictions: Input[Dataset], metrics_output mse = mean_squared_error(y_test_data, predictions_data) rmse = np.sqrt(mse) - with open(metrics_output.path, 'w') as f: - f.write(f'MAE: {mae}\n') - f.write(f'MSE: {mse}\n') - f.write(f'RMSE: {rmse}\n') + with open(metrics_output.path, "w") as f: + f.write(f"MAE: {mae}\n") + f.write(f"MSE: {mse}\n") + f.write(f"RMSE: {rmse}\n") + @dsl.pipeline( name="Boston-house-training-prediction", - description='A pipeline to prepare dataset, split into train and test sets, train a model, and predict', - pipeline_root='gs://boston-house-pred' + description="A pipeline to prepare dataset, split into train and test sets, train a model, and predict", + pipeline_root="gs://boston-house-pred", ) def pipeline(): - read_data = load_dataset_from_gcs(bucket_name="boston-house-data", blob_name="data/housing.csv") - preprocess_data = preprocess_the_dataset(dataset_content=read_data.outputs['output_dataset']) - split = train_test_split(input_df=preprocess_data.outputs['out_data']) - trained_model = model_training(X_train_input=split.outputs['X_train_artifact'], - X_test_input=split.outputs['X_test_artifact'], - y_train_input=split.outputs['y_train_artifact'] - ) # Fixed typo here - predicted_value = predict(X_test=trained_model.outputs['X_test_scaled'], trained_model=trained_model.outputs['model_output']) - evaluate(y_test= split.outputs['y_test_artifact'], predictions=predicted_value.outputs['prediction']) # Fixed typo here - - - -pipeline_file = 'components_pipeline.yaml' + read_data = load_dataset_from_gcs( + bucket_name="boston-house-data", blob_name="data/housing.csv" + ) + preprocess_data = preprocess_the_dataset( + dataset_content=read_data.outputs["output_dataset"] + ) + split = train_test_split(input_df=preprocess_data.outputs["out_data"]) + trained_model = model_training( + X_train_input=split.outputs["X_train_artifact"], + X_test_input=split.outputs["X_test_artifact"], + y_train_input=split.outputs["y_train_artifact"], + ) # Fixed typo here + predicted_value = predict( + X_test=trained_model.outputs["X_test_scaled"], + trained_model=trained_model.outputs["model_output"], + ) + evaluate( + y_test=split.outputs["y_test_artifact"], + predictions=predicted_value.outputs["prediction"], + ) # Fixed typo here + + +pipeline_file = "components_pipeline.yaml" compiler.Compiler().compile(pipeline_func=pipeline, package_path=pipeline_file) + def upload(): from minio import Minio - from minio.error import S3Error + # You need to port-forward manually before running - client = Minio('localhost:9000', - 'minio', - 'minio123', - secure=False) - bucket_name = 'boston-house-data' + client = Minio("localhost:9000", "minio", "minio123", secure=False) + bucket_name = "boston-house-data" found = client.bucket_exists(bucket_name) if not found: client.make_bucket(bucket_name) print("Created bucket", bucket_name) else: print("Bucket", bucket_name, "already exists") - client.fput_object(bucket_name, 'data/housing.csv', Path(__file__).parent / 'kfp_example_housing.csv') -#upload() + client.fput_object( + bucket_name, + "data/housing.csv", + Path(__file__).parent / "kfp_example_housing.csv", + ) + + +# upload() diff --git a/example/kfp_example_with_dewret.py b/example/kfp_example_with_dewret.py index 42ebdb0b..401a6174 100644 --- a/example/kfp_example_with_dewret.py +++ b/example/kfp_example_with_dewret.py @@ -1,12 +1,12 @@ # Copyright Shray15, 2024- # https://github.com/Shray15/Boston_House_Pricing/blob/53837c94643531f93c0ff00b40a7fbef9793f17d/boston-house-kfp.py -# +# # 
Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -33,26 +33,28 @@ prediction: Dataset = DATASET_MANAGER.path() X_test_scaled: Dataset = DATASET_MANAGER.path() + @task() def load_dataset_from_gcs(bucket_name: str, blob_name: str) -> Dataset: import pandas as pd from minio import Minio - from minio.error import S3Error import io - client = Minio('minio-service.default:9000', - 'minio', - 'minio123', - secure=False) + + client = Minio("minio-service.default:9000", "minio", "minio123", secure=False) response = client.get_object(bucket_name, blob_name) - data = pd.read_csv(io.BytesIO(response.data), header=None, delim_whitespace=True, comment="#") + data = pd.read_csv( + io.BytesIO(response.data), header=None, delim_whitespace=True, comment="#" + ) data.to_csv(output_dataset, header=True, index=False) return output_dataset + @task() def preprocess_the_dataset(dataset_content: Dataset) -> Dataset: import pandas as pd + data = pd.read_csv(dataset_content, header=0) if data.isna().sum().any(): raise ValueError("The data needs preprocessing (remove missing values)") @@ -60,16 +62,20 @@ def preprocess_the_dataset(dataset_content: Dataset) -> Dataset: data.to_csv(out_data, index=False) return out_data + @task() def train_test_split( - input_df: Dataset, + input_df: Dataset, ) -> tuple[Dataset, Dataset, Dataset, Dataset]: from sklearn.model_selection import train_test_split import pandas as pd + df = pd.read_csv(input_df) X = df.iloc[:, :-1] y = df.iloc[:, -1] - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.3, random_state=42 + ) X_train.to_csv(X_train_artifact, index=False) X_test.to_csv(X_test_artifact, index=False) @@ -83,6 +89,7 @@ def train_test_split( y_test_artifact, ) + @task() def model_training( X_train_input: Dataset, @@ -93,9 +100,8 @@ def model_training( from sklearn.linear_model import LinearRegression import joblib from minio import Minio - from minio.error import S3Error - import io import pandas as pd + scaler = StandardScaler() X_train = pd.read_csv(X_train_input) @@ -103,45 +109,42 @@ def model_training( y_train = pd.read_csv(y_train_input) X_train_scaled = scaler.fit_transform(X_train) - X_test_scaled2= pd.DataFrame(scaler.transform(X_test)) + X_test_scaled2 = pd.DataFrame(scaler.transform(X_test)) X_test_scaled2.to_csv(X_test_scaled, index=False) # Fixing typo here regression = LinearRegression() regression.fit(X_train_scaled, y_train) joblib.dump(regression, model_file) - client = Minio('minio-service.default:9000', - 'minio', - 'minio123', - secure=False) - client.fput_object('boston-house-data', 'data/model.pkl', str(model_file)) + client = Minio("minio-service.default:9000", "minio", "minio123", secure=False) + client.fput_object("boston-house-data", "data/model.pkl", str(model_file)) return X_test_scaled, model_file + @task() def predict(X_test: Dataset, trained_model: Artifact) -> Dataset: import joblib import pandas as pd from minio import Minio - from minio.error import S3Error + X_test_data = pd.read_csv(X_test) - client = Minio('minio-service.default:9000', - 
'minio', - 'minio123', - secure=False) - model_file = 'model.pkl' - client.fget_object('boston-house-data', 'data/model.pkl', model_file) + client = Minio("minio-service.default:9000", "minio", "minio123", secure=False) + model_file = "model.pkl" + client.fget_object("boston-house-data", "data/model.pkl", model_file) regression = joblib.load(model_file) predictions = regression.predict(X_test_data) pd.DataFrame(predictions).to_csv(prediction, index=False) return prediction + @task() def evaluate(y_test: Dataset, predictions: Dataset) -> Dataset: from sklearn.metrics import mean_squared_error, mean_absolute_error import pandas as pd import numpy as np + y_test_data = pd.read_csv(y_test) predictions_data = pd.read_csv(predictions) @@ -149,22 +152,29 @@ def evaluate(y_test: Dataset, predictions: Dataset) -> Dataset: mse = mean_squared_error(y_test_data, predictions_data) rmse = np.sqrt(mse) - with open(metrics_output, 'w') as f: - f.write(f'MAE: {mae}\n') - f.write(f'MSE: {mse}\n') - f.write(f'RMSE: {rmse}\n') + with open(metrics_output, "w") as f: + f.write(f"MAE: {mae}\n") + f.write(f"MSE: {mse}\n") + f.write(f"RMSE: {rmse}\n") return metrics_output + @workflow() def pipeline() -> Dataset: - read_data = load_dataset_from_gcs(bucket_name="boston-house-data", blob_name="data/housing.csv") + read_data = load_dataset_from_gcs( + bucket_name="boston-house-data", blob_name="data/housing.csv" + ) out_data = preprocess_the_dataset(dataset_content=read_data) - X_train_artifact, X_test_artifact, y_train_artifact, y_test_artifact = train_test_split(input_df=out_data) + X_train_artifact, X_test_artifact, y_train_artifact, y_test_artifact = ( + train_test_split(input_df=out_data) + ) X_test_scaled, trained_model = model_training( - X_train_input=X_train_artifact, - X_test_input=X_test_artifact, - y_train_input=y_train_artifact + X_train_input=X_train_artifact, + X_test_input=X_test_artifact, + y_train_input=y_train_artifact, ) # Fixed typo here predicted_value = predict(X_test=X_test_scaled, trained_model=trained_model) - return evaluate(y_test= y_test_artifact, predictions=predicted_value) # Fixed typo here + return evaluate( + y_test=y_test_artifact, predictions=predicted_value + ) # Fixed typo here diff --git a/src/dewret/core.py b/src/dewret/core.py index 0c8112b0..88b212b8 100644 --- a/src/dewret/core.py +++ b/src/dewret/core.py @@ -49,7 +49,9 @@ RawType = BasicType | list["RawType"] | dict[str, "RawType"] FirmType = RawType | list["FirmType"] | dict[str, "FirmType"] | tuple["FirmType", ...] # Basic is from Sympy, which does not have type annotations, so ExprType cannot pass mypy -ExprType = (FirmType | Basic | list["ExprType"] | dict[str, "ExprType"] | tuple["ExprType", ...]) # type: ignore # fmt: skip +ExprType = ( + FirmType | Basic | list["ExprType"] | dict[str, "ExprType"] | tuple["ExprType", ...] # type: ignore +) U = TypeVar("U") T = TypeVar("T") @@ -351,7 +353,7 @@ def get_render_configuration(key: str) -> RawType: Returns: (preferably) a JSON/YAML-serializable construct. """ try: - if (render := CONFIGURATION.get().render): + if render := CONFIGURATION.get().render: return render.get(key) except LookupError: ... diff --git a/src/dewret/data.py b/src/dewret/data.py index e4e79700..3ce74afb 100644 --- a/src/dewret/data.py +++ b/src/dewret/data.py @@ -1,15 +1,17 @@ -from typing import IO, Generator, cast +from typing import IO, cast from pathlib import Path from os import PathLike -from contextlib import contextmanager -from tempfile import NamedTemporaryFile -class Dataset: - ... 
+ +class Dataset: ... + class DatasetPath(Dataset, Path): def __truediv__(self, other: PathLike[str] | str) -> Path: - return cast(Path, super().__truediv__(other)) # Cast this up to make sure mypy flags abuse of this + return cast( + Path, super().__truediv__(other) + ) # Cast this up to make sure mypy flags abuse of this + class DataManager: def path(self, mode: str = "r") -> DatasetPath: diff --git a/src/dewret/renderers/kubeflow.py b/src/dewret/renderers/kubeflow.py index deccf353..5d418cdb 100644 --- a/src/dewret/renderers/kubeflow.py +++ b/src/dewret/renderers/kubeflow.py @@ -26,7 +26,6 @@ from kfp.pipeline_spec import pipeline_spec_pb2 from kfp.compiler import pipeline_spec_builder as builder from kfp import dsl -from kfp.dsl.types import type_utils from kfp.dsl.pipeline_context import Pipeline from attrs import define, has as attrs_has, fields as attrs_fields, AttrsInstance from dataclasses import is_dataclass, fields as dataclass_fields @@ -44,13 +43,12 @@ Any, Unpack, Iterable, - Callable, Optional, List, ) from types import UnionType import inspect -from inspect import isclass, getsourcefile, getsource +from inspect import isclass, getsourcefile from pathlib import Path from sympy import Basic, Tuple, Dict, jscode, Symbol from contextvars import ContextVar @@ -59,7 +57,6 @@ from dewret.core import ( Raw, RawType, - FirmType, ) from dewret.workflow import ( FactoryCall, @@ -76,11 +73,14 @@ crawl_raw, DataclassProtocol, firm_to_raw, - flatten_if_set, - Unset, ) from dewret.render import base_render -from dewret.core import Reference, get_render_configuration, set_render_configuration, strip_annotations +from dewret.core import ( + Reference, + get_render_configuration, + set_render_configuration, + strip_annotations, +) T = TypeVar("T") PIPELINE: ContextVar[Pipeline] = ContextVar("pipeline") @@ -89,15 +89,6 @@ ) KFPDataset = Annotated[T, "KFPDataset"] -def extend_signature(func, inputs, return_ann): - import inspect - from collections import OrderedDict - sig = inspect.signature(func) - parameters = OrderedDict() - for missing_input in inputs - set(sig.parameters): - parameters[missing_input] = inspect.Parameter(missing_input, inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=dsl.Input[dsl.Artifact]) # Check - parameters["Output"] = inspect.Parameter(return_ann, inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=dsl.Output[dsl.Artifact]) - return parameters # pipelines/sdk/python/kfp/dsl/component_factory.py def create_component_from_step( @@ -105,12 +96,12 @@ def create_component_from_step( component_spec: dsl.structures.ComponentSpec, base_image: Optional[str] = None, target_image: Optional[str] = None, - packages_to_install: List[str] = None, - pip_index_urls: Optional[List[str]] = None, + packages_to_install: list[str] | None = None, + pip_index_urls: Optional[list[str]] = None, output_component_file: Optional[str] = None, install_kfp_package: bool = True, kfp_package_path: Optional[str] = None, - pip_trusted_hosts: Optional[List[str]] = None, + pip_trusted_hosts: Optional[list[str]] = None, use_venv: bool = False, ) -> dsl.python_component.PythonComponent: """Implementation for the @component decorator. @@ -118,15 +109,16 @@ def create_component_from_step( The decorator is defined under component_decorator.py. See the decorator for the canonical documentation for this function. 
""" - - packages_to_install_command = dsl.component_factory._get_packages_to_install_command( - install_kfp_package=install_kfp_package, - target_image=target_image, - kfp_package_path=kfp_package_path, - packages_to_install=packages_to_install, - pip_index_urls=pip_index_urls, - pip_trusted_hosts=pip_trusted_hosts, - use_venv=use_venv, + packages_to_install_command = ( + dsl.component_factory._get_packages_to_install_command( + install_kfp_package=install_kfp_package, + target_image=target_image, + kfp_package_path=kfp_package_path, + packages_to_install=packages_to_install, + pip_index_urls=pip_index_urls, + pip_trusted_hosts=pip_trusted_hosts, + use_venv=use_venv, + ) ) command = [] @@ -134,7 +126,8 @@ def create_component_from_step( if base_image is None: base_image = dsl.component_factory._DEFAULT_BASE_IMAGE warnings.warn( - ("The default base_image used by the @dsl.component decorator will switch from 'python:3.9' to 'python:3.10' on Oct 1, 2025. To ensure your existing components work with versions of the KFP SDK released after that date, you should provide an explicit base_image argument and ensure your component works as intended on Python 3.10." + ( + "The default base_image used by the @dsl.component decorator will switch from 'python:3.9' to 'python:3.10' on Oct 1, 2025. To ensure your existing components work with versions of the KFP SDK released after that date, you should provide an explicit base_image argument and ensure your component works as intended on Python 3.10." ), FutureWarning, stacklevel=2, @@ -145,31 +138,43 @@ def create_component_from_step( if target_image: component_image = target_image - command, args = dsl.component_factory._get_command_and_args_for_containerized_component( - function_name=func.__name__,) + command, args = ( + dsl.component_factory._get_command_and_args_for_containerized_component( + function_name=func.__name__, + ) + ) else: - command, args = dsl.component_factory._get_command_and_args_for_lightweight_component( - func=func) - # RMV - globals!? - # Need to strip signature of annotations for original function as not guaranteed to be imported. + command, args = ( + dsl.component_factory._get_command_and_args_for_lightweight_component( + func=func + ) + ) + sig = inspect.signature(func) - return_tuple = False - output_name = py_name(step.name) - def to_repr(typ): - nonlocal return_tuple - return_ann, artifacts = to_kfp_type(output_name, typ) + output_name = py_name(step.name) or step.name + + def to_repr(typ: type[Any]) -> tuple[str, None | list[tuple[str, str]]]: + return_tuple: None | list[tuple[str, str]] = None + return_schema, artifacts = to_kfp_type(output_name, typ) + return_ann: str | tuple[Any, ...] if artifacts: return_ann = artifacts[output_name].__qualname__ else: - return_type = return_ann["type"] - if hasattr(return_type, "_fields"): - annotations = [(key, to_repr(return_type.__annotations__[key])) for key in return_type._fields] - return_tuple = annotations # what if nested? 
- annotations = ", ".join(f"('{k}', {v})" for k, v in annotations) - command[-1] += f"{return_type.__name__} = NamedTuple('{return_type.__name__}', ({annotations}))\n" - return_ann = return_type.__name__ - return return_ann - return_ann = to_repr(step.return_type) + return_ann = return_schema["type"] + if hasattr(return_ann, "_fields"): + return_tuple = [ + (key, to_repr(return_ann.__annotations__[key])[0]) + for key in return_ann._fields + ] + annotations = ", ".join(f"('{k}', {v})" for k, v in return_tuple) + return_ann = getattr(return_ann, "__name__", "UndefinedTuple") + command[-1] += ( + f"{return_ann} = NamedTuple('{return_ann}', ({annotations}))\n" + ) + return_ann = return_ann.__name__ + return return_ann, return_tuple + + return_ann, return_tuple = to_repr(step.return_type) signature = [] in_paths = [] for param in sig.parameters: @@ -178,9 +183,10 @@ def to_repr(typ): signature.append((param, f"Input[{artifacts[param].__qualname__}]")) in_paths.append(param) else: - signature.append((param, f"{sig.parameters[param].annotation.__qualname__}")) - output_datasets = {} - wrapper_str = ', '.join(f'{n}: {t}' for n, t in signature) + signature.append( + (param, sig.parameters[param].annotation.__qualname__) + ) + wrapper_str = ", ".join(f"{n}: {t}" for n, t in signature) print(step.return_type) command[-1] += """ from kfp.dsl.types.artifact_types import * @@ -191,12 +197,15 @@ def to_repr(typ): from tempfile import mkstemp from pathlib import Path """ - dataset_parameters = [] if return_tuple: - output_param = ", ".join(f"{key}: dsl.Output[{ann}]" for key, ann in return_tuple) + output_param = ", ".join( + f"{key}: dsl.Output[{ann}]" for key, ann in return_tuple + ) else: output_param = f"{output_name}: dsl.Output[{return_ann}]" - command[-1] += f"def {func.__name__}_({wrapper_str}, {output_param}):\n paths = {{}}\n unpaths = {{}}\n" + command[-1] += ( + f"def {func.__name__}_({wrapper_str}, {output_param}):\n paths = {{}}\n unpaths = {{}}\n" + ) for p in in_paths: command[-1] += f" {p} = {p}.path\n" dataset_parameters = [] @@ -206,13 +215,21 @@ def to_repr(typ): command[-1] += f" paths['{key}'] = Path({key})\n" command[-1] += f" unpaths[Path({key})] = 0\n" dataset_parameters.append((key, arg)) - command[-1] += f" globals().update(paths)\n final_output = {func.__name__}({', '.join(f'{a}={a}' for a in sig.parameters)})\n" + command[-1] += ( + f" globals().update(paths)\n final_output = {func.__name__}({', '.join(f'{a}={a}' for a in sig.parameters)})\n" + ) if return_tuple: - command[-1] += f" {output_name} = ({', '.join(key for key, _ in return_tuple)})\n" - command[-1] += f" for p, q in zip(final_output, {output_name}): shutil.move(p, q.path)\n" + command[-1] += ( + f" {output_name} = ({', '.join(key for key, _ in return_tuple)})\n" + ) + command[-1] += ( + f" for p, q in zip(final_output, {output_name}): shutil.move(p, q.path)\n" + ) else: command[-1] += f" shutil.move(final_output, {output_name}.path)\n" - command[-1] += " for p in unpaths: shutil.rmtree(str(p), ignore_errors=True)\n" + command[-1] += ( + " for p in unpaths: shutil.rmtree(str(p), ignore_errors=True)\n" + ) # we could use unpaths[final_output] to update metadata here. 
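    # For reference, the wrapper source assembled above for a single-output step
    # resembles this sketch (names are illustrative; the real output name is
    # py_name(step.name), as in the generated YAML earlier in this series):
    #
    #   def my_step_(x: int, my_step_1_1: dsl.Output[Dataset]):
    #       paths = {}
    #       unpaths = {}
    #       globals().update(paths)
    #       final_output = my_step(x=x)
    #       shutil.move(final_output, my_step_1_1.path)
    #       for p in unpaths: shutil.rmtree(str(p), ignore_errors=True)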
args[-1] += "_" @@ -221,12 +238,19 @@ def to_repr(typ): image=component_image, command=packages_to_install_command + command, args=args, - )) + ) + ) - module_path = Path(getsourcefile(func)) - module_path.resolve() + source_file = getsourcefile(func) + if source_file: + module_path = Path(source_file) + module_path.resolve() + else: + module_path = None - component_name = dsl.component_factory._python_function_name_to_component_name(func.__name__) + component_name = dsl.component_factory._python_function_name_to_component_name( + func.__name__ + ) component_info = dsl.component_factory.ComponentInfo( name=component_name, function_name=func.__name__, @@ -238,7 +262,8 @@ def to_repr(typ): base_image=base_image, packages_to_install=packages_to_install, pip_index_urls=pip_index_urls, - pip_trusted_hosts=pip_trusted_hosts) + pip_trusted_hosts=pip_trusted_hosts, + ) if dsl.component_factory.REGISTERED_MODULES is not None: dsl.component_factory.REGISTERED_MODULES[component_name] = component_info @@ -247,39 +272,30 @@ def to_repr(typ): component_spec.save_to_component_yaml(output_component_file) return dsl.python_component.PythonComponent( - component_spec=component_spec, python_func=func) + component_spec=component_spec, python_func=func + ) + -def get_name_to_specs(func_params, return_ann, step_name: str, containerized: bool = False): +def get_name_to_specs( + func_params: list[tuple[str, str]], return_ann: type[Any], step_name: str, containerized: bool = False +) -> tuple[dict[str, dsl.structures.InputSpec], dict[str, dsl.structures.OutputSpec]]: name_to_input_specs = {} name_to_output_specs = {} - # in_artifacts = {} for key, func_param in func_params: func_param, ann = strip_annotations(func_param) typ, _ = to_kfp_type(key, func_param) - if dsl.types.type_annotations.OutputAnnotation in ann: - # Trying to remove this on the basis that having a single output, - # while a strong constraint, is not a hard limitation (tuples are possible) - # and it lets us create an idiomatic graph. - ... - # name_to_output_specs[key] = dsl.structures.OutputSpec( - # **typ, - # ) - else: + if dsl.types.type_annotations.OutputAnnotation not in ann: name_to_input_specs[key] = dsl.structures.InputSpec( **typ, ) - # if set(in_artifacts) & set(input_artifacts): - # raise TypeError(f"Clashing naming keys for input artifacts: {in_artifacts} -- {input_artifacts}") - # in_artifacts.update(input_artifacts) - # if containerized: - # if return_ann not in [ - # inspect.Parameter.empty, - # structures.ContainerSpec, - # ]: - # raise TypeError( - # 'Return annotation should be either ContainerSpec or omitted for container components.' - # ) + # Trying to remove this on the basis that having a single output, + # while a strong constraint, is not a hard limitation (tuples are possible) + # and it lets us create an idiomatic graph. 
+ # else: + # name_to_output_specs[key] = dsl.structures.OutputSpec( + # **typ, + # ) # ignore omitted returns if return_ann is None: pass @@ -288,52 +304,33 @@ def get_name_to_specs(func_params, return_ann, step_name: str, containerized: bo return_type = return_type["type"] # is NamedTuple if hasattr(return_type, "_fields"): - output_specs, _ = make_output_spec(prefix, return_ann) - # if set(out_artifacts) & set(return_artifacts): - # raise TypeError(f"Clashing artifact names: {out_artifacts} -- {return_artifacts}") - # name_to_output_specs.update(return_artifacts) + output_specs = make_output_spec(prefix, return_ann) for name, output_spec in output_specs.items(): if output_spec is not None: name_to_output_specs[name] = output_spec else: - rettyp, _ = make_output_spec( + rettyp = make_output_spec( dsl.component_factory.SINGLE_OUTPUT_NAME, return_ann ) - # name_to_output_specs.update(return_artifacts) if rettyp is not None: name_to_output_specs[prefix] = rettyp - # if set(name_to_input_specs) & set(in_artifacts): - # raise TypeError(f"Clashing artifact names with parameters: {in_artifacts} -- {name_to_input_specs}") - # name_to_input_specs.update({ - # key: dsl.structures.InputSpec( - # **dsl.component_factory.make_input_output_spec_args(art) - # ) - # for key, art in in_artifacts.items() - # }) return name_to_input_specs, name_to_output_specs + def ensure_channels(expression: Any, task_name: str | None) -> Any: - def remap(ref): - if isinstance(ref, Reference) and not isinstance(ref, DatasetParameterReference): - # RMV: is this OK re. artifacts? + def remap(ref: Any) -> Any: + if isinstance(ref, Reference) and not isinstance( + ref, DatasetParameterReference + ): if ref not in channels: kfp_type, artifacts = to_kfp_type(ref.name, with_type(ref)) - # if kfp_type["type"] != "Artifact": + channel_type = kfp_type["type"] channels[ref] = dsl.pipeline_channel.create_pipeline_channel( name=py_name(ref.name), - channel_type=kfp_type["type"], # type: ignore + channel_type=channel_type, task_name=k8s_name(ref._.step.name), - is_artifact_list=False, + is_artifact_list=kfp_type["is_artifact_list"], ) - # for key, art in artifacts.items(): - # if key not in channels: - # spec_args = dsl.component_factory.make_input_output_spec_args(art) - # channels[key] = dsl.pipeline_channel.create_pipeline_channel( - # name=k8s_name(key), - # channel_type=spec_args["type"], # type: ignore - # task_name=k8s_name(key), - # is_artifact_list=spec_args["is_artifact_list"], - # ) return channels[ref] elif isinstance(ref, Raw): return ref.value @@ -360,27 +357,6 @@ def __init__( self.parent_task_group: None | TasksGroup = None args = args or {} - if component_spec.inputs: - for input_name, argument_value in args.items(): - if input_name not in component_spec.inputs: - raise ValueError( - f"Component {component_spec.name!r} got an unexpected input:" - f" {input_name!r}." - ) - - input_spec = component_spec.inputs[input_name] - - # TODO: we cannot use this as-is, since the value->type - # map is not the same as dewret. 
- # type_utils.verify_type_compatibility( - # given_value=argument_value, - # expected_spec=input_spec, - # error_message_prefix=( - # f"Incompatible argument passed to the input " - # f"{input_name!r} of component {component_spec.name!r}: " - # ), - # ) - self.component_spec = component_spec self._task_spec = dsl.structures.TaskSpec( @@ -396,7 +372,6 @@ def __init__( self.container_spec = None self.pipeline_spec = None self._ignore_upstream_failure_tag = False - # platform_config for this primitive task; empty if task is for a graph component self.platform_config = {} def validate_placeholder_types( @@ -423,9 +398,10 @@ def validate_placeholder_types( else: self.pipeline_spec = self.component_spec.implementation.graph - self._outputs = {output.name: ensure_channels(output, component_spec.name)} - # args = {arg: ensure_channels(arg) for arg in args} + if output is not None: + self._outputs = {output.name: ensure_channels(output, component_spec.name)} + self._inputs = args self._channel_inputs = [ @@ -498,17 +474,10 @@ class CommandInputSchema(TypedDict): Attributes: type: CWL type of this input. - label: name to show for this input. - fields: (for `record`) individual fields in a dict-like structure. - items: (for `array`) type that each field will have. """ - type: "InputSchemaType" - label: str - fields: NotRequired[dict[str, "CommandInputSchema"]] - items: NotRequired["InputSchemaType"] - default: NotRequired[RawType] - artifacts: NotRequired[list[type[DatasetPath]]] + type: str | tuple[Any, ...] + is_artifact_list: NotRequired[bool] InputSchemaType = ( @@ -583,6 +552,11 @@ def _render(ref: Any) -> Basic | RawType: class ExecutorConfiguration(TypedDict): packages: list[str] + image: NotRequired[str] + pip_index_urls: NotRequired[list[str]] + kfp_package_path: NotRequired[str] + pip_trusted_hosts: NotRequired[list[str]] + class KubeflowRendererConfiguration(TypedDict): """Configuration for the renderer. @@ -690,7 +664,6 @@ def render(self) -> dict[str, RawType]: return representation -@define class StepDefinition: """CWL-renderable step. @@ -703,11 +676,6 @@ class StepDefinition: in_: inputs from values or other steps. """ - name: str - run: str - out: dict[str, "CommandInputSchema"] | list[str] - in_: Mapping[str, ReferenceDefinition | Raw] - @classmethod def from_step(cls, step: BaseStep) -> "StepDefinition": """Build from a `BaseStep`. @@ -721,9 +689,16 @@ def from_step(cls, step: BaseStep) -> "StepDefinition": # equivalent to KFP's Output[Artifact] annotation. # Ignore dataset parameter references when constructing the function. # They will be _actual_ globals when it runs. 
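        # In the worked example, such globals are module-level declarations like
        #   prediction: Dataset = DATASET_MANAGER.path()
        # which the generated wrapper rebinds to concrete temporary paths at run time.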
- param_types = [(key, with_type(value)) for key, value in step.arguments.items() if not isinstance(value, DatasetParameterReference)] - inputs, outputs = get_name_to_specs(param_types, step.return_type, step_name=k8s_name(step.name)) - executor_config = get_render_configuration("executor")["default"] + param_types = [ + (key, with_type(value)) + for key, value in step.arguments.items() + if not isinstance(value, DatasetParameterReference) + ] + inputs, outputs = get_name_to_specs( + param_types, step.return_type, step_name=k8s_name(step.name) or step.name + ) + executor_configs = cast(dict[str, ExecutorConfiguration], get_render_configuration("executor")) + executor_config = executor_configs["default"] default_image = executor_config.get("image", "python:3.9") default_packages = executor_config.get("packages") @@ -732,7 +707,7 @@ def from_step(cls, step: BaseStep) -> "StepDefinition": default_pip_trusted_hosts = executor_config.get("pip_trusted_hosts") container = dsl.structures.ContainerSpecImplementation( image="python:3.9", - command=["python"], # RMV + command=["python"], # RMV args=[], ) component_spec = dsl.structures.ComponentSpec( @@ -741,14 +716,14 @@ def from_step(cls, step: BaseStep) -> "StepDefinition": inputs=inputs, # outputs=make_output_spec("out", step.return_type)["fields"], # make_output_spec(return_ann) outputs=outputs, # make_output_spec(return_ann) - implementation=dsl.structures.Implementation(container) + implementation=dsl.structures.Implementation(container), ) if isinstance(step, NestedStep): - cmpt = dsl.container_component_class.ContainerComponent(component_spec, step.task) + cmpt = dsl.container_component_class.ContainerComponent( + component_spec, step.task + ) else: - def fn(*args, **kwargs): - ... cmpt = create_component_from_step( base_image=default_image, component_spec=component_spec, @@ -756,14 +731,12 @@ def fn(*args, **kwargs): pip_index_urls=default_pip_index_urls, kfp_package_path=default_kfp_package_path, pip_trusted_hosts=default_pip_trusted_hosts, - step=step + step=step, ) task_inputs = { - key: ensure_channels( - arg, - step.name - ) for key, arg in step.arguments.items() + key: ensure_channels(arg, step.name) + for key, arg in step.arguments.items() if not isinstance(arg, DatasetParameterReference) } task_spec = DewretPipelineTask( @@ -785,40 +758,18 @@ def fn(*args, **kwargs): ) return task_spec - def render(self) -> dict[str, RawType]: - """Render to a dict-like structure. - Returns: - Reduced form as a native Python dict structure for - serialization. 
- """ - return { - "run": self.run, - "in": { - key: ( - ref.render() - if isinstance(ref, ReferenceDefinition) - else render_expression(ref).render() - if isinstance(ref, Basic) - else {"default": firm_to_raw(ref.value)} - if hasattr(ref, "value") - else render_expression(ref).render() - ) - for key, ref in self.in_.items() - }, - "out": crawl_raw(self.out), - } - - -def dataset_path_to_artifact(typ): - typ, annotateds = strip_annotations(typ) +def dataset_path_to_artifact(typ: type[Any]) -> type[dsl.types.artifact_types.Artifact]: + _, annotateds = strip_annotations(typ) + typ = dsl.types.artifact_types.Artifact if "KFPDataset" in annotateds: typ = dsl.types.artifact_types.Dataset - else: - typ = dsl.types.artifact_types.Artifact return typ -def to_kfp_type(label: str, full_typ: type) -> tuple[CommandInputSchema, dict[str, type[dsl.types.artifact_types.Artifact]]]: + +def to_kfp_type( + label: str, full_typ: type +) -> tuple[CommandInputSchema, dict[str, type[dsl.types.artifact_types.Artifact]]]: """Map Python types to CWL types. Args: @@ -830,8 +781,8 @@ def to_kfp_type(label: str, full_typ: type) -> tuple[CommandInputSchema, dict[st """ typ, annotateds = strip_annotations(full_typ) typ_dict: CommandInputSchema = {"type": ""} + artifacts: dict[str, type[dsl.types.artifact_types.Artifact]]= {} base: Any | None = typ - artifacts = {} args = get_args(typ) if args: base = get_origin(typ) @@ -852,37 +803,32 @@ def to_kfp_type(label: str, full_typ: type) -> tuple[CommandInputSchema, dict[st raise RuntimeError("KFP cannot currently handle bytes as a annotation type.") elif isinstance(typ, UnionType): raise RuntimeError("KFP cannot currently handle unions as a annotation type.") - #typ_dict.update( - # {"type": NamedTuple(label, ((f"item{n}", item) for n, item in enumerate(args)))} - #) - #typ_dict["type"].__annotations__ = {f"item{n}": item for n, item in enumerate(args)} elif isclass(base) and issubclass(base, Iterable): try: if len(args) > 1: # This is only true for a pipeline - components can output only one artifact. 
- # artifact_args = [arg for arg in args if issubclass(strip_annotateds(arg)[0], DatasetPath)] - # if artifact_args: - # if len(args) != len(artifact_args): - # raise TypeError(f"Tuple return must be all artifacts or no artifacts: {args} -- {artifact_args}") - # if len({type(arg) for arg in args}) != 1: - # raise TypeError(f"Can only have one artifact type in a tuple: {arg}") - # print(artifact_args, label) - # typ_dict.update(dsl.component_factory.make_input_output_spec_args(list[dataset_path_to_artifact(artifact_args[0])])) - # else: tuple_label = label.replace("-", "_") typ_dict.update( { - "type": NamedTuple(tuple_label, ((f"{tuple_label}__{n}", item) for n, item in enumerate(args))) + "type": NamedTuple( + tuple_label, + ( + (f"{tuple_label}__{n}", item) + for n, item in enumerate(args) + ), + ) } ) - typ_dict["type"].__annotations__ = {f"{tuple_label}__{n}": item for n, item in enumerate(args)} + typ_dict["type"].__annotations__ = { + f"{tuple_label}__{n}": item for n, item in enumerate(args) + } elif len(args) == 1: interior_typ, interior_artifacts = to_kfp_type(label, args[0]) - typ_dict.update( - {"type": f"List[{interior_typ["type"]}"} - ) + typ_dict.update({"type": f"List[{interior_typ["type"]}"}) if set(artifacts.keys()) & set(interior_artifacts.keys()): - raise TypeError(f"Artifacts have overlapping keys: {artifacts} -- {interior_artifacts}") + raise TypeError( + f"Artifacts have overlapping keys: {artifacts} -- {interior_artifacts}" + ) artifacts.update(interior_artifacts) else: typ_dict["type"] = "array" @@ -894,9 +840,13 @@ def to_kfp_type(label: str, full_typ: type) -> tuple[CommandInputSchema, dict[st typ_dict["type"] = typ if isinstance(typ, str) else typ.__name__ elif isinstance(typ, type) and issubclass(typ, Dataset): artifacts[label] = dataset_path_to_artifact(full_typ) - typ_dict.update(dsl.component_factory.make_input_output_spec_args(artifacts[label])) + typ_dict.update( + dsl.component_factory.make_input_output_spec_args(artifacts[label]) + ) elif typ: - raise TypeError(f"Cannot render type ({typ}) to CWL for {label}; base: {base}; args: {args}") + raise TypeError( + f"Cannot render type ({typ}) to CWL for {label}; base: {base}; args: {args}" + ) return typ_dict, artifacts @@ -919,7 +869,7 @@ def make_output_spec( label: str, typ: type[RawType | AttrsInstance | DataclassProtocol], output_source: str | None = None, -) -> tuple[dsl.structures.OutputSpec, dict[str, type[DatasetPath]]]: +) -> dsl.structures.OutputSpec: """Turn a step's output into an output schema. Takes a source, type and label and provides a description for CWL. 
@@ -933,40 +883,26 @@ def make_output_spec( CWL CommandOutputSchema-like structure for embedding into an `outputs` block """ fields = None - artifacts = {} if attrs_has(typ): fields = {} for field in attrs_fields(typ): - output_spec, field_artifacts = make_output_spec(field.name, field.type) - fields[str(field.name)] = cast( - dsl.structures.OutputSpec, output_spec - ) - if set(artifacts) & set(field_artifacts): - raise TypeError(f"Clashing key names: {artifacts} -- {field_artifacts}") - artifacts.update(field_artifacts) + output_spec = make_output_spec(field.name, field.type) + fields[str(field.name)] = cast(dsl.structures.OutputSpec, output_spec) elif is_dataclass(typ): fields = {} for field in dataclass_fields(typ): - output_spec, field_artifacts = make_output_spec(field.name, field.type) - fields[str(field.name)] = cast( - dsl.structures.OutputSpec, output_spec - ) - if set(artifacts) & set(field_artifacts): - raise TypeError(f"Clashing key names: {artifacts} -- {field_artifacts}") - artifacts.update(field_artifacts) + output_spec = make_output_spec(field.name, field.type) + fields[str(field.name)] = cast(dsl.structures.OutputSpec, output_spec) else: kfp_type, _ = to_kfp_type(label, typ) - kfp_type = kfp_type["type"] - if hasattr(kfp_type, "_fields"): + kfp_ann = kfp_type["type"] + if hasattr(kfp_ann, "_fields"): fields = {} - for name in kfp_type._fields: - output_spec, field_artifacts = make_output_spec(name, kfp_type.__annotations__[name]) - fields[name] = cast( - dsl.structures.OutputSpec, output_spec + for name in kfp_ann._fields: + output_spec = make_output_spec( + name, kfp_ann.__annotations__[name] ) - if set(artifacts) & set(field_artifacts): - raise TypeError(f"Clashing key names: {artifacts} -- {field_artifacts}") - artifacts.update(field_artifacts) + fields[name] = cast(dsl.structures.OutputSpec, output_spec) if fields: output = fields @@ -974,107 +910,20 @@ def make_output_spec( # TODO: this complains because NotRequired keys are never present, # but that does not seem like a problem here - likely a better solution. kfp_type, inner_artifacts = to_kfp_type(label, typ) - if set(artifacts) & set(inner_artifacts): - raise TypeError(f"Clashing key names: {artifacts} -- {inner_artifacts}") - artifacts.update({ - key: dsl.structures.OutputSpec( - **dsl.component_factory.make_input_output_spec_args(art) - ) for key, art in inner_artifacts.items() - }) output = dsl.structures.OutputSpec(**kfp_type) # if output_source is not None: # output["outputSource"] = output_source - return output, artifacts - - -@define -class OutputsDefinition: - """CWL-renderable set of workflow outputs. - - Turns dewret results into a CWL output block. - - Attributes: - outputs: sequence of results from a workflow. - """ - - outputs: ( - dict[str, "CommandOutputSchema"] - | list["CommandOutputSchema"] - | CommandOutputSchema - ) - - @classmethod - def from_results( - cls, - results: dict[str, StepReference[Any]] - | list[StepReference[Any]] - | tuple[StepReference[Any], ...], - ) -> "OutputsDefinition": - """Takes a mapping of results into a CWL structure. - - Pulls the result type from the signature, ultimately, if possible. - - Returns: - CWL-like structure representing all workflow outputs. 
- """ - - def _build_results(result: Any) -> RawType: - if isinstance(result, Reference): - # TODO: need to work out how to tell mypy that a TypedDict is also dict[str, RawType] - return make_output_spec( # type: ignore - with_field(result), with_type(result), output_source=to_name(result) - ) - results = result - return ( - [_build_results(result) for result in results] - if isinstance(results, list | tuple | Tuple) - else {key: _build_results(result) for key, result in results.items()} - ) - - try: - # TODO: sort out this nested type building. - return cls(outputs=_build_results(results)) # type: ignore - except AttributeError: - expr, references = expr_to_references(results) - reference_names = sorted( - { - str(ref._.parameter) - if isinstance(ref, ParameterReference) - else str(ref._.step) - for ref in references - } - ) - return cls( - outputs={ - "out": { - "type": "float", # WARNING: we assume any arithmetic expression returns a float. - "label": "out", - "expression": str(expr), - "source": reference_names, - } - } - ) - - def render(self) -> dict[str, RawType] | list[RawType]: - """Render to a dict-like structure. - - Returns: - Reduced form as a native Python dict structure for - serialization. - """ - return ( - [crawl_raw(output) for output in self.outputs] - if isinstance(self.outputs, list) - else {key: crawl_raw(output) for key, output in self.outputs.items()} - ) + return output def py_name(name: str | None) -> str | None: return name and name.replace("-", "_").replace("[", "__").replace("]", "") + def k8s_name(name: str | None) -> str | None: return name and name.replace("_", "-").replace("[", "--").replace("]", "") + class DewretGraphComponent(dsl.base_component.BaseComponent): """CWL-renderable workflow. @@ -1086,22 +935,32 @@ class DewretGraphComponent(dsl.base_component.BaseComponent): """ @classmethod - def from_workflow( - cls, workflow: Workflow, name: None | str = None, execute: bool = True - ) -> "DewretGraphComponent": - """Build from a `Workflow`. + def _make_pipeline( + cls, workflow: Workflow, name: str + ) -> tuple[Pipeline, dict[str, Any]]: + pipeline_outputs = {} + with BuilderPipeline(name) as dsl_pipeline: + for step in workflow.indexed_steps.values(): + if isinstance(step, FactoryCall) and get_render_configuration( + "factories_as_params" + ): + continue + StepDefinition.from_step(step) + pipeline_outputs = { + dsl.component_factory.SINGLE_OUTPUT_NAME: ensure_channels( + workflow.result, name + ) + } + return dsl_pipeline, pipeline_outputs - Converts a `dewret.workflow.Workflow` into a CWL-rendering object. - Args: - workflow: workflow to convert. - name: name of this workflow, if it should have one. 
- """ - display_name = name - name = k8s_name(name) + @classmethod + def _make_component_spec( + cls, workflow: Workflow, name: str | None + ) -> dsl.structures.ComponentSpec: parameters: list[ParameterReference[Any] | FactoryCall] = [ - param for param in - workflow.find_parameters( + param + for param in workflow.find_parameters( include_factory_calls=not get_render_configuration( "factories_as_params" ) @@ -1112,29 +971,16 @@ def from_workflow( if get_render_configuration("factories_as_params"): parameters += list(workflow.find_factories().values()) - pipeline_outputs = {} - with BuilderPipeline(name or "myname") as dsl_pipeline: - for step in workflow.indexed_steps.values(): - if isinstance(step, FactoryCall) and get_render_configuration( - "factories_as_params" - ): - continue - StepDefinition.from_step(step) - pipeline_outputs = { - dsl.component_factory.SINGLE_OUTPUT_NAME: ensure_channels( - workflow.result, - name - ) - } - - inputs, outputs = get_name_to_specs([ - (param.name, with_type(param)) - for param in parameters - ], with_type(workflow.result), step_name=name) + inputs, outputs = get_name_to_specs( + [(param.name, with_type(param)) for param in parameters], + with_type(workflow.result), + step_name=name or "undefined_step", + ) + print(outputs, "OO") description = "DESCRIPTION" component_name = "NAME" - component_spec = dsl.structures.ComponentSpec( + return dsl.structures.ComponentSpec( name=component_name, description=description, inputs=inputs, @@ -1142,16 +988,22 @@ def from_workflow( implementation=dsl.structures.Implementation(), ) - args_list = [] - for parameter in parameters: - input_spec = component_spec.inputs[parameter.name] - args_list.append( - dsl.pipeline_channel.create_pipeline_channel( - name=parameter.name, - channel_type=input_spec.type, - is_artifact_list=input_spec.is_artifact_list, - ) - ) + + @classmethod + def from_workflow( + cls, workflow: Workflow, display_name: None | str = None, execute: bool = True + ) -> "DewretGraphComponent": + """Build from a `Workflow`. + + Converts a `dewret.workflow.Workflow` into a CWL-rendering object. + + Args: + workflow: workflow to convert. + name: name of this workflow, if it should have one. + """ + name = k8s_name(display_name) or "unnamed-pipeline" + dsl_pipeline, pipeline_outputs = cls._make_pipeline(workflow, name) + component_spec = cls._make_component_spec(workflow, name) graph_component = cls(component_spec=component_spec) pipeline_group = dsl_pipeline.groups[0] @@ -1163,9 +1015,6 @@ def from_workflow( pipeline_outputs=pipeline_outputs, pipeline_config={}, ) - # pipeline_root = getattr(pipeline_func, 'pipeline_root', None) - # if pipeline_root is not None: - # pipeline_spec.default_pipeline_root = pipeline_root if display_name is not None: pipeline_spec.pipeline_info.display_name = display_name if component_spec.description is not None: @@ -1190,15 +1039,7 @@ def render(self) -> dict[str, RawType]: Reduced form as a native Python dict structure for serialization. 
""" - pipeline_spec_dict = json_format.MessageToDict(self.pipeline_spec) - # yaml_comments = extract_comments_from_pipeline_spec(pipeline_spec_dict, - # self.description) - # has_platform_specific_features = len(self.platform_spec.platforms) > 0 - - # documents = [pipeline_spec_dict] - # if has_platform_specific_features: - # documents.append(json_format.MessageToDict(self.platform_spec)) - return yaml.safe_dump(pipeline_spec_dict, sort_keys=True) + return json_format.MessageToDict(self.pipeline_spec) # type: ignore def render( diff --git a/src/dewret/tasks.py b/src/dewret/tasks.py index 011851f9..2aca9ebc 100644 --- a/src/dewret/tasks.py +++ b/src/dewret/tasks.py @@ -76,6 +76,7 @@ RetType = TypeVar("RetType") T = TypeVar("T") + class Backend(Enum): """Stringy enum representing available backends.""" @@ -325,6 +326,7 @@ def factory(fn: Callable[..., RetType]) -> Callable[..., RetType]: """ return task(is_factory=True)(fn) + # Workaround for PyCharm factory: Callable[[Callable[..., RetType]], Callable[..., RetType]] = factory @@ -356,6 +358,7 @@ def workflow() -> Callable[[Callable[Param, RetType]], Callable[Param, RetType]] """ return task(nested=True, flatten_nested=False) + # Workaround for PyCharm workflow: Callable[[], Callable[[T], T]] = workflow @@ -550,10 +553,14 @@ def {fn.__name__}(...) -> ...: elif inspect.isclass(value) or inspect.isfunction(value): # We assume these are loaded at runtime. ... - elif is_firm(value) or ( - (attrs_has(value) or is_dataclass(value)) - and not inspect.isclass(value) - ) or isinstance(value, Dataset): + elif ( + is_firm(value) + or ( + (attrs_has(value) or is_dataclass(value)) + and not inspect.isclass(value) + ) + or isinstance(value, Dataset) + ): kwargs[var] = cast( DatasetParameter[Any], param( @@ -564,7 +571,7 @@ def {fn.__name__}(...) -> ...: var, exhaustive=True ) or UNSET, - parameter_cls=DatasetParameter + parameter_cls=DatasetParameter, ), ).make_reference(workflow=workflow) elif ( @@ -667,9 +674,11 @@ def {fn.__name__}(...) -> ...: return _task + # Workaround for PyCharm task: Callable[[], Callable[[T], T]] = task + def set_backend(backend: Backend) -> None: """Choose a backend. diff --git a/src/dewret/utils.py b/src/dewret/utils.py index 9269e860..f0c22985 100644 --- a/src/dewret/utils.py +++ b/src/dewret/utils.py @@ -23,19 +23,7 @@ import importlib import importlib.util from types import FrameType, TracebackType, UnionType, ModuleType -from typing import ( - Any, - cast, - Protocol, - ClassVar, - Callable, - Iterable, - get_args, - Hashable, - Annotated, - get_origin, - get_args -) +from typing import Any, cast, Protocol, ClassVar, Callable, Iterable, Hashable, get_args from pathlib import Path from collections.abc import Sequence, Mapping from dataclasses import asdict, is_dataclass diff --git a/src/dewret/workflow.py b/src/dewret/workflow.py index 156a4bfb..fe7008ba 100644 --- a/src/dewret/workflow.py +++ b/src/dewret/workflow.py @@ -308,11 +308,12 @@ def __getattr__(self, attr: str) -> Reference[T] | Any: """ return getattr(self.make_reference(workflow=None), attr) + class DatasetParameter(Parameter[T]): def make_reference(self, **kwargs: Any) -> "DatasetParameterReference[T]": """Creates a new reference for the parameter. - The kwargs will be passed to the constructor, but the + The kwargs will be passed to the constructor, but the Args: typ: type of the new reference's target. 
@@ -325,13 +326,14 @@ def make_reference(self, **kwargs: Any) -> "DatasetParameterReference[T]":
         kwargs.setdefault("typ", self.__type__)
         return DatasetParameterReference(**kwargs)
 
+
 def param(
     name: str,
     default: T | UnsetType[T] | Unset = UNSET,
     tethered: Literal[False] | None | Step | Workflow = False,
     typ: type[T] | Unset = UNSET,
     autoname: bool = False,
-    parameter_cls: type[Parameter[T]] = Parameter[T]
+    parameter_cls: type[Parameter[T]] = Parameter[T],
 ) -> T:
     """Create a parameter.
 
@@ -346,7 +348,9 @@ def param(
         default = UnsetType[T](typ)
     return cast(
         T,
-        parameter_cls(name, default=default, tethered=tethered, autoname=autoname, typ=typ),
+        parameter_cls(
+            name, default=default, tethered=tethered, autoname=autoname, typ=typ
+        ),
     )
 
 
@@ -1604,8 +1608,8 @@ def __make_reference__(self, **kwargs: Any) -> "ParameterReference[U]":
         return self._.parameter.make_reference(**kwargs)
 
 
-class DatasetParameterReference(ParameterReference[U]):
-    ...
+class DatasetParameterReference(ParameterReference[U]): ...
+
 
 class IterableParameterReference(IterableMixin[U], ParameterReference[U]):
 
From c45d2f4f39a5c3edbf64e1cac897e8f4ffee8056 Mon Sep 17 00:00:00 2001
From: Phil Weir
Date: Sat, 18 Jan 2025 14:56:09 +0000
Subject: [PATCH 5/6] chore: linting

---
 src/dewret/renderers/kubeflow.py | 30 +++++++++++++-----------------
 src/dewret/workflow.py           |  1 -
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/dewret/renderers/kubeflow.py b/src/dewret/renderers/kubeflow.py
index 5d418cdb..bd6b1cc5 100644
--- a/src/dewret/renderers/kubeflow.py
+++ b/src/dewret/renderers/kubeflow.py
@@ -183,9 +183,7 @@ def to_repr(typ: type[Any]) -> tuple[str, None | list[tuple[str, str]]]:
             signature.append((param, f"Input[{artifacts[param].__qualname__}]"))
             in_paths.append(param)
         else:
-            signature.append(
-                (param, sig.parameters[param].annotation.__qualname__)
-            )
+            signature.append((param, sig.parameters[param].annotation.__qualname__))
     wrapper_str = ", ".join(f"{n}: {t}" for n, t in signature)
     print(step.return_type)
     command[-1] += """
@@ -277,7 +275,10 @@ def to_repr(typ: type[Any]) -> tuple[str, None | list[tuple[str, str]]]:
 
 def get_name_to_specs(
-    func_params: list[tuple[str, str]], return_ann: type[Any], step_name: str, containerized: bool = False
+    func_params: list[tuple[str, str]],
+    return_ann: type[Any],
+    step_name: str,
+    containerized: bool = False,
 ) -> tuple[dict[str, dsl.structures.InputSpec], dict[str, dsl.structures.OutputSpec]]:
     name_to_input_specs = {}
     name_to_output_specs = {}
@@ -309,9 +310,7 @@ def get_name_to_specs(
         if output_spec is not None:
             name_to_output_specs[name] = output_spec
     else:
-        rettyp = make_output_spec(
-            dsl.component_factory.SINGLE_OUTPUT_NAME, return_ann
-        )
+        rettyp = make_output_spec(dsl.component_factory.SINGLE_OUTPUT_NAME, return_ann)
         if rettyp is not None:
             name_to_output_specs[prefix] = rettyp
     return name_to_input_specs, name_to_output_specs
@@ -398,7 +397,6 @@ def validate_placeholder_types(
         else:
             self.pipeline_spec = self.component_spec.implementation.graph
 
-
         if output is not None:
             self._outputs = {output.name: ensure_channels(output, component_spec.name)}
 
@@ -697,7 +695,9 @@ def from_step(cls, step: BaseStep) -> "StepDefinition":
         inputs, outputs = get_name_to_specs(
             param_types, step.return_type, step_name=k8s_name(step.name) or step.name
         )
-        executor_configs = cast(dict[str, ExecutorConfiguration], get_render_configuration("executor"))
+        executor_configs = cast(
+            dict[str, ExecutorConfiguration], get_render_configuration("executor")
+        )
         executor_config = executor_configs["default"]
         default_image = executor_config.get("image", "python:3.9")
@@ -781,7 +781,7 @@ def to_kfp_type(
     """
     typ, annotateds = strip_annotations(full_typ)
     typ_dict: CommandInputSchema = {"type": ""}
-    artifacts: dict[str, type[dsl.types.artifact_types.Artifact]]= {}
+    artifacts: dict[str, type[dsl.types.artifact_types.Artifact]] = {}
     base: Any | None = typ
     args = get_args(typ)
     if args:
@@ -824,7 +824,7 @@ def to_kfp_type(
         }
     elif len(args) == 1:
         interior_typ, interior_artifacts = to_kfp_type(label, args[0])
-        typ_dict.update({"type": f"List[{interior_typ["type"]}"})
+        typ_dict.update({"type": f"List[{interior_typ['type']}]"})
         if set(artifacts.keys()) & set(interior_artifacts.keys()):
             raise TypeError(
                 f"Artifacts have overlapping keys: {artifacts} -- {interior_artifacts}"
@@ -899,9 +899,7 @@ def make_output_spec(
     if hasattr(kfp_ann, "_fields"):
         fields = {}
         for name in kfp_ann._fields:
-            output_spec = make_output_spec(
-                name, kfp_ann.__annotations__[name]
-            )
+            output_spec = make_output_spec(name, kfp_ann.__annotations__[name])
             fields[name] = cast(dsl.structures.OutputSpec, output_spec)
 
         if fields:
@@ -953,7 +951,6 @@ def _make_pipeline(
         }
         return dsl_pipeline, pipeline_outputs
 
-
     @classmethod
     def _make_component_spec(
         cls, workflow: Workflow, name: str | None
@@ -988,7 +985,6 @@ def _make_component_spec(
             implementation=dsl.structures.Implementation(),
         )
 
-
     @classmethod
     def from_workflow(
         cls, workflow: Workflow, display_name: None | str = None, execute: bool = True
@@ -1039,7 +1035,7 @@ def render(self) -> dict[str, RawType]:
 
             Reduced form as a native Python dict structure for serialization.
         """
-        return json_format.MessageToDict(self.pipeline_spec) # type: ignore
+        return json_format.MessageToDict(self.pipeline_spec)  # type: ignore
 
 
 def render(
diff --git a/src/dewret/workflow.py b/src/dewret/workflow.py
index fe7008ba..a86c1bae 100644
--- a/src/dewret/workflow.py
+++ b/src/dewret/workflow.py
@@ -1611,7 +1611,6 @@ def __make_reference__(self, **kwargs: Any) -> "ParameterReference[U]":
 
 class DatasetParameterReference(ParameterReference[U]): ...
 
-
 class IterableParameterReference(IterableMixin[U], ParameterReference[U]):
     """Iterable form of parameter references."""
 
From 24e04cdb0e13f4eb3598a1418f6e2c22b2cecea0 Mon Sep 17 00:00:00 2001
From: Phil Weir
Date: Sat, 18 Jan 2025 14:56:45 +0000
Subject: [PATCH 6/6] chore: linting

---
 src/dewret/renderers/kubeflow.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/dewret/renderers/kubeflow.py b/src/dewret/renderers/kubeflow.py
index bd6b1cc5..1967459e 100644
--- a/src/dewret/renderers/kubeflow.py
+++ b/src/dewret/renderers/kubeflow.py
@@ -30,7 +30,6 @@
 from attrs import define, has as attrs_has, fields as attrs_fields, AttrsInstance
 from dataclasses import is_dataclass, fields as dataclass_fields
 from collections.abc import Mapping
-import yaml
 from typing import (
     TypeVar,
     Annotated,
@@ -44,7 +43,6 @@
     Unpack,
     Iterable,
     Optional,
-    List,
 )
 from types import UnionType
 import inspect
@@ -70,9 +68,7 @@
     expr_to_references,
 )
 from dewret.utils import (
-    crawl_raw,
     DataclassProtocol,
-    firm_to_raw,
 )
 from dewret.render import base_render
 from dewret.core import (